Skip to content

Commit 9ac54e7

Browse files
committed
increase version 0.0.9 -> 0.0.10
1 parent c9046d9 commit 9ac54e7

File tree

6 files changed

+223
-184
lines changed

6 files changed

+223
-184
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "quantms-rescoring"
33
description = "quantms-rescoring: Python scripts and helpers for the quantMS workflow"
44
readme = "README.md"
55
license = "MIT"
6-
version = "0.0.9"
6+
version = "0.0.10"
77
authors = [
88
"Yasset Perez-Riverol <ypriverol@gmail.com>",
99
"Dai Chengxin <chengxin2024@126.com>",

quantmsrescore/annotator.py

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,24 +23,12 @@ class FeatureAnnotator:
2323
from MS2PIP and DeepLC models to improve rescoring.
2424
"""
2525

26-
def __init__(
27-
self,
28-
feature_generators: str,
29-
only_features: Optional[str] = None,
30-
ms2pip_model: str = "HCD2021",
31-
force_model: bool = False,
32-
ms2pip_model_path: str = "models",
33-
ms2_tolerance: float = 0.05,
34-
calibration_set_size: float = 0.2,
35-
valid_correlations_size: float = 0.7,
36-
skip_deeplc_retrain: bool = False,
37-
processes: int = 2,
38-
log_level: str = "INFO",
39-
spectrum_id_pattern: str = "(.*)", # default for openms idXML
40-
psm_id_pattern: str = "(.*)", # default for openms idXML
41-
remove_missing_spectra: bool = True,
42-
ms2_only: bool = True
43-
):
26+
def __init__(self, feature_generators: str, only_features: Optional[str] = None, ms2pip_model: str = "HCD2021",
27+
force_model: bool = False, ms2pip_model_path: str = "models", ms2_tolerance: float = 0.05,
28+
calibration_set_size: float = 0.2, valid_correlations_size: float = 0.7,
29+
skip_deeplc_retrain: bool = False, processes: int = 2, log_level: str = "INFO",
30+
spectrum_id_pattern: str = "(.*)", psm_id_pattern: str = "(.*)", remove_missing_spectra: bool = True,
31+
ms2_only: bool = True, find_best_model: bool = False) -> None:
4432
"""
4533
Initialize the Annotator with configuration parameters.
4634
@@ -74,6 +62,10 @@ def __init__(
7462
Process only MS2-level PSMs (default: True).
7563
find_best_ms2pip_model : bool, optional
7664
Find best MS2PIP model for the dataset (default: False).
65+
force_model : bool, optional
66+
Force the use of the provided MS2PIP model (default: False).
67+
find_best_model : bool, optional
68+
Search for the best-fitting MS2PIP model and use it if it passes validation; ignored when force_model is set (default: False).
7769
7870
Raises
7971
------
@@ -117,6 +109,7 @@ def __init__(
117109
self._remove_missing_spectra = remove_missing_spectra
118110
self._ms2_only = ms2_only
119111
self._force_model = force_model
112+
self._find_best_model = find_best_model
120113

121114
def build_idxml_data(
122115
self, idxml_file: Union[str, Path], spectrum_path: Union[str, Path]
@@ -233,20 +226,53 @@ def _run_ms2pip_annotation(self) -> None:
233226
logger.error(f"Failed to initialize MS2PIP: {e}")
234227
raise
235228

236-
# Apply MS2PIP annotation
229+
# Get PSM list
237230
psm_list = self._idxml_reader.psms
231+
238232
try:
239-
ms2pip_generator.add_features(psm_list)
240-
self._idxml_reader.psms = psm_list
241-
logger.info("MS2PIP annotations added to PSMs")
242-
except Ms2pipIncorrectModelException:
243-
if not self._force_model:
244-
self._find_and_apply_best_ms2pip_model(psm_list)
233+
# Save original model for reference
234+
original_model = ms2pip_generator.model
235+
236+
# Determine which model to use based on configuration and validation
237+
model_to_use = original_model
238+
239+
# Case 1: Force specific model regardless of validation
240+
if self._force_model:
241+
model_to_use = original_model
242+
logger.info(f"Using forced model: {model_to_use}")
243+
244+
# Case 2: Find best model if requested and not forcing original
245+
elif self._find_best_model:
246+
best_model = ms2pip_generator._find_best_ms2pip_model(psm_list)
247+
if ms2pip_generator.validate_features(psm_list=psm_list, model=best_model[0]):
248+
model_to_use = best_model[0]
249+
logger.info(f"Using best model: {model_to_use}")
250+
else:
251+
# Fallback to original model if best model doesn't validate
252+
if ms2pip_generator.validate_features(psm_list, model=original_model):
253+
logger.warning("Best model validation failed, falling back to original model")
254+
else:
255+
logger.error("Both best model and original model validation failed")
256+
return # Exit early since no valid model is available
257+
258+
# Case 3: Use original model but validate it first
245259
else:
246-
logger.error("MS2PIP model not suitable for this data. Skip MS2PIP annotations.")
260+
if not ms2pip_generator.validate_features(psm_list):
261+
logger.error("Original model validation failed. No features added.")
262+
return # Exit early since validation failed
263+
logger.info(f"Using original model: {model_to_use}")
264+
265+
# Apply the selected model
266+
ms2pip_generator.model = model_to_use
267+
ms2pip_generator.add_features(psm_list)
268+
logger.info(f"Successfully applied MS2PIP annotation using model: {model_to_use}")
247269

248270
except Exception as e:
249-
logger.error(f"Failed to add MS2PIP features: {e}")
271+
logger.error(f"Failed to apply MS2PIP annotation: {e}")
272+
return # Indicate failure through early return
273+
274+
return # Successful completion
275+
250276

251277
def _create_ms2pip_annotator(
252278
self, model: Optional[str] = None, tolerance: Optional[float] = None
@@ -580,4 +606,4 @@ def _get_mae_from_psm_list(self, psm_list: PSMList) -> float:
580606
logger.warning("No valid retention time differences for MAE calculation")
581607
return float("inf")
582608

583-
return total_error / count
609+
return total_error / count

0 commit comments

Comments
 (0)