@@ -23,24 +23,12 @@ class FeatureAnnotator:
23
23
from MS2PIP and DeepLC models to improve rescoring.
24
24
"""
25
25
26
- def __init__ (
27
- self ,
28
- feature_generators : str ,
29
- only_features : Optional [str ] = None ,
30
- ms2pip_model : str = "HCD2021" ,
31
- force_model : bool = False ,
32
- ms2pip_model_path : str = "models" ,
33
- ms2_tolerance : float = 0.05 ,
34
- calibration_set_size : float = 0.2 ,
35
- valid_correlations_size : float = 0.7 ,
36
- skip_deeplc_retrain : bool = False ,
37
- processes : int = 2 ,
38
- log_level : str = "INFO" ,
39
- spectrum_id_pattern : str = "(.*)" , # default for openms idXML
40
- psm_id_pattern : str = "(.*)" , # default for openms idXML
41
- remove_missing_spectra : bool = True ,
42
- ms2_only : bool = True
43
- ):
26
+ def __init__ (self , feature_generators : str , only_features : Optional [str ] = None , ms2pip_model : str = "HCD2021" ,
27
+ force_model : bool = False , ms2pip_model_path : str = "models" , ms2_tolerance : float = 0.05 ,
28
+ calibration_set_size : float = 0.2 , valid_correlations_size : float = 0.7 ,
29
+ skip_deeplc_retrain : bool = False , processes : int = 2 , log_level : str = "INFO" ,
30
+ spectrum_id_pattern : str = "(.*)" , psm_id_pattern : str = "(.*)" , remove_missing_spectra : bool = True ,
31
+ ms2_only : bool = True , find_best_model : bool = False ) -> None :
44
32
"""
45
33
Initialize the Annotator with configuration parameters.
46
34
@@ -74,6 +62,10 @@ def __init__(
74
62
Process only MS2-level PSMs (default: True).
75
63
find_best_ms2pip_model : bool, optional
76
64
Find best MS2PIP model for the dataset (default: False).
65
+ force_model : bool, optional
66
+ Use the provided MS2PIP model as-is, skipping model validation (default: False).
67
+ find_best_model : bool, optional
68
+ Search for the best MS2PIP model and use it if it validates, otherwise fall back to the provided model; ignored when force_model is True (default: False).
77
69
78
70
Raises
79
71
------
@@ -117,6 +109,7 @@ def __init__(
117
109
self ._remove_missing_spectra = remove_missing_spectra
118
110
self ._ms2_only = ms2_only
119
111
self ._force_model = force_model
112
+ self ._find_best_model = find_best_model
120
113
121
114
def build_idxml_data (
122
115
self , idxml_file : Union [str , Path ], spectrum_path : Union [str , Path ]
@@ -233,20 +226,53 @@ def _run_ms2pip_annotation(self) -> None:
233
226
logger .error (f"Failed to initialize MS2PIP: { e } " )
234
227
raise
235
228
236
- # Apply MS2PIP annotation
229
+ # Get PSM list
237
230
psm_list = self ._idxml_reader .psms
231
+
238
232
try :
239
- ms2pip_generator .add_features (psm_list )
240
- self ._idxml_reader .psms = psm_list
241
- logger .info ("MS2PIP annotations added to PSMs" )
242
- except Ms2pipIncorrectModelException :
243
- if not self ._force_model :
244
- self ._find_and_apply_best_ms2pip_model (psm_list )
233
+ # Save original model for reference
234
+ original_model = ms2pip_generator .model
235
+
236
+ # Determine which model to use based on configuration and validation
237
+ model_to_use = original_model
238
+
239
+ # Case 1: Force specific model regardless of validation
240
+ if self ._force_model :
241
+ model_to_use = original_model
242
+ logger .info (f"Using forced model: { model_to_use } " )
243
+
244
+ # Case 2: Find best model if requested and not forcing original
245
+ elif self ._find_best_model :
246
+ best_model = ms2pip_generator ._find_best_ms2pip_model (psm_list )
247
+ if ms2pip_generator .validate_features (psm_list = psm_list , model = best_model [0 ]):
248
+ model_to_use = best_model [0 ]
249
+ logger .info (f"Using best model: { model_to_use } " )
250
+ else :
251
+ # Fallback to original model if best model doesn't validate
252
+ if ms2pip_generator .validate_features (psm_list , model = original_model ):
253
+ logger .warning ("Best model validation failed, falling back to original model" )
254
+ else :
255
+ logger .error ("Both best model and original model validation failed" )
256
+ return # Exit early since no valid model is available
257
+
258
+ # Case 3: Use original model but validate it first
245
259
else :
246
- logger .error ("MS2PIP model not suitable for this data. Skip MS2PIP annotations." )
260
+ if not ms2pip_generator .validate_features (psm_list ):
261
+ logger .error ("Original model validation failed. No features added." )
262
+ return # Exit early since validation failed
263
+ logger .info (f"Using original model: { model_to_use } " )
264
+
265
+ # Apply the selected model
266
+ ms2pip_generator .model = model_to_use
267
+ ms2pip_generator .add_features (psm_list )
268
+ logger .info (f"Successfully applied MS2PIP annotation using model: { model_to_use } " )
247
269
248
270
except Exception as e :
249
- logger .error (f"Failed to add MS2PIP features: { e } " )
271
+ logger .error (f"Failed to apply MS2PIP annotation: { e } " )
272
+ return # Indicate failure through early return
273
+
274
+ return # Successful completion
275
+
250
276
251
277
def _create_ms2pip_annotator (
252
278
self , model : Optional [str ] = None , tolerance : Optional [float ] = None
@@ -580,4 +606,4 @@ def _get_mae_from_psm_list(self, psm_list: PSMList) -> float:
580
606
logger .warning ("No valid retention time differences for MAE calculation" )
581
607
return float ("inf" )
582
608
583
- return total_error / count
609
+ return total_error / count
0 commit comments