7
7
8
8
9
9
# version tracking
10
- __version = "1.1.5 "
11
- __date = "2025-03-26 "
10
+ __version = "1.2.1 "
11
+ __date = "2025-07-23 "
12
12
13
13
# PARAMETERS
14
14
@@ -95,24 +95,31 @@ def read_spectral_library(filename: str) -> Dict[str, Dict[str, Any]]:
95
95
def generate_fragment_index (spectronaut : pd .DataFrame , index : dict ) -> Dict [str , Dict [str , Any ]]:
96
96
# dict keeping track of annotated fragments for every unique crosslink id
97
97
fragment_annotation = dict ()
98
+ # nr of spectronaut ions that could not be matched
99
+ nr_unmatched = 0
98
100
for i , row in tqdm (spectronaut .iterrows (), total = spectronaut .shape [0 ], desc = "Generating fragment ion index..." ):
99
101
# unique crosslink id
100
102
key = get_key_spectronaut (row )
103
+ # if fragment annotation struct does not have entry for crosslink id, create one that is empty
104
+ if key not in fragment_annotation :
105
+ fragment_annotation [key ] = {"matched_number_ions_a" : 0 ,
106
+ "matched_number_ions_b" : 0 ,
107
+ "fragments" : list (),
108
+ "ion_types" : set ()}
101
109
# current fragment ion from spectronaut row
102
110
ion = float (row [SPECTRONAUT_FRAGMENT_MZ_COLUMN_NAME ])
103
111
# all spectral library ions for the crosslink id
104
112
# this is a dict that matches ion mass -> list(fragments_with_that_mass[pd.Series])
105
113
ions = index [key ]["ions" ]
114
+ # has ion been matched?
115
+ matched = False
106
116
# for every ion mass of the crosslink id in the spec lib, do:
107
117
for current_ion_mz in ions .keys ():
108
118
fragment_key_part = get_fragment_key (current_ion_mz )
109
119
# check if spectronaut fragment is within tolerance bounds of spec lib ion
110
120
if round (current_ion_mz , 4 ) > round (ion - SPECTRONAUT_MATCH_TOLERANCE , 4 ) and round (current_ion_mz , 4 ) < round (ion + SPECTRONAUT_MATCH_TOLERANCE , 4 ):
111
- # if fragment annotation struct does not have entry for crosslink id, create one that is empty
112
- if key not in fragment_annotation :
113
- fragment_annotation [key ] = {"matched_number_ions_a" : 0 ,
114
- "matched_number_ions_b" : 0 ,
115
- "fragments" : []}
121
+ # ion has been matched
122
+ matched = True
116
123
# for all ions that match mass, do:
117
124
for current_ion in ions [current_ion_mz ]:
118
125
# if ion of peptide a
@@ -123,15 +130,24 @@ def generate_fragment_index(spectronaut: pd.DataFrame, index: dict) -> Dict[str,
123
130
if fragment_key not in fragment_annotation [key ]["fragments" ]:
124
131
fragment_annotation [key ]["matched_number_ions_a" ] += 1
125
132
fragment_annotation [key ]["fragments" ].append (fragment_key )
133
+ fragment_annotation [key ]["ion_types" ].add (
134
+ f"{ current_ion ['FragmentType' ]} ;{ current_ion ['FragmentNumber' ]} ;0"
135
+ )
126
136
else :
127
137
# same for peptide b
128
138
fragment_key = fragment_key_part + "_1"
129
139
if fragment_key not in fragment_annotation [key ]["fragments" ]:
130
140
fragment_annotation [key ]["matched_number_ions_b" ] += 1
131
141
fragment_annotation [key ]["fragments" ].append (fragment_key )
142
+ fragment_annotation [key ]["ion_types" ].add (
143
+ f"{ current_ion ['FragmentType' ]} ;{ current_ion ['FragmentNumber' ]} ;1"
144
+ )
132
145
# we do not break here, because we want to check the rest of the spec lib ions in case the spectronaut ion matches
133
146
# for both peptide a and peptide b
147
+ if not matched :
148
+ nr_unmatched += 1
134
149
150
+ print (f"[Fragment index] Nr. of unmatched ions: { nr_unmatched } <-> { (nr_unmatched / spectronaut .shape [0 ]) * 100 } %" )
135
151
return fragment_annotation
136
152
137
153
def annotate_spectronaut_result (filename : str ) -> pd .DataFrame :
@@ -372,6 +388,94 @@ def annotate_SourceScanID(row: pd.Series, index: dict) -> int:
372
388
tqdm .pandas (desc = "Annotating spectral library source scan ID..." )
373
389
spectronaut ["PP.SourceScanID" ] = spectronaut .progress_apply (lambda row : annotate_SourceScanID (row , index ), axis = 1 )
374
390
391
+ def annotate_SequenceCoverageNTerm (row : pd .Series , fragment_annotation : dict , alpha : bool ) -> float :
392
+ key = get_key_spectronaut (row )
393
+ ion_types = fragment_annotation [key ]["ion_types" ]
394
+ peptide = str (row ["PP.PeptideA" ]).strip () if alpha else str (row ["PP.PeptideB" ]).strip ()
395
+ pep_id_lookup = 0 if alpha else 1
396
+ ions = list ()
397
+ for ion in ion_types :
398
+ pep_id = int (ion .split (";" )[2 ])
399
+ ion_type = str (ion .split (";" )[0 ]).strip ()
400
+ ion_number = int (ion .split (";" )[1 ])
401
+ if len (ion_type ) != 1 :
402
+ raise RuntimeError (f"Could not parse ion type from ion { ion } !" )
403
+ if pep_id == pep_id_lookup and ion_type in ["a" , "b" , "c" ]:
404
+ ions .append ((ion_type , ion_number ))
405
+ return len (ions ) / len (peptide )
406
+
407
+ tqdm .pandas (desc = "Annotating n-terminal sequence coverage for alpha peptide..." )
408
+ spectronaut ["PP.SequenceCoverageNTermAlpha" ] = spectronaut .progress_apply (lambda row : annotate_SequenceCoverageNTerm (row , fragment_annotation , True ), axis = 1 )
409
+
410
+ tqdm .pandas (desc = "Annotating n-terminal sequence coverage for beta peptide..." )
411
+ spectronaut ["PP.SequenceCoverageNTermBeta" ] = spectronaut .progress_apply (lambda row : annotate_SequenceCoverageNTerm (row , fragment_annotation , False ), axis = 1 )
412
+
413
+ tqdm .pandas (desc = "Annotating n-terminal sequence coverage for full crosslink..." )
414
+ spectronaut ["PP.SequenceCoverageNTermFull" ] = spectronaut .progress_apply (lambda row : (float (row ["PP.SequenceCoverageNTermAlpha" ]) + float (row ["PP.SequenceCoverageNTermBeta" ])) / 2.0 , axis = 1 )
415
+
416
+ def annotate_SequenceCoverageCTerm (row : pd .Series , fragment_annotation : dict , alpha : bool ) -> float :
417
+ key = get_key_spectronaut (row )
418
+ ion_types = fragment_annotation [key ]["ion_types" ]
419
+ peptide = str (row ["PP.PeptideA" ]).strip () if alpha else str (row ["PP.PeptideB" ]).strip ()
420
+ pep_id_lookup = 0 if alpha else 1
421
+ ions = list ()
422
+ for ion in ion_types :
423
+ pep_id = int (ion .split (";" )[2 ])
424
+ ion_type = str (ion .split (";" )[0 ]).strip ()
425
+ ion_number = int (ion .split (";" )[1 ])
426
+ if len (ion_type ) != 1 :
427
+ raise RuntimeError (f"Could not parse ion type from ion { ion } !" )
428
+ if pep_id == pep_id_lookup and ion_type in ["x" , "y" , "z" ]:
429
+ ions .append ((ion_type , ion_number ))
430
+ return len (ions ) / len (peptide )
431
+
432
+ tqdm .pandas (desc = "Annotating c-terminal sequence coverage for alpha peptide..." )
433
+ spectronaut ["PP.SequenceCoverageCTermAlpha" ] = spectronaut .progress_apply (lambda row : annotate_SequenceCoverageCTerm (row , fragment_annotation , True ), axis = 1 )
434
+
435
+ tqdm .pandas (desc = "Annotating c-terminal sequence coverage for beta peptide..." )
436
+ spectronaut ["PP.SequenceCoverageCTermBeta" ] = spectronaut .progress_apply (lambda row : annotate_SequenceCoverageCTerm (row , fragment_annotation , False ), axis = 1 )
437
+
438
+ tqdm .pandas (desc = "Annotating c-terminal sequence coverage for full crosslink..." )
439
+ spectronaut ["PP.SequenceCoverageCTermFull" ] = spectronaut .progress_apply (lambda row : (float (row ["PP.SequenceCoverageCTermAlpha" ]) + float (row ["PP.SequenceCoverageCTermBeta" ])) / 2.0 , axis = 1 )
440
+
441
+ def annotate_SequenceCoverage (row : pd .Series , fragment_annotation : dict , alpha : bool ) -> float :
442
+ #
443
+ # 1 2 3 4 5 6 7
444
+ # P E P T I D E
445
+ # b1 b2 b3 b4 b5 b6
446
+ # y6 y5 y4 y3 y2 y1
447
+ #
448
+ # b[x] = y[len+1-x]
449
+ #
450
+ key = get_key_spectronaut (row )
451
+ ion_types = fragment_annotation [key ]["ion_types" ]
452
+ peptide = str (row ["PP.PeptideA" ]).strip () if alpha else str (row ["PP.PeptideB" ]).strip ()
453
+ pep_id_lookup = 0 if alpha else 1
454
+ unique_seq_positions = set ()
455
+ for ion in ion_types :
456
+ pep_id = int (ion .split (";" )[2 ])
457
+ ion_type = str (ion .split (";" )[0 ]).strip ()
458
+ ion_number = int (ion .split (";" )[1 ])
459
+ if len (ion_type ) != 1 :
460
+ raise RuntimeError (f"Could not parse ion type from ion { ion } !" )
461
+ if pep_id == pep_id_lookup :
462
+ if ion_type in ["a" , "b" , "c" ]:
463
+ unique_seq_positions .add (ion_number )
464
+ elif ion_type in ["x" , "y" , "z" ]:
465
+ unique_seq_positions .add (len (peptide ) + 1 - ion_number )
466
+ else :
467
+ raise RuntimeError (f"Found not-suppored ion type: { ion_type } " )
468
+ return len (unique_seq_positions ) / len (peptide )
469
+
470
+ tqdm .pandas (desc = "Annotating sequence coverage for alpha peptide..." )
471
+ spectronaut ["PP.SequenceCoverageAlpha" ] = spectronaut .progress_apply (lambda row : annotate_SequenceCoverage (row , fragment_annotation , True ), axis = 1 )
472
+
473
+ tqdm .pandas (desc = "Annotating sequence coverage for beta peptide..." )
474
+ spectronaut ["PP.SequenceCoverageBeta" ] = spectronaut .progress_apply (lambda row : annotate_SequenceCoverage (row , fragment_annotation , False ), axis = 1 )
475
+
476
+ tqdm .pandas (desc = "Annotating sequence coverage for full crosslink..." )
477
+ spectronaut ["PP.SequenceCoverageFull" ] = spectronaut .progress_apply (lambda row : (float (row ["PP.SequenceCoverageAlpha" ]) + float (row ["PP.SequenceCoverageBeta" ])) / 2.0 , axis = 1 )
478
+
375
479
spectronaut ["PP.PseudoScanNumber" ] = pd .Series (range (spectronaut .shape [0 ]))
376
480
spectronaut ["PP.Crosslinker" ] = pd .Series ([CROSSLINKER for i in range (spectronaut .shape [0 ])])
377
481
spectronaut ["PP.CrosslinkerMass" ] = pd .Series ([CROSSLINKER_MASS for i in range (spectronaut .shape [0 ])])
0 commit comments