Skip to content

Commit 4878642

Browse files
Merge pull request #36 from hgb-bin-proteomics/develop
Add theoretical ions to Spectral Library
2 parents 2cd186d + e5ece43 commit 4878642

File tree

5 files changed

+49
-23
lines changed

5 files changed

+49
-23
lines changed

POSTPROCESSING.md

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,20 @@ used with [xiFDR](https://www.rappsilberlab.org/software/xifdr/) for validation.
1010

1111
- First you need to set a few parameters in the `post_process.py` script:
1212
```python
13-
CROSSLINKER = "PhoX" # name of the crosslinker
14-
CROSSLINKER_MASS = 209.97181 # delta mass of the crosslinker
15-
SPECTRONAUT_DELIM = "," # delimiter in Spectronaut output file, e.g. "," for comma delimited files, "\t" for tab delimited files
16-
SPECTRONAUT_MATCH_TOLERANCE = 0.05 # match tolerance in Da
17-
SPECTRONAUT_FRAGMENT_MZ_COLUMN_NAME = "F.CalibratedMz" # which F Mz to use for matching
18-
SPECTRONAUT_CSCORE_COLUMN_NAME = "EG.Cscore" # which Cscore to use for re-soring
13+
# name of the crosslinker
14+
CROSSLINKER = "PhoX"
15+
# delta mass of the crosslinker
16+
CROSSLINKER_MASS = 209.97181
17+
# delimiter in Spectronaut output file, e.g. "," for comma delimited files, "\t" for tab delimited files
18+
SPECTRONAUT_DELIM = ","
19+
# match tolerance in Da
20+
SPECTRONAUT_MATCH_TOLERANCE = 0.05
21+
# which Spectronaut Fragment Mz to use for matching
22+
SPECTRONAUT_FRAGMENT_MZ_COLUMN_NAME = "F.CalibratedMz"
23+
# which Cscore to use for re-scoring
24+
SPECTRONAUT_CSCORE_COLUMN_NAME = "EG.Cscore"
25+
# which Spectral Library column to use for fragment matching (change index to 0 or 1)
26+
SPECTRAL_LIBRARY_FRAGMENT_MZ_COLUMN_NAME = ["FragmentMz", "FragmentTheoMz"][0]
1927
```
2028
- Make sure that the Spectronaut result file, and the spectral library are in the same
2129
directory.

create_spectral_library.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
# micha.birklbauer@gmail.com
77

88
# version tracking
9-
__version = "1.4.10"
10-
__date = "2025-03-26"
9+
__version = "1.4.11"
10+
__date = "2025-07-29"
1111

1212
# REQUIREMENTS
1313
# pip install pandas
@@ -487,12 +487,14 @@ def check_if_xl_in_frag(row, alpha, ion_type, fragment, crosslinker):
487487
theoretical_fragments = generate_theoretical_fragments(sequence, modifications_processed, ion_types, max_charge)
488488

489489
matched_fragments = dict()
490+
matched_fragments_theo = dict()
490491

491492
# match fragments
492493
for peak_mz in spectrum["peaks"].keys():
493494
for fragment in theoretical_fragments.keys():
494495
if round(peak_mz, 4) < round(fragment + match_tolerance, 4) and round(peak_mz, 4) > round(fragment - match_tolerance, 4):
495496
matched_fragments[peak_mz] = theoretical_fragments[fragment]
497+
matched_fragments_theo[peak_mz] = fragment
496498
break
497499

498500
# get annotations
@@ -502,6 +504,7 @@ def check_if_xl_in_frag(row, alpha, ion_type, fragment, crosslinker):
502504
fragment_number = int(matched_fragments[match].split("+")[0][1:])
503505
fragment_pep_id = 0 if alpha else 1
504506
fragment_mz = match
507+
fragment_theo_mz = matched_fragments_theo[match]
505508
fragment_rel_intensity = float(spectrum["peaks"][match] / spectrum["max_intensity"])
506509
fragment_loss_type = ""
507510
fragment_contains_xl = check_if_xl_in_frag(row, alpha, fragment_type, matched_fragments[match].split(":")[1].strip(), crosslinker)
@@ -511,6 +514,7 @@ def check_if_xl_in_frag(row, alpha, ion_type, fragment, crosslinker):
511514
"FragmentNumber": fragment_number,
512515
"FragmentPepId": fragment_pep_id,
513516
"FragmentMz": fragment_mz,
517+
"FragmentTheoMz": fragment_theo_mz,
514518
"RelativeIntensity": fragment_rel_intensity,
515519
"FragmentLossType": fragment_loss_type,
516520
"CLContainingFragment": fragment_contains_xl,
@@ -797,6 +801,7 @@ def check_if_xl_in_frag(decoy_csm, pep_id, ion_type, ion_number, crosslinker) ->
797801
"FragmentNumber": fragment_number,
798802
"FragmentPepId": fragment_pep_id,
799803
"FragmentMz": fragment_mz,
804+
"FragmentTheoMz": fragment_mz,
800805
"RelativeIntensity": fragment_rel_intensity,
801806
"FragmentLossType": fragment_loss_type,
802807
"CLContainingFragment": fragment_contains_xl,
@@ -1111,6 +1116,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
11111116
FragmentNumber_s = list()
11121117
FragmentPepId_s = list()
11131118
FragmentMz_s = list()
1119+
FragmentTheoMz_s = list()
11141120
RelativeIntensity_s = list()
11151121
FragmentLossType_s = list()
11161122
CLContainingFragment_s = list()
@@ -1144,6 +1150,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
11441150
FragmentNumber_s_decoy = list()
11451151
FragmentPepId_s_decoy = list()
11461152
FragmentMz_s_decoy = list()
1153+
FragmentTheoMz_s_decoy = list()
11471154
RelativeIntensity_s_decoy = list()
11481155
FragmentLossType_s_decoy = list()
11491156
CLContainingFragment_s_decoy = list()
@@ -1177,6 +1184,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
11771184
FragmentNumber_s_decoy_dt = list()
11781185
FragmentPepId_s_decoy_dt = list()
11791186
FragmentMz_s_decoy_dt = list()
1187+
FragmentTheoMz_s_decoy_dt = list()
11801188
RelativeIntensity_s_decoy_dt = list()
11811189
FragmentLossType_s_decoy_dt = list()
11821190
CLContainingFragment_s_decoy_dt = list()
@@ -1210,6 +1218,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
12101218
FragmentNumber_s_decoy_td = list()
12111219
FragmentPepId_s_decoy_td = list()
12121220
FragmentMz_s_decoy_td = list()
1221+
FragmentTheoMz_s_decoy_td = list()
12131222
RelativeIntensity_s_decoy_td = list()
12141223
FragmentLossType_s_decoy_td = list()
12151224
CLContainingFragment_s_decoy_td = list()
@@ -1271,6 +1280,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
12711280
FragmentNumber_s.append(frag["FragmentNumber"])
12721281
FragmentPepId_s.append(frag["FragmentPepId"])
12731282
FragmentMz_s.append(frag["FragmentMz"])
1283+
FragmentTheoMz_s.append(frag["FragmentTheoMz"])
12741284
RelativeIntensity_s.append(frag["RelativeIntensity"])
12751285
FragmentLossType_s.append(frag["FragmentLossType"])
12761286
CLContainingFragment_s.append(frag["CLContainingFragment"])
@@ -1333,6 +1343,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
13331343
FragmentNumber_s_decoy.append(decoy_frag["FragmentNumber"])
13341344
FragmentPepId_s_decoy.append(decoy_frag["FragmentPepId"])
13351345
FragmentMz_s_decoy.append(decoy_frag["FragmentMz"])
1346+
FragmentTheoMz_s_decoy.append(decoy_frag["FragmentTheoMz"])
13361347
RelativeIntensity_s_decoy.append(decoy_frag["RelativeIntensity"])
13371348
FragmentLossType_s_decoy.append(decoy_frag["FragmentLossType"])
13381349
CLContainingFragment_s_decoy.append(decoy_frag["CLContainingFragment"])
@@ -1396,6 +1407,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
13961407
FragmentNumber_s_decoy_dt.append(decoy_frag_dt["FragmentNumber"])
13971408
FragmentPepId_s_decoy_dt.append(decoy_frag_dt["FragmentPepId"])
13981409
FragmentMz_s_decoy_dt.append(decoy_frag_dt["FragmentMz"])
1410+
FragmentTheoMz_s_decoy_dt.append(decoy_frag_dt["FragmentTheoMz"])
13991411
RelativeIntensity_s_decoy_dt.append(decoy_frag_dt["RelativeIntensity"])
14001412
FragmentLossType_s_decoy_dt.append(decoy_frag_dt["FragmentLossType"])
14011413
CLContainingFragment_s_decoy_dt.append(decoy_frag_dt["CLContainingFragment"])
@@ -1459,6 +1471,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
14591471
FragmentNumber_s_decoy_td.append(decoy_frag_td["FragmentNumber"])
14601472
FragmentPepId_s_decoy_td.append(decoy_frag_td["FragmentPepId"])
14611473
FragmentMz_s_decoy_td.append(decoy_frag_td["FragmentMz"])
1474+
FragmentTheoMz_s_decoy_td.append(decoy_frag_td["FragmentTheoMz"])
14621475
RelativeIntensity_s_decoy_td.append(decoy_frag_td["RelativeIntensity"])
14631476
FragmentLossType_s_decoy_td.append(decoy_frag_td["FragmentLossType"])
14641477
CLContainingFragment_s_decoy_td.append(decoy_frag_td["CLContainingFragment"])
@@ -1496,6 +1509,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
14961509
"FragmentNumber": FragmentNumber_s,
14971510
"FragmentPepId": FragmentPepId_s,
14981511
"FragmentMz": FragmentMz_s,
1512+
"FragmentTheoMz": FragmentTheoMz_s,
14991513
"RelativeIntensity": RelativeIntensity_s,
15001514
"FragmentLossType": FragmentLossType_s,
15011515
"CLContainingFragment": CLContainingFragment_s,
@@ -1530,6 +1544,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
15301544
"FragmentNumber": FragmentNumber_s_decoy,
15311545
"FragmentPepId": FragmentPepId_s_decoy,
15321546
"FragmentMz": FragmentMz_s_decoy,
1547+
"FragmentTheoMz": FragmentTheoMz_s_decoy,
15331548
"RelativeIntensity": RelativeIntensity_s_decoy,
15341549
"FragmentLossType": FragmentLossType_s_decoy,
15351550
"CLContainingFragment": CLContainingFragment_s_decoy,
@@ -1564,6 +1579,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
15641579
"FragmentNumber": FragmentNumber_s_decoy_dt,
15651580
"FragmentPepId": FragmentPepId_s_decoy_dt,
15661581
"FragmentMz": FragmentMz_s_decoy_dt,
1582+
"FragmentTheoMz": FragmentTheoMz_s_decoy_dt,
15671583
"RelativeIntensity": RelativeIntensity_s_decoy_dt,
15681584
"FragmentLossType": FragmentLossType_s_decoy_dt,
15691585
"CLContainingFragment": CLContainingFragment_s_decoy_dt,
@@ -1598,6 +1614,7 @@ def main(spectra_file: Union[List[str], List[BinaryIO]] = SPECTRA_FILE,
15981614
"FragmentNumber": FragmentNumber_s_decoy_td,
15991615
"FragmentPepId": FragmentPepId_s_decoy_td,
16001616
"FragmentMz": FragmentMz_s_decoy_td,
1617+
"FragmentTheoMz": FragmentTheoMz_s_decoy_td,
16011618
"RelativeIntensity": RelativeIntensity_s_decoy_td,
16021619
"FragmentLossType": FragmentLossType_s_decoy_td,
16031620
"CLContainingFragment": CLContainingFragment_s_decoy_td,

post_process.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414

1515

1616
# version tracking
17-
__version = "1.2.1"
18-
__date = "2025-07-23"
17+
__version = "1.2.2"
18+
__date = "2025-07-29"
1919

2020
# PARAMETERS
2121

@@ -26,6 +26,7 @@
2626
SPECTRONAUT_MATCH_TOLERANCE = 0.05 # match tolerance in Da
2727
SPECTRONAUT_FRAGMENT_MZ_COLUMN_NAME = "F.CalibratedMz" # which F Mz to use for matching
2828
SPECTRONAUT_CSCORE_COLUMN_NAME = "EG.Cscore" # which Cscore to use for re-scoring
29+
SPECTRAL_LIBRARY_FRAGMENT_MZ_COLUMN_NAME = ["FragmentMz", "FragmentTheoMz"][0] # which Spectral Library column to use for fragment matching
2930

3031
# import packages
3132
import argparse
@@ -84,7 +85,7 @@ def read_spectral_library(filename: str) -> Dict[str, Dict[str, Any]]:
8485
else:
8586
index[key]["total_ions_b"] += 1
8687

87-
ion_mz = get_mz_key(float(row["FragmentMz"]))
88+
ion_mz = get_mz_key(float(row[SPECTRAL_LIBRARY_FRAGMENT_MZ_COLUMN_NAME]))
8889
if ion_mz in index[key]["ions"]:
8990
index[key]["ions"][ion_mz].append(row)
9091
else:
@@ -93,7 +94,7 @@ def read_spectral_library(filename: str) -> Dict[str, Dict[str, Any]]:
9394
index[key] = {"rows": [row],
9495
"total_ions_a": 1 if int(row["FragmentPepId"]) == 0 else 0,
9596
"total_ions_b": 1 if int(row["FragmentPepId"]) == 1 else 0,
96-
"ions": {get_mz_key(float(row["FragmentMz"])): [row]}}
97+
"ions": {get_mz_key(float(row[SPECTRAL_LIBRARY_FRAGMENT_MZ_COLUMN_NAME])): [row]}}
9798

9899
return index
99100

@@ -159,7 +160,7 @@ def annotate_spectronaut_result(filename: str) -> pd.DataFrame:
159160

160161
spectronaut = pd.read_csv(filename, sep = SPECTRONAUT_DELIM, low_memory = False)
161162
filepath = os.path.abspath(os.path.dirname(filename))
162-
163+
163164
filename_spec_lib = str(spectronaut["EG.Library"].at[0])
164165
filepath_spec_lib = os.path.join(filepath, filename_spec_lib)
165166
index = read_spectral_library(filepath_spec_lib)

tests/tests-xi.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def test1_spectral_library_exporter():
6565
sl = main()
6666
sl = sl["TargetLib"]
6767

68-
assert sl.shape[0] == 55 and sl.shape[1] == 31
68+
assert sl.shape[0] == 55 and sl.shape[1] == 32
6969

7070
# check values target
7171
def test2_spectral_library_exporter():
@@ -101,7 +101,7 @@ def test3_spectral_library_exporter():
101101
sl = main()
102102
sl = sl["DecoyLib"]
103103

104-
assert sl.shape[0] == 54 and sl.shape[1] == 31
104+
assert sl.shape[0] == 54 and sl.shape[1] == 32
105105

106106
# check values decoy dd
107107
def test4_spectral_library_exporter():
@@ -137,7 +137,7 @@ def test5_spectral_library_exporter():
137137
sl = main()
138138
sl = sl["DecoyLib_DT"]
139139

140-
assert sl.shape[0] == 54 and sl.shape[1] == 31
140+
assert sl.shape[0] == 54 and sl.shape[1] == 32
141141

142142
# check values decoy dt
143143
def test6_spectral_library_exporter():
@@ -173,7 +173,7 @@ def test7_spectral_library_exporter():
173173
sl = main()
174174
sl = sl["DecoyLib_TD"]
175175

176-
assert sl.shape[0] == 55 and sl.shape[1] == 31
176+
assert sl.shape[0] == 55 and sl.shape[1] == 32
177177

178178
# check values decoy td
179179
def test8_spectral_library_exporter():
@@ -209,7 +209,7 @@ def test9_spectral_library_exporter():
209209
sl = main()
210210
sl = sl["FullLib"]
211211

212-
assert sl.shape[0] == 218 and sl.shape[1] == 31
212+
assert sl.shape[0] == 218 and sl.shape[1] == 32
213213
assert sl["DecoyType"].value_counts()["TT"] == 55
214214
assert sl["DecoyType"].value_counts()["TD"] == 55
215215
assert sl["DecoyType"].value_counts()["DD"] == 54

tests/tests.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def test1_spectral_library_exporter():
1313
sl = main()
1414
sl = sl["TargetLib"]
1515

16-
assert sl.shape[0] == 12 and sl.shape[1] == 31
16+
assert sl.shape[0] == 12 and sl.shape[1] == 32
1717

1818
# check values target
1919
def test2_spectral_library_exporter():
@@ -76,7 +76,7 @@ def test3_spectral_library_exporter():
7676
sl = main()
7777
sl = sl["DecoyLib"]
7878

79-
assert sl.shape[0] == 22 and sl.shape[1] == 31
79+
assert sl.shape[0] == 22 and sl.shape[1] == 32
8080

8181
# check values decoy dd
8282
def test4_spectral_library_exporter():
@@ -139,7 +139,7 @@ def test5_spectral_library_exporter():
139139
sl = main()
140140
sl = sl["DecoyLib_DT"]
141141

142-
assert sl.shape[0] == 17 and sl.shape[1] == 31
142+
assert sl.shape[0] == 17 and sl.shape[1] == 32
143143

144144
# check values decoy dt
145145
def test6_spectral_library_exporter():
@@ -202,7 +202,7 @@ def test7_spectral_library_exporter():
202202
sl = main()
203203
sl = sl["DecoyLib_TD"]
204204

205-
assert sl.shape[0] == 17 and sl.shape[1] == 31
205+
assert sl.shape[0] == 17 and sl.shape[1] == 32
206206

207207
# check values decoy td
208208
def test8_spectral_library_exporter():
@@ -265,7 +265,7 @@ def test9_spectral_library_exporter():
265265
sl = main()
266266
sl = sl["FullLib"]
267267

268-
assert sl.shape[0] == 68 and sl.shape[1] == 31
268+
assert sl.shape[0] == 68 and sl.shape[1] == 32
269269

270270
# check values full
271271
def test10_spectral_library_exporter():

0 commit comments

Comments
 (0)