Skip to content

Commit 2024953

Browse files
Merge pull request #33 from hgb-bin-proteomics/develop
Fix for #31 and #32
2 parents ded6465 + 9b292bd commit 2024953

File tree

2 files changed

+31
-50
lines changed

2 files changed

+31
-50
lines changed

CITATION.cff

Lines changed: 0 additions & 28 deletions
This file was deleted.

create_spectral_library.py

Lines changed: 31 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@
66
# micha.birklbauer@gmail.com
77

88
# version tracking
9-
__version = "1.4.9"
10-
__date = "2025-02-28"
9+
__version = "1.4.10"
10+
__date = "2025-03-26"
1111

1212
# REQUIREMENTS
1313
# pip install pandas
@@ -98,30 +98,39 @@ def xi_get_sequence(row: pd.Series, alpha: bool = True) -> str:
9898
seq_a += aa
9999
return seq_a
100100

101+
def xi_parse_modifications_from_seq(sequence: str) -> Dict[int, str]:
    """Collect the modified residues of a peptide sequence string.

    Every upper-case character is counted as one residue (positions are
    1-based). Any run of non-upper-case characters that directly follows a
    residue is appended to that residue to form the modification key, e.g.
    ``"ACcmDMox"`` yields ``{2: "Ccm", 4: "Mox"}``. Residues without a
    trailing non-upper-case run are not reported.

    Args:
        sequence: The annotated peptide sequence. It is converted with
            ``str()`` and stripped of surrounding whitespace before parsing.

    Returns:
        A dict mapping residue position to the residue-plus-modification
        string. Non-upper-case characters that precede the first residue are
        stored under position ``0``.

    Raises:
        RuntimeError: If a modification would be stored twice for the same
            position.
    """
    # Normalise once so that iteration, the end-of-string check and the
    # look-ahead all index into the very same string. Mixing the stripped
    # and the raw input shifts the look-ahead and drops modifications.
    seq = str(sequence).strip()
    seq_len = len(seq)

    modifications: Dict[int, str] = {}
    pos = 0
    current_mod = ""
    for i, aa in enumerate(seq):
        if aa.isupper():
            # A new residue starts; it also starts a potential modification key.
            pos += 1
            current_mod = aa
            continue

        current_mod += aa
        # The modification is complete at the end of the sequence or right
        # before the next residue begins.
        if i + 1 >= seq_len or seq[i + 1].isupper():
            if pos in modifications:
                raise RuntimeError(f"Modification at position {pos} already exists!")
            modifications[pos] = current_mod
    return modifications
120+
101121
def xi_get_modifications(row: pd.Series, alpha: bool = True) -> str:
102122
seq = str(row["PepSeq1"]).strip() if alpha else str(row["PepSeq2"]).strip()
103123
clean_seq = xi_get_sequence(row, alpha)
104124
xl_pos = int(row["LinkPos1"]) if alpha else int(row["LinkPos2"])
105-
106-
if len(MODIFICATIONS_XI) > 10:
107-
msg = "Found more than 10 possible modifications for xi. " + \
108-
"Maximum number of modifications supported is 10. " + \
109-
"Please update MODIFICATIONS_XI in the config file!"
110-
raise RuntimeError(msg)
111-
112-
mod_map = dict()
113-
mod_map_rev = dict()
114-
for i, key in enumerate(MODIFICATIONS_XI.keys()):
115-
mod_map[str(i)] = key
116-
mod_map_rev[key] = str(i)
117-
118-
for mod in MODIFICATIONS_XI.keys():
119-
seq = seq.replace(mod, mod_map_rev[mod])
125+
mods = xi_parse_modifications_from_seq(seq)
120126

121127
mod_str = ""
122-
for i, aa in enumerate(seq):
123-
if aa in mod_map:
124-
mod_str += f"{MODIFICATIONS_XI[mod_map[aa]][0]}{i+1}({MODIFICATIONS_XI[mod_map[aa]][1]});"
128+
for mod in mods.items():
129+
mod_pos = mod[0]
130+
mod_xi_key = mod[1]
131+
mod_aa = MODIFICATIONS_XI[mod_xi_key][0]
132+
mod_text = MODIFICATIONS_XI[mod_xi_key][1]
133+
mod_str += f"{mod_aa}{mod_pos}({mod_text});"
125134

126135
mod_str += f"{clean_seq[xl_pos-1]}{xl_pos}({str(row['Crosslinker']).strip()})"
127136

@@ -148,8 +157,8 @@ def xi_get_score(row: pd.Series) -> float:
148157
ms_annika_struc["Modifications B"].append(xi_get_modifications(row, False))
149158
ms_annika_struc["First Scan"].append(int(row["scan"]))
150159
ms_annika_struc["Spectrum File"].append(str(row["PeakListFileName"]).strip())
151-
ms_annika_struc["A in protein"].append(int(row["PepPos1"])-1)
152-
ms_annika_struc["B in protein"].append(int(row["PepPos2"])-1)
160+
ms_annika_struc["A in protein"].append(";".join([str(int(pos)-1) for pos in str(row["PepPos1"]).split(";")]))
161+
ms_annika_struc["B in protein"].append(";".join([str(int(pos)-1) for pos in str(row["PepPos2"]).split(";")]))
153162
ms_annika_struc["Crosslinker Position A"].append(int(row["LinkPos1"]))
154163
ms_annika_struc["Crosslinker Position B"].append(int(row["LinkPos2"]))
155164
ms_annika_struc["Accession A"].append(str(row["Protein1"]).strip())

0 commit comments

Comments
 (0)