hgb-bin-proteomics
diff --git a/CITATION.cff b/CITATION.cff
Lines changed: 0 additions & 28 deletions
diff --git a/create_spectral_library.py b/create_spectral_library.py
Lines changed: 31 additions & 22 deletions
@@ -6,8 +6,8 @@
 # micha.birklbauer@gmail.com
 
 # version tracking
-__version = "1.4.9"
-__date = "2025-02-28"
+__version = "1.4.10"
+__date = "2025-03-26"
 
 # REQUIREMENTS
 # pip install pandas
@@ -98,30 +98,39 @@ def xi_get_sequence(row: pd.Series, alpha: bool = True) -> str:
                 seq_a += aa
         return seq_a
 
+    def xi_parse_modifications_from_seq(sequence: str) -> Dict[int, str]:
+        """
+        Collect the modification annotations embedded in a peptide sequence.
+
+        Upper case characters count as residues and advance the 1-based
+        position; any other characters that follow a residue are appended to
+        it to form the modification key (e.g. "AMoxK" yields {2: "Mox"}).
+        Characters that precede the first residue are stored under position 0.
+
+        Args:
+            sequence: The annotated peptide sequence. It is converted to str
+                and surrounding whitespace is ignored.
+
+        Returns:
+            A dict mapping the residue position to the residue letter plus
+            its trailing annotation characters.
+
+        Raises:
+            RuntimeError: If a position would receive a second modification.
+        """
+        # Normalise once so that iteration, the end-of-string test and the
+        # look-ahead all index the same string (the raw argument may carry
+        # surrounding whitespace, which would shift the indices).
+        seq = str(sequence).strip()
+        modifications = dict()
+        pos = 0
+        current_mod = ""
+        for i, aa in enumerate(seq):
+            if aa.isupper():
+                pos += 1
+                current_mod = aa
+                continue
+            current_mod += aa
+            # A run of annotation characters is complete at the end of the
+            # sequence or right before the next residue.
+            if i + 1 >= len(seq) or seq[i + 1].isupper():
+                if pos in modifications:
+                    raise RuntimeError(f"Modification at position {pos} already exists!")
+                modifications[pos] = current_mod
+        return modifications
+
     def xi_get_modifications(row: pd.Series, alpha: bool = True) -> str:
         seq = str(row["PepSeq1"]).strip() if alpha else str(row["PepSeq2"]).strip()
         clean_seq = xi_get_sequence(row, alpha)
         xl_pos = int(row["LinkPos1"]) if alpha else int(row["LinkPos2"])
-
-        if len(MODIFICATIONS_XI) > 10:
-            msg = "Found more than 10 possible modifications for xi. " + \
-                  "Maximum number of modifications supported is 10. " + \
-                  "Please update MODIFICATIONS_XI in the config file!"
-            raise RuntimeError(msg)
-
-        mod_map = dict()
-        mod_map_rev = dict()
-        for i, key in enumerate(MODIFICATIONS_XI.keys()):
-            mod_map[str(i)] = key
-            mod_map_rev[key] = str(i)
-
-        for mod in MODIFICATIONS_XI.keys():
-            seq = seq.replace(mod, mod_map_rev[mod])
+        mods = xi_parse_modifications_from_seq(seq)
 
         mod_str = ""
-        for i, aa in enumerate(seq):
-            if aa in mod_map:
-                mod_str += f"{MODIFICATIONS_XI[mod_map[aa]][0]}{i+1}({MODIFICATIONS_XI[mod_map[aa]][1]});"
+        for mod in mods.items():
+            mod_pos = mod[0]
+            mod_xi_key = mod[1]
+            mod_aa = MODIFICATIONS_XI[mod_xi_key][0]
+            mod_text = MODIFICATIONS_XI[mod_xi_key][1]
+            mod_str += f"{mod_aa}{mod_pos}({mod_text});"
 
         mod_str += f"{clean_seq[xl_pos-1]}{xl_pos}({str(row['Crosslinker']).strip()})"
 
@@ -148,8 +157,8 @@ def xi_get_score(row: pd.Series) -> float:
         ms_annika_struc["Modifications B"].append(xi_get_modifications(row, False))
         ms_annika_struc["First Scan"].append(int(row["scan"]))
         ms_annika_struc["Spectrum File"].append(str(row["PeakListFileName"]).strip())
-        ms_annika_struc["A in protein"].append(int(row["PepPos1"])-1)
-        ms_annika_struc["B in protein"].append(int(row["PepPos2"])-1)
+        ms_annika_struc["A in protein"].append(";".join([str(int(pos)-1) for pos in str(row["PepPos1"]).split(";")]))
+        ms_annika_struc["B in protein"].append(";".join([str(int(pos)-1) for pos in str(row["PepPos2"]).split(";")]))
         ms_annika_struc["Crosslinker Position A"].append(int(row["LinkPos1"]))
         ms_annika_struc["Crosslinker Position B"].append(int(row["LinkPos2"]))
         ms_annika_struc["Accession A"].append(str(row["Protein1"]).strip())