6
6
# micha.birklbauer@gmail.com
7
7
8
8
# version tracking
9
- __version = "1.4.9 "
10
- __date = "2025-02-28 "
9
+ __version = "1.4.10 "
10
+ __date = "2025-03-26 "
11
11
12
12
# REQUIREMENTS
13
13
# pip install pandas
@@ -98,30 +98,39 @@ def xi_get_sequence(row: pd.Series, alpha: bool = True) -> str:
98
98
seq_a += aa
99
99
return seq_a
100
100
101
def xi_parse_modifications_from_seq(sequence: str) -> Dict[int, str]:
    """Collect the modified residues found in a xi peptide sequence string.

    Every uppercase character is counted as one residue (positions are
    1-based). Any non-uppercase characters that directly follow a residue
    are appended to that residue's letter; once such a run ends (at the next
    uppercase character or at the end of the string) the accumulated text is
    stored under the residue's position.

    Args:
        sequence: The peptide sequence including inline modification text.
            It is converted with ``str()`` and stripped of surrounding
            whitespace before parsing.

    Returns:
        A dict mapping residue position to the residue letter plus its
        trailing non-uppercase characters (e.g. ``{4: "Mox"}`` for
        ``"PEPMoxTIDE"``). Residues without trailing characters are not
        included. Non-uppercase characters that precede the first residue
        are stored under position ``0``.

    Raises:
        RuntimeError: If a modification would be stored for a position that
            already has one.
    """
    # Normalise once so that iteration, the end-of-string test and the
    # look-ahead all index into the very same string.
    seq = str(sequence).strip()
    seq_len = len(seq)

    modifications = dict()
    pos = 0
    current_mod = ""
    for i, aa in enumerate(seq):
        if aa.isupper():
            # A new residue starts: advance the position and restart the
            # accumulated text with the residue letter itself.
            pos += 1
            current_mod = aa
            continue

        current_mod += aa
        # The run of modification characters is complete when the string
        # ends or when the next character starts a new residue.
        run_ends = i + 1 >= seq_len or seq[i + 1].isupper()
        if run_ends:
            if pos in modifications:
                raise RuntimeError(f"Modification at position {pos} already exists!")
            modifications[pos] = current_mod
    return modifications
120
+
101
121
def xi_get_modifications (row : pd .Series , alpha : bool = True ) -> str :
102
122
seq = str (row ["PepSeq1" ]).strip () if alpha else str (row ["PepSeq2" ]).strip ()
103
123
clean_seq = xi_get_sequence (row , alpha )
104
124
xl_pos = int (row ["LinkPos1" ]) if alpha else int (row ["LinkPos2" ])
105
-
106
- if len (MODIFICATIONS_XI ) > 10 :
107
- msg = "Found more than 10 possible modifications for xi. " + \
108
- "Maximum number of modifications supported is 10. " + \
109
- "Please update MODIFICATIONS_XI in the config file!"
110
- raise RuntimeError (msg )
111
-
112
- mod_map = dict ()
113
- mod_map_rev = dict ()
114
- for i , key in enumerate (MODIFICATIONS_XI .keys ()):
115
- mod_map [str (i )] = key
116
- mod_map_rev [key ] = str (i )
117
-
118
- for mod in MODIFICATIONS_XI .keys ():
119
- seq = seq .replace (mod , mod_map_rev [mod ])
125
+ mods = xi_parse_modifications_from_seq (seq )
120
126
121
127
mod_str = ""
122
- for i , aa in enumerate (seq ):
123
- if aa in mod_map :
124
- mod_str += f"{ MODIFICATIONS_XI [mod_map [aa ]][0 ]} { i + 1 } ({ MODIFICATIONS_XI [mod_map [aa ]][1 ]} );"
128
+ for mod in mods .items ():
129
+ mod_pos = mod [0 ]
130
+ mod_xi_key = mod [1 ]
131
+ mod_aa = MODIFICATIONS_XI [mod_xi_key ][0 ]
132
+ mod_text = MODIFICATIONS_XI [mod_xi_key ][1 ]
133
+ mod_str += f"{ mod_aa } { mod_pos } ({ mod_text } );"
125
134
126
135
mod_str += f"{ clean_seq [xl_pos - 1 ]} { xl_pos } ({ str (row ['Crosslinker' ]).strip ()} )"
127
136
@@ -148,8 +157,8 @@ def xi_get_score(row: pd.Series) -> float:
148
157
ms_annika_struc ["Modifications B" ].append (xi_get_modifications (row , False ))
149
158
ms_annika_struc ["First Scan" ].append (int (row ["scan" ]))
150
159
ms_annika_struc ["Spectrum File" ].append (str (row ["PeakListFileName" ]).strip ())
151
- ms_annika_struc ["A in protein" ].append (int (row ["PepPos1" ])- 1 )
152
- ms_annika_struc ["B in protein" ].append (int (row ["PepPos2" ])- 1 )
160
+ ms_annika_struc ["A in protein" ].append (";" . join ([ str ( int (pos ) - 1 ) for pos in str ( row ["PepPos1" ]). split ( ";" )]) )
161
+ ms_annika_struc ["B in protein" ].append (";" . join ([ str ( int (pos ) - 1 ) for pos in str ( row ["PepPos2" ]). split ( ";" )]) )
153
162
ms_annika_struc ["Crosslinker Position A" ].append (int (row ["LinkPos1" ]))
154
163
ms_annika_struc ["Crosslinker Position B" ].append (int (row ["LinkPos2" ]))
155
164
ms_annika_struc ["Accession A" ].append (str (row ["Protein1" ]).strip ())
0 commit comments