Skip to content

Commit 07bfadc

Browse files
merge mass/chem into annotation
1 parent 4f6874d commit 07bfadc

24 files changed

+2935
-1981
lines changed

src/peptacular/chem/chem_calc.py

Lines changed: 14 additions & 252 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,25 @@
33
sequence with/and modifications.
44
"""
55

6-
import warnings
76
from typing import Union, List, Optional
87

9-
from peptacular.chem.chem_constants import ISOTOPIC_AVERAGINE_MASS
10-
from peptacular.sequence.sequence_funcs import get_annotation_input
11-
from peptacular.types import ChemComposition
12-
from peptacular.proforma.input_convert import ModValue
13-
from peptacular.chem.chem_util import parse_chem_formula, write_chem_formula
14-
from peptacular.mods.mod_db_setup import MONOSACCHARIDES_DB
15-
from peptacular.constants import (
16-
AA_COMPOSITIONS,
8+
from ..proforma_dataclasses import Mod
9+
from ..utils2 import parse_ion_elements
10+
from ..util import parse_isotope_mods
11+
12+
from .chem_constants import ISOTOPIC_AVERAGINE_MASS
13+
from ..types import ChemComposition, ModValue
14+
from .chem_util import parse_chem_formula, write_chem_formula
15+
from ..mods.mod_db_setup import MONOSACCHARIDES_DB
16+
from ..constants import (
1717
AVERAGINE_RATIOS,
18-
NEUTRAL_FRAGMENT_COMPOSITION_ADJUSTMENTS,
19-
FRAGMENT_ION_BASE_CHARGE_ADDUCTS,
2018
)
21-
from peptacular.errors import (
19+
from ..errors import (
2220
InvalidCompositionError,
23-
AmbiguousAminoAcidError,
24-
UnknownAminoAcidError,
2521
DeltaMassCompositionError,
2622
)
27-
from peptacular.glycan import glycan_comp
28-
from peptacular.mods.mod_db import (
23+
from ..glycan import glycan_comp
24+
from ..mods.mod_db import (
2925
parse_unimod_comp,
3026
parse_psi_comp,
3127
is_psi_mod_str,
@@ -37,14 +33,8 @@
3733
is_gno_str,
3834
parse_gno_comp,
3935
)
40-
from peptacular.proforma.proforma_parser import (
41-
parse_static_mods,
42-
ProFormaAnnotation,
43-
Mod,
44-
parse_isotope_mods,
45-
parse_ion_elements,
46-
)
47-
from peptacular.util import convert_type
36+
37+
from ..utils2 import convert_type
4838

4939

5040
def glycan_to_chem(glycan: Union[ChemComposition, str]) -> str:
@@ -369,234 +359,6 @@ def _parse_mod_delta_mass(mod: str) -> Union[float, None]:
369359
return mass
370360

371361

372-
def _sequence_comp(
373-
sequence: Union[str, ProFormaAnnotation],
374-
ion_type: str,
375-
isotope: int = 0,
376-
use_isotope_on_mods: bool = False,
377-
) -> ChemComposition:
378-
"""
379-
Calculate the composition of a sequence.
380-
381-
:param sequence: The sequence or ProForma annotation.
382-
:type sequence: str | ProFormaAnnotation
383-
:param ion_type: The ion type.
384-
:type ion_type: str
385-
:param isotope: The number of neutrons to add to (or subtract from) the final composition. Default is 0.
386-
:type isotope: int
387-
:param use_isotope_on_mods: If True, the isotope modifications are also applied to the composition of the modifications, not only to the sequence composition.
388-
Default is False.
389-
:type use_isotope_on_mods: bool
390-
391-
:raises UnknownModificationError: If the modification is unknown.
392-
:raises AmbiguousAminoAcidError: If the sequence contains an ambiguous amino acid.
393-
394-
:return: The composition of the sequence.
395-
:rtype: Dict[str, int | float]
396-
397-
.. code-block:: python
398-
399-
# Calculate the composition of a peptide sequence.
400-
>>> _sequence_comp('PEPTIDE/1', 'y')
401-
{'C': 34, 'H': 54, 'N': 7, 'O': 15, 'e': -1}
402-
403-
>>> _sequence_comp('PEPTIDE/1', 'y', isotope=1)
404-
{'C': 34, 'H': 54, 'N': 7, 'O': 15, 'e': -1, 'n': 1}
405-
406-
>>> _sequence_comp('G/1', 'i')
407-
{'C': 1, 'H': 4, 'N': 1, 'e': -1}
408-
409-
>>> _sequence_comp('PEPTIDE/1', 'b')
410-
{'C': 34, 'H': 52, 'N': 7, 'O': 14, 'e': -1}
411-
412-
>>> _sequence_comp('<H>PEPTIDE/1', 'b')
413-
{'C': 34, 'H': 52, 'N': 7, 'O': 14, 'e': -1}
414-
415-
>>> _sequence_comp('{Unimod:2}PEPTIDE', 'p')
416-
{'C': 34, 'H': 54, 'N': 8, 'O': 14}
417-
418-
>>> _sequence_comp('<13C>PEPTIDE/1', 'b')
419-
{'H': 52, 'N': 7, 'O': 14, 'e': -1, '13C': 34}
420-
421-
>>> _sequence_comp('PEPTIDE[Unimod:2]/1', 'y')
422-
{'C': 34, 'H': 55, 'N': 8, 'O': 14, 'e': -1}
423-
424-
>>> _sequence_comp('<[Unimod:2]@T>PEPTIDE/1', 'y')
425-
{'C': 34, 'H': 55, 'N': 8, 'O': 14, 'e': -1}
426-
427-
>>> _sequence_comp('<[Unimod:2]@N-Term>PEPTIDE/1', 'y')
428-
{'C': 34, 'H': 55, 'N': 8, 'O': 14, 'e': -1}
429-
430-
>>> _sequence_comp('PEPTIDE/2', 'p')
431-
{'C': 34, 'H': 55, 'N': 7, 'O': 15, 'e': -2}
432-
433-
>>> _sequence_comp('PEPTIDE/2[+2Na+]', 'p')
434-
{'C': 34, 'H': 53, 'N': 7, 'O': 15, 'Na': 2, 'e': -2}
435-
436-
>>> _sequence_comp('<13C>PEPTIDE[Formula:C10]', 'p')
437-
{'H': 53, 'N': 7, 'O': 15, '13C': 34, 'C': 10}
438-
439-
>>> _sequence_comp('<13C>PEPTIDE[Formula:C10]', 'p', use_isotope_on_mods=True)
440-
{'H': 53, 'N': 7, 'O': 15, '13C': 44}
441-
442-
>>> _sequence_comp('<13C>PEPTIDE[Unimod:213413]', 'b')
443-
Traceback (most recent call last):
444-
peptacular.errors.UnknownModificationError: Unknown modification: Unimod:213413
445-
446-
>>> _sequence_comp('I', 'by')
447-
{'C': 6, 'H': 11, 'N': 1, 'O': 1}
448-
449-
# Ambiguous amino acid
450-
>>> _sequence_comp('B', 'by') # doctest: +ELLIPSIS
451-
Traceback (most recent call last):
452-
...
453-
peptacular.errors.AmbiguousAminoAcidError: Ambiguous amino acid: B! Cannot determine the composition ...
454-
455-
"""
456-
annotation = get_annotation_input(sequence, copy=True)
457-
458-
# If charge is not provided, set it to 0
459-
charge = 0
460-
if annotation.has_charge():
461-
charge = annotation.charge
462-
463-
# if charge_adducts is not provided, set it to None
464-
charge_adducts = None
465-
if annotation.has_charge_adducts():
466-
charge_adducts = annotation.charge_adducts[0]
467-
468-
if charge_adducts is None:
469-
if ion_type in ("p", "n"):
470-
charge_adducts = f"{charge}H+"
471-
else:
472-
charge_adducts = (
473-
f"{charge-1}H+,{FRAGMENT_ION_BASE_CHARGE_ADDUCTS[ion_type]}"
474-
)
475-
476-
if ion_type not in ("p", "n"):
477-
if charge == 0:
478-
warnings.warn(
479-
"Calculating the comp of a fragment ion with charge state 0. Fragment ions should have a "
480-
"charge state greater than 0 since the neutral form doesnt exist."
481-
)
482-
483-
if "B" in annotation.sequence:
484-
raise AmbiguousAminoAcidError(
485-
"B",
486-
"Cannot determine the composition of a sequence with an ambiguous amino acid.",
487-
)
488-
489-
if "Z" in annotation.sequence:
490-
raise AmbiguousAminoAcidError(
491-
"Z",
492-
"Cannot determine the composition of a sequence with an ambiguous amino acid.",
493-
)
494-
495-
# Get the composition of the base sequence
496-
sequence_composition = {}
497-
for aa in annotation.sequence:
498-
try:
499-
aa_comp = AA_COMPOSITIONS[aa]
500-
except KeyError as err:
501-
raise UnknownAminoAcidError(aa) from err
502-
for k, v in aa_comp.items():
503-
sequence_composition[k] = sequence_composition.get(k, 0) + v
504-
505-
# Apply the adjustments for the neutral fragment composition, based strictly on the ion dissociation points.
506-
for k, v in NEUTRAL_FRAGMENT_COMPOSITION_ADJUSTMENTS[ion_type].items():
507-
sequence_composition[k] = sequence_composition.get(k, 0) + v
508-
509-
charge_adduct_comp = _parse_charge_adducts_comp(charge_adducts)
510-
511-
for k, v in charge_adduct_comp.items():
512-
sequence_composition[k] = sequence_composition.get(k, 0) + v
513-
514-
mod_composition = {}
515-
if annotation.has_unknown_mods():
516-
for unknown_mod in annotation.unknown_mods:
517-
for k, v in mod_comp(unknown_mod).items():
518-
mod_composition[k] = mod_composition.get(k, 0) + v
519-
520-
if annotation.has_intervals():
521-
for interval in annotation.intervals:
522-
if interval.has_mods():
523-
for interval_mod in interval.mods:
524-
for k, v in mod_comp(interval_mod).items():
525-
mod_composition[k] = mod_composition.get(k, 0) + v
526-
527-
if annotation.has_labile_mods() and ion_type == "p":
528-
for labile_mod in annotation.labile_mods:
529-
for k, v in mod_comp(labile_mod).items():
530-
mod_composition[k] = mod_composition.get(k, 0) + v
531-
532-
if annotation.has_nterm_mods():
533-
for nterm_mod in annotation.nterm_mods:
534-
for k, v in mod_comp(nterm_mod).items():
535-
mod_composition[k] = mod_composition.get(k, 0) + v
536-
537-
if annotation.has_cterm_mods():
538-
for cterm_mod in annotation.cterm_mods:
539-
for k, v in mod_comp(cterm_mod).items():
540-
mod_composition[k] = mod_composition.get(k, 0) + v
541-
542-
if annotation.has_internal_mods():
543-
for _, internal_mods in annotation.internal_mods.items():
544-
for internal_mod in internal_mods:
545-
for k, v in mod_comp(internal_mod).items():
546-
mod_composition[k] = mod_composition.get(k, 0) + v
547-
548-
if annotation.has_static_mods():
549-
static_map = parse_static_mods(annotation.static_mods)
550-
551-
n_term_mod = static_map.get("N-Term")
552-
if n_term_mod is not None:
553-
for m in n_term_mod:
554-
for k, v in mod_comp(m.val).items():
555-
mod_composition[k] = mod_composition.get(k, 0) + v
556-
557-
c_term_mod = static_map.get("C-Term")
558-
if c_term_mod is not None:
559-
for m in c_term_mod:
560-
for k, v in mod_comp(m.val).items():
561-
mod_composition[k] = mod_composition.get(k, 0) + v
562-
563-
for aa, mod in static_map.items():
564-
if aa in ["N-Term", "C-Term"]:
565-
continue
566-
567-
aa_count = annotation.sequence.count(aa)
568-
for m in mod:
569-
for k, v in mod_comp(m.val).items():
570-
mod_composition[k] = mod_composition.get(k, 0) + v * aa_count
571-
572-
mod_composition["n"] = mod_composition.get("n", 0) + isotope
573-
574-
# Apply isotopic mods
575-
if annotation.has_isotope_mods():
576-
if use_isotope_on_mods:
577-
sequence_composition = apply_isotope_mods_to_composition(
578-
sequence_composition, annotation.isotope_mods
579-
)
580-
mod_composition = apply_isotope_mods_to_composition(
581-
mod_composition, annotation.isotope_mods
582-
)
583-
else:
584-
sequence_composition = apply_isotope_mods_to_composition(
585-
sequence_composition, annotation.isotope_mods
586-
)
587-
588-
composition = {}
589-
for k, v in sequence_composition.items():
590-
composition[k] = composition.get(k, 0) + v
591-
592-
for k, v in mod_composition.items():
593-
composition[k] = composition.get(k, 0) + v
594-
595-
composition = {k: v for k, v in composition.items() if v != 0}
596-
597-
return composition
598-
599-
600362
def _parse_mod_delta_mass_only(mod: Union[str, Mod]) -> Union[float, None]:
601363
"""
602364
Parse a modification string.

src/peptacular/chem/chem_constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
from typing import Dict
66

7-
from peptacular.chem.chem_util import chem_mass
8-
from peptacular.constants import (
7+
from ..chem.chem_util import chem_mass
8+
from ..constants import (
99
NEUTRAL_FRAGMENT_COMPOSITION_ADJUSTMENTS,
1010
ISOTOPIC_ATOMIC_MASSES,
1111
AA_COMPOSITIONS,

src/peptacular/chem/chem_util.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from typing import Union, Optional, List
66

7-
from peptacular.constants import (
7+
from ..constants import (
88
HILL_ORDER,
99
ISOTOPIC_ATOMIC_MASSES,
1010
ELECTRON_MASS,
@@ -14,10 +14,9 @@
1414
ISOTOPE_COMPONENT_PATTERN,
1515
CONDENSED_CHEM_FORMULA_PATTERN,
1616
)
17-
from peptacular.errors import InvalidChemFormulaError
18-
from peptacular.util import convert_type
19-
20-
from peptacular.types import ChemComposition
17+
from ..errors import InvalidChemFormulaError
18+
from ..utils2 import convert_type
19+
from ..types import ChemComposition
2120

2221

2322
def parse_chem_formula(formula: str, sep: str = "") -> ChemComposition:

src/peptacular/constants.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,23 @@
1717
map_atomic_number_to_comp,
1818
map_atomic_number_to_symbol,
1919
)
20-
from .util import merge_dicts
20+
21+
22+
def merge_dicts(d1: Dict, d2: Dict) -> Dict:
23+
"""
24+
Merge two dictionaries by summing the values of shared keys, then remove any keys with value 0.
25+
"""
26+
d = {}
27+
for k, v in d1.items():
28+
d[k] = v
29+
for k, v in d2.items():
30+
d[k] = d.get(k, 0) + v
31+
32+
# remove any keys with value 0
33+
d = {k: v for k, v in d.items() if v != 0}
34+
35+
return d
36+
2137

2238
# Particle masses
2339
PROTON_MASS = 1.00727646688

src/peptacular/fragmentation.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
INTERNAL_ION_TYPES,
1616
TERMINAL_ION_TYPES,
1717
)
18-
from .mass_calc import mass, adjust_mass, adjust_mz
18+
from .mass_calc import adjust_mass, adjust_mz
1919
from .sequence.sequence_funcs import get_annotation_input, sequence_length
2020
from .spans import (
2121
build_non_enzymatic_spans,
@@ -638,8 +638,8 @@ def fragment(
638638
if _mass_components is None:
639639
components = annotation.split()
640640
_mass_components = [
641-
mass(sequence=component, charge=0, ion_type="n", monoisotopic=monoisotopic)
642-
for component in components
641+
c.add_charge(0).mass(ion_type="n", monoisotopic=monoisotopic)
642+
for c in components
643643
]
644644

645645
frags = []
@@ -706,8 +706,7 @@ def __init__(
706706

707707
self.components = self.annotation.split()
708708
self.mass_components = [
709-
mass(
710-
sequence=component,
709+
component.mass(
711710
charge=0,
712711
ion_type="n",
713712
monoisotopic=self.monoisotopic,

0 commit comments

Comments
 (0)