|
3 | 3 | sequence with/and modifications.
|
4 | 4 | """
|
5 | 5 |
|
6 |
| -import warnings |
7 | 6 | from typing import Union, List, Optional
|
8 | 7 |
|
9 |
| -from peptacular.chem.chem_constants import ISOTOPIC_AVERAGINE_MASS |
10 |
| -from peptacular.sequence.sequence_funcs import get_annotation_input |
11 |
| -from peptacular.types import ChemComposition |
12 |
| -from peptacular.proforma.input_convert import ModValue |
13 |
| -from peptacular.chem.chem_util import parse_chem_formula, write_chem_formula |
14 |
| -from peptacular.mods.mod_db_setup import MONOSACCHARIDES_DB |
15 |
| -from peptacular.constants import ( |
16 |
| - AA_COMPOSITIONS, |
| 8 | +from ..proforma_dataclasses import Mod |
| 9 | +from ..utils2 import parse_ion_elements |
| 10 | +from ..util import parse_isotope_mods |
| 11 | + |
| 12 | +from .chem_constants import ISOTOPIC_AVERAGINE_MASS |
| 13 | +from ..types import ChemComposition, ModValue |
| 14 | +from .chem_util import parse_chem_formula, write_chem_formula |
| 15 | +from ..mods.mod_db_setup import MONOSACCHARIDES_DB |
| 16 | +from ..constants import ( |
17 | 17 | AVERAGINE_RATIOS,
|
18 |
| - NEUTRAL_FRAGMENT_COMPOSITION_ADJUSTMENTS, |
19 |
| - FRAGMENT_ION_BASE_CHARGE_ADDUCTS, |
20 | 18 | )
|
21 |
| -from peptacular.errors import ( |
| 19 | +from ..errors import ( |
22 | 20 | InvalidCompositionError,
|
23 |
| - AmbiguousAminoAcidError, |
24 |
| - UnknownAminoAcidError, |
25 | 21 | DeltaMassCompositionError,
|
26 | 22 | )
|
27 |
| -from peptacular.glycan import glycan_comp |
28 |
| -from peptacular.mods.mod_db import ( |
| 23 | +from ..glycan import glycan_comp |
| 24 | +from ..mods.mod_db import ( |
29 | 25 | parse_unimod_comp,
|
30 | 26 | parse_psi_comp,
|
31 | 27 | is_psi_mod_str,
|
|
37 | 33 | is_gno_str,
|
38 | 34 | parse_gno_comp,
|
39 | 35 | )
|
40 |
| -from peptacular.proforma.proforma_parser import ( |
41 |
| - parse_static_mods, |
42 |
| - ProFormaAnnotation, |
43 |
| - Mod, |
44 |
| - parse_isotope_mods, |
45 |
| - parse_ion_elements, |
46 |
| -) |
47 |
| -from peptacular.util import convert_type |
| 36 | + |
| 37 | +from ..utils2 import convert_type |
48 | 38 |
|
49 | 39 |
|
50 | 40 | def glycan_to_chem(glycan: Union[ChemComposition, str]) -> str:
|
@@ -369,234 +359,6 @@ def _parse_mod_delta_mass(mod: str) -> Union[float, None]:
|
369 | 359 | return mass
|
370 | 360 |
|
371 | 361 |
|
372 |
| -def _sequence_comp( |
373 |
| - sequence: Union[str, ProFormaAnnotation], |
374 |
| - ion_type: str, |
375 |
| - isotope: int = 0, |
376 |
| - use_isotope_on_mods: bool = False, |
377 |
| -) -> ChemComposition: |
378 |
| - """ |
379 |
| - Calculate the composition of a sequence. |
380 |
| -
|
381 |
| - :param annotation: The sequence or ProForma annotation. |
382 |
| - :type annotation: str | ProFormaAnnotation |
383 |
| - :param ion_type: The ion type. |
384 |
| - :type ion_type: str |
385 |
| - :param isotope: The number of Neutrons to add/subtract from the final mass. Default is 0. |
386 |
| - :type isotope: int |
387 |
| - :param use_isotope_on_mods: If True, the isotope modifications will be applied to the final composition. |
388 |
| - Default is False. |
389 |
| - :type use_isotope_on_mods: bool |
390 |
| -
|
391 |
| - :raises UnknownModificationError: If the modification is unknown. |
392 |
| - :raises AmbiguousAminoAcidError: If the sequence contains an ambiguous amino acid. |
393 |
| -
|
394 |
| - :return: The composition of the sequence. |
395 |
| - :rtype: Dict[str, int | float] |
396 |
| -
|
397 |
| - .. code-block:: python |
398 |
| -
|
399 |
| - # Calculate the mass of a peptide sequence. |
400 |
| - >>> _sequence_comp('PEPTIDE/1', 'y') |
401 |
| - {'C': 34, 'H': 54, 'N': 7, 'O': 15, 'e': -1} |
402 |
| -
|
403 |
| - >>> _sequence_comp('PEPTIDE/1', 'y', isotope=1) |
404 |
| - {'C': 34, 'H': 54, 'N': 7, 'O': 15, 'e': -1, 'n': 1} |
405 |
| -
|
406 |
| - >>> _sequence_comp('G/1', 'i') |
407 |
| - {'C': 1, 'H': 4, 'N': 1, 'e': -1} |
408 |
| -
|
409 |
| - >>> _sequence_comp('PEPTIDE/1', 'b') |
410 |
| - {'C': 34, 'H': 52, 'N': 7, 'O': 14, 'e': -1} |
411 |
| -
|
412 |
| - >>> _sequence_comp('<H>PEPTIDE/1', 'b') |
413 |
| - {'C': 34, 'H': 52, 'N': 7, 'O': 14, 'e': -1} |
414 |
| -
|
415 |
| - >>> _sequence_comp('{Unimod:2}PEPTIDE', 'p') |
416 |
| - {'C': 34, 'H': 54, 'N': 8, 'O': 14} |
417 |
| -
|
418 |
| - >>> _sequence_comp('<13C>PEPTIDE/1', 'b') |
419 |
| - {'H': 52, 'N': 7, 'O': 14, 'e': -1, '13C': 34} |
420 |
| -
|
421 |
| - >>> _sequence_comp('PEPTIDE[Unimod:2]/1', 'y') |
422 |
| - {'C': 34, 'H': 55, 'N': 8, 'O': 14, 'e': -1} |
423 |
| -
|
424 |
| - >>> _sequence_comp('<[Unimod:2]@T>PEPTIDE/1', 'y') |
425 |
| - {'C': 34, 'H': 55, 'N': 8, 'O': 14, 'e': -1} |
426 |
| -
|
427 |
| - >>> _sequence_comp('<[Unimod:2]@N-Term>PEPTIDE/1', 'y') |
428 |
| - {'C': 34, 'H': 55, 'N': 8, 'O': 14, 'e': -1} |
429 |
| -
|
430 |
| - >>> _sequence_comp('PEPTIDE/2', 'p') |
431 |
| - {'C': 34, 'H': 55, 'N': 7, 'O': 15, 'e': -2} |
432 |
| -
|
433 |
| - >>> _sequence_comp('PEPTIDE/2[+2Na+]', 'p') |
434 |
| - {'C': 34, 'H': 53, 'N': 7, 'O': 15, 'Na': 2, 'e': -2} |
435 |
| -
|
436 |
| - >>> _sequence_comp('<13C>PEPTIDE[Formula:C10]', 'p') |
437 |
| - {'H': 53, 'N': 7, 'O': 15, '13C': 34, 'C': 10} |
438 |
| -
|
439 |
| - >>> _sequence_comp('<13C>PEPTIDE[Formula:C10]', 'p', use_isotope_on_mods=True) |
440 |
| - {'H': 53, 'N': 7, 'O': 15, '13C': 44} |
441 |
| -
|
442 |
| - >>> _sequence_comp('<13C>PEPTIDE[Unimod:213413]', 'b') |
443 |
| - Traceback (most recent call last): |
444 |
| - peptacular.errors.UnknownModificationError: Unknown modification: Unimod:213413 |
445 |
| -
|
446 |
| - >>> _sequence_comp('I', 'by') |
447 |
| - {'C': 6, 'H': 11, 'N': 1, 'O': 1} |
448 |
| -
|
449 |
| - # Ambiguous amino acid |
450 |
| - >>> _sequence_comp('B', 'by') # doctest: +ELLIPSIS |
451 |
| - Traceback (most recent call last): |
452 |
| - ... |
453 |
| - peptacular.errors.AmbiguousAminoAcidError: Ambiguous amino acid: B! Cannot determine the composition ... |
454 |
| -
|
455 |
| - """ |
456 |
| - annotation = get_annotation_input(sequence, copy=True) |
457 |
| - |
458 |
| - # If charge is not provided, set it to 0 |
459 |
| - charge = 0 |
460 |
| - if annotation.has_charge(): |
461 |
| - charge = annotation.charge |
462 |
| - |
463 |
| - # if charge_adducts is not provided, set it to None |
464 |
| - charge_adducts = None |
465 |
| - if annotation.has_charge_adducts(): |
466 |
| - charge_adducts = annotation.charge_adducts[0] |
467 |
| - |
468 |
| - if charge_adducts is None: |
469 |
| - if ion_type in ("p", "n"): |
470 |
| - charge_adducts = f"{charge}H+" |
471 |
| - else: |
472 |
| - charge_adducts = ( |
473 |
| - f"{charge-1}H+,{FRAGMENT_ION_BASE_CHARGE_ADDUCTS[ion_type]}" |
474 |
| - ) |
475 |
| - |
476 |
| - if ion_type not in ("p", "n"): |
477 |
| - if charge == 0: |
478 |
| - warnings.warn( |
479 |
| - "Calculating the comp of a fragment ion with charge state 0. Fragment ions should have a " |
480 |
| - "charge state greater than 0 since the neutral form doesnt exist." |
481 |
| - ) |
482 |
| - |
483 |
| - if "B" in annotation.sequence: |
484 |
| - raise AmbiguousAminoAcidError( |
485 |
| - "B", |
486 |
| - "Cannot determine the composition of a sequence with an ambiguous amino acid.", |
487 |
| - ) |
488 |
| - |
489 |
| - if "Z" in annotation.sequence: |
490 |
| - raise AmbiguousAminoAcidError( |
491 |
| - "Z", |
492 |
| - "Cannot determine the composition of a sequence with an ambiguous amino acid.", |
493 |
| - ) |
494 |
| - |
495 |
| - # Get the composition of the base sequence |
496 |
| - sequence_composition = {} |
497 |
| - for aa in annotation.sequence: |
498 |
| - try: |
499 |
| - aa_comp = AA_COMPOSITIONS[aa] |
500 |
| - except KeyError as err: |
501 |
| - raise UnknownAminoAcidError(aa) from err |
502 |
| - for k, v in aa_comp.items(): |
503 |
| - sequence_composition[k] = sequence_composition.get(k, 0) + v |
504 |
| - |
505 |
| - # Apply the adjustments for the neutral fragment composition based on strictly the ion dissociation points. |
506 |
| - for k, v in NEUTRAL_FRAGMENT_COMPOSITION_ADJUSTMENTS[ion_type].items(): |
507 |
| - sequence_composition[k] = sequence_composition.get(k, 0) + v |
508 |
| - |
509 |
| - charge_adduct_comp = _parse_charge_adducts_comp(charge_adducts) |
510 |
| - |
511 |
| - for k, v in charge_adduct_comp.items(): |
512 |
| - sequence_composition[k] = sequence_composition.get(k, 0) + v |
513 |
| - |
514 |
| - mod_composition = {} |
515 |
| - if annotation.has_unknown_mods(): |
516 |
| - for unknown_mod in annotation.unknown_mods: |
517 |
| - for k, v in mod_comp(unknown_mod).items(): |
518 |
| - mod_composition[k] = mod_composition.get(k, 0) + v |
519 |
| - |
520 |
| - if annotation.has_intervals(): |
521 |
| - for interval in annotation.intervals: |
522 |
| - if interval.has_mods(): |
523 |
| - for interval_mod in interval.mods: |
524 |
| - for k, v in mod_comp(interval_mod).items(): |
525 |
| - mod_composition[k] = mod_composition.get(k, 0) + v |
526 |
| - |
527 |
| - if annotation.has_labile_mods() and ion_type == "p": |
528 |
| - for labile_mod in annotation.labile_mods: |
529 |
| - for k, v in mod_comp(labile_mod).items(): |
530 |
| - mod_composition[k] = mod_composition.get(k, 0) + v |
531 |
| - |
532 |
| - if annotation.has_nterm_mods(): |
533 |
| - for nterm_mod in annotation.nterm_mods: |
534 |
| - for k, v in mod_comp(nterm_mod).items(): |
535 |
| - mod_composition[k] = mod_composition.get(k, 0) + v |
536 |
| - |
537 |
| - if annotation.has_cterm_mods(): |
538 |
| - for cterm_mod in annotation.cterm_mods: |
539 |
| - for k, v in mod_comp(cterm_mod).items(): |
540 |
| - mod_composition[k] = mod_composition.get(k, 0) + v |
541 |
| - |
542 |
| - if annotation.has_internal_mods(): |
543 |
| - for _, internal_mods in annotation.internal_mods.items(): |
544 |
| - for internal_mod in internal_mods: |
545 |
| - for k, v in mod_comp(internal_mod).items(): |
546 |
| - mod_composition[k] = mod_composition.get(k, 0) + v |
547 |
| - |
548 |
| - if annotation.has_static_mods(): |
549 |
| - static_map = parse_static_mods(annotation.static_mods) |
550 |
| - |
551 |
| - n_term_mod = static_map.get("N-Term") |
552 |
| - if n_term_mod is not None: |
553 |
| - for m in n_term_mod: |
554 |
| - for k, v in mod_comp(m.val).items(): |
555 |
| - mod_composition[k] = mod_composition.get(k, 0) + v |
556 |
| - |
557 |
| - c_term_mod = static_map.get("C-Term") |
558 |
| - if c_term_mod is not None: |
559 |
| - for m in c_term_mod: |
560 |
| - for k, v in mod_comp(m.val).items(): |
561 |
| - mod_composition[k] = mod_composition.get(k, 0) + v |
562 |
| - |
563 |
| - for aa, mod in static_map.items(): |
564 |
| - if aa in ["N-Term", "C-Term"]: |
565 |
| - continue |
566 |
| - |
567 |
| - aa_count = annotation.sequence.count(aa) |
568 |
| - for m in mod: |
569 |
| - for k, v in mod_comp(m.val).items(): |
570 |
| - mod_composition[k] = mod_composition.get(k, 0) + v * aa_count |
571 |
| - |
572 |
| - mod_composition["n"] = mod_composition.get("n", 0) + isotope |
573 |
| - |
574 |
| - # Apply isotopic mods |
575 |
| - if annotation.has_isotope_mods(): |
576 |
| - if use_isotope_on_mods: |
577 |
| - sequence_composition = apply_isotope_mods_to_composition( |
578 |
| - sequence_composition, annotation.isotope_mods |
579 |
| - ) |
580 |
| - mod_composition = apply_isotope_mods_to_composition( |
581 |
| - mod_composition, annotation.isotope_mods |
582 |
| - ) |
583 |
| - else: |
584 |
| - sequence_composition = apply_isotope_mods_to_composition( |
585 |
| - sequence_composition, annotation.isotope_mods |
586 |
| - ) |
587 |
| - |
588 |
| - composition = {} |
589 |
| - for k, v in sequence_composition.items(): |
590 |
| - composition[k] = composition.get(k, 0) + v |
591 |
| - |
592 |
| - for k, v in mod_composition.items(): |
593 |
| - composition[k] = composition.get(k, 0) + v |
594 |
| - |
595 |
| - composition = {k: v for k, v in composition.items() if v != 0} |
596 |
| - |
597 |
| - return composition |
598 |
| - |
599 |
| - |
600 | 362 | def _parse_mod_delta_mass_only(mod: Union[str, Mod]) -> Union[float, None]:
|
601 | 363 | """
|
602 | 364 | Parse a modification string.
|
|
0 commit comments