From 36ddc497a3b00cdc69efcc2e3b1e0ab63a6ec02c Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Tue, 27 Jan 2026 16:13:51 +0000 Subject: [PATCH 01/10] Improve isotope/adduct controls and deisotoping --- tests/test_deisotoping.py | 61 ++++++++++ vimms/Chemicals.py | 227 +++++++++++++++++++++++++------------- vimms/Common.py | 70 +++++++++++- vimms/Deisotoping.py | 162 +++++++++++++++++++++++++++ 4 files changed, 438 insertions(+), 82 deletions(-) create mode 100644 tests/test_deisotoping.py create mode 100644 vimms/Deisotoping.py diff --git a/tests/test_deisotoping.py b/tests/test_deisotoping.py new file mode 100644 index 0000000..b5415b2 --- /dev/null +++ b/tests/test_deisotoping.py @@ -0,0 +1,61 @@ +# test deisotoping and isotope generation + +import numpy as np + +from vimms.Chemicals import Isotopes, Adducts +from vimms.Common import Formula, ADDUCT_TERMS, POSITIVE, PROTON_MASS +from vimms.Deisotoping import Deisotoper +from vimms.MassSpecUtils import adduct_transformation + + +def test_isotope_distribution_multi_element(): + formula = Formula("C10H16N2O2S") + isotopes = Isotopes(formula) + peaks = isotopes.get_isotopes(total_proportion=0.99) + + proportions = [peak[1] for peak in peaks] + mzs = [peak[0] for peak in peaks] + + assert len(peaks) > 1 + assert np.isclose(sum(proportions), 1.0, atol=1e-6) + assert all(mzs[i] < mzs[i + 1] for i in range(len(mzs) - 1)) + + +def test_isotope_distribution_chlorine_m2_peak(): + formula = Formula("C5H10Cl2") + isotopes = Isotopes(formula) + peaks = isotopes.get_isotopes(total_proportion=0.99) + + mono_mz = peaks[0][0] + deltas = [mz - mono_mz for mz, _, _ in peaks[1:]] + + assert any(np.isclose(delta, 1.997, atol=0.01) for delta in deltas) + + +def test_deisotoper_recovers_mono(): + formula = Formula("C10H16N2O2S") + isotopes = Isotopes(formula) + adducts = Adducts(formula, adduct_prior_dict={POSITIVE: {"M+H": 1.0}}) + adduct_name = adducts.get_adducts()[POSITIVE][0][0] + mul, add = ADDUCT_TERMS[adduct_name] + + peaks = [] + for mz, proportion, _ in isotopes.get_isotopes(total_proportion=0.99): + adducted_mz = adduct_transformation(mz, mul, add) + peaks.append((adducted_mz, proportion * 1e5)) + + deisotoper = Deisotoper(ppm_tolerance=10.0, max_charge=1, min_isotopes=2) + clusters = deisotoper.deisotope(peaks) + + assert len(clusters) == 1 + expected_mz = formula.mass + PROTON_MASS + assert np.isclose(clusters[0].monoisotopic_mz, expected_mz, atol=1e-3) + + +def test_deisotoper_handles_m_plus_2_only(): + peaks = [(100.0, 1e5), (101.997, 6e4)] + deisotoper = Deisotoper(ppm_tolerance=10.0, max_charge=1, min_isotopes=2) + clusters = deisotoper.deisotope(peaks) + + assert len(clusters) == 1 + assert np.isclose(clusters[0].monoisotopic_mz, 100.0, atol=1e-6) diff --git a/vimms/Chemicals.py b/vimms/Chemicals.py index 3bd626b..d167513 100644 --- a/vimms/Chemicals.py +++ b/vimms/Chemicals.py @@ -10,8 +10,6 @@ from collections import deque import numpy as np -import scipy -import scipy.stats from loguru import logger from vimms.ChemicalSamplers import ( @@ -26,14 +24,14 @@ PROTON_MASS, POSITIVE, NEGATIVE, - C12_PROPORTION, C13_MZ_DIFF, - C, MONO, - C13, load_obj, ADDUCT_NAMES_POS, ADDUCT_NAMES_NEG, + ADDUCT_PRIOR_POS, + ADDUCT_PRIOR_NEG, + NATURAL_ISOTOPES, ) from vimms.Noise import GaussianPeakNoise from vimms.Roi import make_roi, RoiBuilderParams @@ -70,15 +68,21 @@ class Isotopes: A class to represent an isotope of a chemical """ - def __init__(self, formula): + def __init__(self, formula, min_prob=1e-12, max_peaks=20, max_states=4000, mass_precision=8): """ Create an Isotope object Args: formula: the formula for the given isotope """ self.formula = formula + self.min_prob = min_prob + self.max_peaks = max_peaks + self.max_states = max_states + self.mass_precision = mass_precision - def get_isotopes(self, total_proportion): + def get_isotopes( + self, total_proportion, min_prob=None, max_peaks=None, max_states=None, mass_precision=None + ): """ Gets the isotope total proportion @@ -87,68 +91,107 @@ def get_isotopes(self, total_proportion): Returns: the computed isotope total proportion - TODO: Add functionality for elements other than Carbon """ - peaks = [() for i in range(len(self._get_isotope_proportions(total_proportion)))] - for i in range(len(peaks)): - peaks[i] += (self._get_isotope_mz(self._get_isotope_names(i)),) - peaks[i] += (self._get_isotope_proportions(total_proportion)[i],) - peaks[i] += (self._get_isotope_names(i),) + peaks = [] + distributions = self._get_isotope_distribution( + total_proportion=total_proportion, + min_prob=self.min_prob if min_prob is None else min_prob, + max_peaks=self.max_peaks if max_peaks is None else max_peaks, + max_states=self.max_states if max_states is None else max_states, + mass_precision=self.mass_precision if mass_precision is None else mass_precision, + ) + base_mz = self.formula._get_mz() + for idx, (mass_shift, proportion) in enumerate(distributions): + name = MONO if idx == 0 else f"M+{idx}" + peaks.append((base_mz + mass_shift, proportion, name)) return peaks - def _get_isotope_proportions(self, total_proportion): - """ - Get isotope proportion by sampling from a binomial pmf - - Args: - total_proportion: the total proportion to compute - - Returns: the computed isotope total proportion - - """ - proportions = [] - while sum(proportions) < total_proportion: - proportions.extend( - [ - scipy.stats.binom.pmf( - len(proportions), self.formula._get_n_element(C), 1 - C12_PROPORTION - ) - ] + def _get_isotope_distribution( + self, total_proportion, min_prob=1e-12, max_peaks=20, max_states=4000, mass_precision=8 + ): + distribution = [(0.0, 1.0)] + for element, count in self.formula.atoms.items(): + if count <= 0: + continue + isotopes = NATURAL_ISOTOPES.get(element) + if not isotopes or len(isotopes) == 1: + continue + mono_mass = isotopes[0][0] + base_distribution = [(mass - mono_mass, abundance) for mass, abundance in isotopes] + element_distribution = self._power_distribution( + base_distribution, + count, + min_prob=min_prob, + max_states=max_states, + mass_precision=mass_precision, + ) + distribution = self._convolve_distributions( + distribution, + element_distribution, + min_prob=min_prob, + max_states=max_states, + mass_precision=mass_precision, ) - normalised_proportions = [ - proportions[i] / sum(proportions) for i in range(len(proportions)) - ] - return normalised_proportions - - def _get_isotope_names(self, isotope_number): - """ - Get the isotope name given the number, e.g. 0 is the monoisotope - Args: - isotope_number: the isotope number - - Returns: the isotope name - - """ - if isotope_number == 0: - return MONO - else: - return str(isotope_number) + C13 - - def _get_isotope_mz(self, isotope): - """ - Get the isotope m/z value - Args: - isotope: the isotope name - - Returns: the isotope m/z value - """ - if isotope == MONO: - return self.formula._get_mz() - elif isotope[-3:] == C13: - return self.formula._get_mz() + float(isotope.split(C13)[0]) * C13_MZ_DIFF - else: - return None + distribution = [(shift, prob) for shift, prob in distribution if prob >= min_prob] + distribution.sort(key=lambda x: x[0]) + + selected = [] + cumulative = 0.0 + for mass_shift, prob in distribution: + selected.append((mass_shift, prob)) + cumulative += prob + if cumulative >= total_proportion or len(selected) >= max_peaks: + break + + total = sum(prob for _, prob in selected) + if total == 0: + return [(0.0, 1.0)] + return [(shift, prob / total) for shift, prob in selected] + + def _power_distribution(self, base_distribution, count, min_prob, max_states, mass_precision): + if count == 1: + return base_distribution + result = [(0.0, 1.0)] + power = base_distribution + remaining = count + while remaining > 0: + if remaining % 2 == 1: + result = self._convolve_distributions( + result, + power, + min_prob=min_prob, + max_states=max_states, + mass_precision=mass_precision, + ) + remaining //= 2 + if remaining: + power = self._convolve_distributions( + power, + power, + min_prob=min_prob, + max_states=max_states, + mass_precision=mass_precision, + ) + return result + + def _convolve_distributions(self, left, right, min_prob, max_states, mass_precision): + new_distribution = {} + for left_shift, left_prob in left: + for right_shift, right_prob in right: + prob = left_prob * right_prob + if prob < min_prob: + continue + shift = left_shift + right_shift + key = round(shift, mass_precision) + new_distribution[key] = new_distribution.get(key, 0.0) + prob + if not new_distribution: + return [] + distribution = list(new_distribution.items()) + if len(distribution) > max_states: + distribution.sort(key=lambda x: x[1], reverse=True) + distribution = distribution[:max_states] + return distribution class Adducts: @@ -156,24 +199,43 @@ class Adducts: A class to represent an adduct of a chemical """ - def __init__(self, formula, adduct_proportion_cutoff=0.05, adduct_prior_dict=None): + def __init__( + self, + formula, + adduct_proportion_cutoff=0.05, + adduct_prior_dict=None, + adduct_profile=None, + adduct_concentration=15.0, + ): """ Create an Adduct class Args: formula: the formula of this adduct adduct_proportion_cutoff: proportion cut-off of the adduct - adduct_prior_dict: custom adduct dictionary, if any + adduct_prior_dict: custom adduct dictionary or callable, if any + adduct_profile: preset profile name or dict of adduct priors + adduct_concentration: dirichlet concentration for adduct sampling """ + if callable(adduct_prior_dict): + adduct_prior_dict = adduct_prior_dict(formula) + + if adduct_prior_dict is None and adduct_profile is not None: + from vimms.Common import ADDUCT_PROFILE_PRESETS + + if isinstance(adduct_profile, str): + adduct_prior_dict = ADDUCT_PROFILE_PRESETS.get(adduct_profile) + if adduct_prior_dict is None: + raise ValueError(f"Unknown adduct profile '{adduct_profile}'") + else: + adduct_prior_dict = adduct_profile + if adduct_prior_dict is None: self.adduct_names = {POSITIVE: ADDUCT_NAMES_POS, NEGATIVE: ADDUCT_NAMES_NEG} self.adduct_prior = { - POSITIVE: np.ones(len(self.adduct_names[POSITIVE])) * 0.1, - NEGATIVE: np.ones(len(self.adduct_names[NEGATIVE])) * 0.1, + POSITIVE: np.array([ADDUCT_PRIOR_POS.get(name, 0.05) for name in ADDUCT_NAMES_POS]), + NEGATIVE: np.array([ADDUCT_PRIOR_NEG.get(name, 0.05) for name in ADDUCT_NAMES_NEG]), } - # give more weight to the first one, i.e. M+H - self.adduct_prior[POSITIVE][0] = 1.0 - self.adduct_prior[NEGATIVE][0] = 1.0 else: assert POSITIVE in adduct_prior_dict or NEGATIVE in adduct_prior_dict self.adduct_names = {k: list(adduct_prior_dict[k].keys()) for k in adduct_prior_dict} @@ -182,6 +244,7 @@ def __init__(self, formula, adduct_proportion_cutoff=0.05, adduct_prior_dict=Non } self.formula = formula self.adduct_proportion_cutoff = adduct_proportion_cutoff + self.adduct_concentration = adduct_concentration def get_adducts(self): """ @@ -204,15 +267,17 @@ def _get_adduct_proportions(self): Returns: adduct proportion after sampling """ - # TODO: replace this with something proper proportions = {} for k in self.adduct_prior: - proportions[k] = np.random.dirichlet(self.adduct_prior[k]) - while max(proportions[k]) < 0.2: - proportions[k] = np.random.dirichlet(self.adduct_prior[k]) + alpha = self.adduct_prior[k] * self.adduct_concentration + alpha = np.where(alpha > 0, alpha, 0.001) + proportions[k] = np.random.dirichlet(alpha) proportions[k][np.where(proportions[k] < self.adduct_proportion_cutoff)] = 0 - proportions[k] = proportions[k] / max(proportions[k]) - proportions[k].tolist() + if proportions[k].sum() == 0: + proportions[k] = np.zeros_like(proportions[k]) + proportions[k][np.argmax(alpha)] = 1.0 + else: + proportions[k] = proportions[k] / proportions[k].sum() assert len(proportions[k]) == len(self.adduct_names[k]) return proportions @@ -625,6 +690,8 @@ def __init__( ms2_sampler=UniformMS2Sampler(), adduct_proportion_cutoff=0.05, adduct_prior_dict=None, + adduct_profile=None, + adduct_concentration=15.0, ): """ Create a mixture of [vimms.Chemicals.KnownChemical][] objects. @@ -642,6 +709,8 @@ def __init__( fragmentation spectra. adduct_proportion_cutoff: proportion of adduct cut-off adduct_prior_dict: custom adduct dictionary + adduct_profile: preset name or dict of adduct priors + adduct_concentration: dirichlet concentration for adduct sampling """ self.formula_sampler = formula_sampler self.rt_and_intensity_sampler = rt_and_intensity_sampler @@ -649,6 +718,8 @@ def __init__( self.ms2_sampler = ms2_sampler self.adduct_proportion_cutoff = adduct_proportion_cutoff self.adduct_prior_dict = adduct_prior_dict + self.adduct_profile = adduct_profile + self.adduct_concentration = adduct_concentration # if self.database is not None: # logger.debug('Sorting database compounds by masses') @@ -691,6 +762,8 @@ def sample(self, n_chemicals, ms_levels, include_adducts_isotopes=True): formula, self.adduct_proportion_cutoff, adduct_prior_dict=self.adduct_prior_dict, + adduct_profile=self.adduct_profile, + adduct_concentration=self.adduct_concentration, ) chemicals.append( diff --git a/vimms/Common.py b/vimms/Common.py index 322189c..56a3f09 100644 --- a/vimms/Common.py +++ b/vimms/Common.py @@ -79,24 +79,25 @@ ADDUCT_NAMES_POS = [ "M+H", - "[M+ACN]+H", - "[M+CH3OH]+H", + "M+NH4", "[M+NH3]+H", "M+Na", "M+K", - "M+2Na-H", "M+ACN+Na", + "M+2Na-H", "M+2K+H", + "[M+ACN]+H", + "[M+CH3OH]+H", "[M+DMSO]+H", "[M+2ACN]+H", "2M+H", - "M+ACN+Na", "2M+NH4", ] -ADDUCT_NAMES_NEG = ["M-H"] +ADDUCT_NAMES_NEG = ["M-H", "M+Cl", "M+FA-H", "M+Ac-H"] ADDUCT_TERMS = { "M+H": (1, PROTON_MASS), + "M+NH4": (1, 18.033823), "[M+ACN]+H": (1, 42.033823), "[M+CH3OH]+H": (1, 33.033489), "[M+NH3]+H": (1, 18.033823), @@ -110,12 +111,71 @@ "2M+H": (2, 1.007276), "2M+NH4": (2, 18), "M-H": (1, -PROTON_MASS), + "M+Cl": (1, 34.96885268), + "M+FA-H": (1, 44.998201), + "M+Ac-H": (1, 59.013851), } # example prior dictionary to be passed when creating an # adducts object to only get M+H adducts out ADDUCT_DICT_POS_MH = {POSITIVE: {"M+H": 1.0}} +ADDUCT_PRIOR_POS = { + "M+H": 1.0, + "M+NH4": 0.3, + "[M+NH3]+H": 0.15, + "M+Na": 0.25, + "M+K": 0.15, + "M+ACN+Na": 0.05, + "M+2Na-H": 0.03, + "M+2K+H": 0.02, + "[M+ACN]+H": 0.08, + "[M+CH3OH]+H": 0.05, + "[M+DMSO]+H": 0.03, + "[M+2ACN]+H": 0.02, + "2M+H": 0.04, + "2M+NH4": 0.02, +} +ADDUCT_PRIOR_NEG = { + "M-H": 1.0, + "M+Cl": 0.2, + "M+FA-H": 0.35, + "M+Ac-H": 0.15, +} + +ADDUCT_PROFILE_PRESETS = { + "default": {POSITIVE: ADDUCT_PRIOR_POS, NEGATIVE: ADDUCT_PRIOR_NEG}, + "esi_positive": {POSITIVE: ADDUCT_PRIOR_POS}, + "esi_negative": {NEGATIVE: ADDUCT_PRIOR_NEG}, +} + +NATURAL_ISOTOPES = { + "C": [(12.0, 0.9893), (13.0033548378, 0.0107)], + "H": [(1.00782503214, 0.999885), (2.01410177812, 0.000115)], + "N": [(14.00307400524, 0.99636), (15.00010889888, 0.00364)], + "O": [ + (15.9949146221, 0.99757), + (16.9991317565, 0.00038), + (17.9991596129, 0.00205), + ], + "S": [ + (31.97207069, 0.9499), + (32.9714585, 0.0075), + (33.96786683, 0.0425), + (35.96708088, 0.0001), + ], + "Cl": [(34.96885268, 0.7576), (36.96590259, 0.2424)], + "Br": [(78.9183376, 0.5069), (80.9162906, 0.4931)], + "Si": [ + (27.9769265327, 0.92223), + (28.976494700, 0.04685), + (29.97377017, 0.03092), + ], + "P": [(30.973761512, 1.0)], + "F": [(18.998403205, 1.0)], + "I": [(126.904468, 1.0)], +} + ATOM_NAMES = ["C", "H", "N", "O", "P", "S", "Cl", "I", "Br", "Si", "F", "D"] ATOM_MASSES = { "C": 12.00000000000, diff --git a/vimms/Deisotoping.py b/vimms/Deisotoping.py new file mode 100644 index 0000000..516c630 --- /dev/null +++ b/vimms/Deisotoping.py @@ -0,0 +1,162 @@ +from dataclasses import dataclass +from typing import Iterable, List, Tuple + +import numpy as np + +from vimms.Common import C13_MZ_DIFF, NATURAL_ISOTOPES + + +@dataclass(frozen=True) +class IsotopeCluster: + monoisotopic_mz: float + charge: int + peak_indices: Tuple[int, ...] + + +class Deisotoper: + def __init__( + self, + ppm_tolerance=10.0, + max_charge=3, + min_isotopes=2, + isotope_mass_diffs=None, + max_relative_intensity_increase=1.5, + max_relative_intensity_increase_heavy=3.0, + heavy_isotope_threshold=1.5, + ): + self.ppm_tolerance = ppm_tolerance + self.max_charge = max_charge + self.min_isotopes = min_isotopes + self.isotope_mass_diffs = ( + tuple(isotope_mass_diffs) + if isotope_mass_diffs is not None + else self._default_isotope_mass_diffs() + ) + self.max_relative_intensity_increase = max_relative_intensity_increase + self.max_relative_intensity_increase_heavy = max_relative_intensity_increase_heavy + self.heavy_isotope_threshold = heavy_isotope_threshold + + def deisotope(self, peaks: Iterable[Tuple[float, float]]) -> List[IsotopeCluster]: + peaks = np.array(list(peaks), dtype=float) + if peaks.size == 0: + return [] + + mzs = peaks[:, 0] + intensities = peaks[:, 1] + order = np.argsort(mzs) + mzs = mzs[order] + intensities = intensities[order] + + assigned = np.full(len(mzs), False) + clusters = [] + + for idx in range(len(mzs)): + if assigned[idx]: + continue + mz = mzs[idx] + charge = self._guess_charge(mzs, mz, idx) + cluster_indices = self._grow_cluster(mzs, intensities, idx, charge) + + if len(cluster_indices) >= self.min_isotopes: + for ci in cluster_indices: + assigned[ci] = True + clusters.append( + IsotopeCluster( + monoisotopic_mz=mz, + charge=charge, + peak_indices=tuple(order[cluster_indices]), + ) + ) + + return clusters + + def _guess_charge(self, mzs: np.ndarray, mz: float, idx: int) -> int: + best_charge = 1 + best_match = None + for charge in range(1, self.max_charge + 1): + match = self._find_isotope_peak(mzs, idx + 1, mz, charge, isotope_idx=1) + if match is None: + continue + _, _, delta = match + if best_match is None or delta < best_match: + best_match = delta + best_charge = charge + return best_charge + + def _grow_cluster( + self, mzs: np.ndarray, intensities: np.ndarray, start_idx: int, charge: int + ) -> List[int]: + cluster_indices = [start_idx] + isotope_idx = 1 + prev_intensity = intensities[start_idx] + while True: + match = self._find_isotope_peak( + mzs, cluster_indices[-1] + 1, mzs[start_idx], charge, isotope_idx + ) + if match is None: + break + match_idx, match_diff, _ = match + max_increase = self.max_relative_intensity_increase + if match_diff >= self.heavy_isotope_threshold: + max_increase = self.max_relative_intensity_increase_heavy + if intensities[match_idx] > prev_intensity * max_increase: + break + cluster_indices.append(match_idx) + prev_intensity = intensities[match_idx] + isotope_idx += 1 + return cluster_indices + + def _find_isotope_peak( + self, mzs: np.ndarray, start_idx: int, base_mz: float, charge: int, isotope_idx: int + ) -> Tuple[int, float, float] | None: + best = None + for diff in self.isotope_mass_diffs: + target = base_mz + (diff / charge) * isotope_idx + match_idx = self._find_peak(mzs, target, start_idx) + if match_idx is None: + continue + delta = abs(mzs[match_idx] - target) + if best is None or delta < best[2]: + best = (match_idx, diff, delta) + return best + + def _find_peak(self, mzs: np.ndarray, target: float, start_idx: int) -> int | None: + if start_idx >= len(mzs): + return None + left = start_idx + right = len(mzs) - 1 + while left <= right: + mid = (left + right) // 2 + if mzs[mid] < target: + left = mid + 1 + else: + right = mid - 1 + candidates = [] + for idx in (left - 1, left, left + 1): + if 0 <= idx < len(mzs): + candidates.append(idx) + for idx in candidates: + if self._ppm_error(mzs[idx], target) <= self.ppm_tolerance: + return idx + return None + + @staticmethod + def _ppm_error(mz: float, target: float) -> float: + return abs(mz - target) / target * 1e6 + + @staticmethod + def _default_isotope_mass_diffs( + min_abundance: float = 0.0005, max_shift: float = 4.0 + ) -> Tuple[float, ...]: + diffs = {round(C13_MZ_DIFF, 6)} + for isotopes in NATURAL_ISOTOPES.values(): + if len(isotopes) <= 1: + continue + mono_mass = isotopes[0][0] + for mass, abundance in isotopes[1:]: + if abundance < min_abundance: + continue + diff = mass - mono_mass + if 0 < diff <= max_shift: + diffs.add(round(diff, 6)) + return tuple(sorted(diffs)) From 15be0cbaf23b91c6ccbf3acb09c76a00683a8e54 Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Wed, 28 Jan 2026 17:00:29 +0000 Subject: [PATCH 02/10] Fix deisotoping fine-structure matching --- vimms/Deisotoping.py | 82 ++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/vimms/Deisotoping.py b/vimms/Deisotoping.py index 516c630..e73893d 100644 --- a/vimms/Deisotoping.py +++ b/vimms/Deisotoping.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from collections import deque from typing import Iterable, List, Tuple import numpy as np @@ -71,6 +72,21 @@ def deisotope(self, peaks: Iterable[Tuple[float, float]]) -> List[IsotopeCluster return clusters def _guess_charge(self, mzs: np.ndarray, mz: float, idx: int) -> int: + # Prefer charge assignments that match the 13C spacing. + best_charge = None + best_error = None + for charge in range(1, self.max_charge + 1): + target = mz + C13_MZ_DIFF / charge + match_idx = self._find_peak(mzs, target, idx + 1) + if match_idx is None: + continue + error = self._ppm_error(mzs[match_idx], target) + if best_error is None or error < best_error: + best_error = error + best_charge = charge + if best_charge is not None: + return best_charge + best_charge = 1 best_match = None for charge in range(1, self.max_charge + 1): @@ -86,25 +102,33 @@ def _guess_charge(self, mzs: np.ndarray, mz: float, idx: int) -> int: def _grow_cluster( self, mzs: np.ndarray, intensities: np.ndarray, start_idx: int, charge: int ) -> List[int]: - cluster_indices = [start_idx] - isotope_idx = 1 - prev_intensity = intensities[start_idx] - while True: - match = self._find_isotope_peak( - mzs, cluster_indices[-1] + 1, mzs[start_idx], charge, isotope_idx - ) - if match is None: - break - match_idx, match_diff, _ = match - max_increase = self.max_relative_intensity_increase - if match_diff >= self.heavy_isotope_threshold: - max_increase = self.max_relative_intensity_increase_heavy - if intensities[match_idx] > prev_intensity * max_increase: - break - cluster_indices.append(match_idx) - prev_intensity = intensities[match_idx] - isotope_idx += 1 - return cluster_indices + # Build a connected component of peaks linked by any single-isotope mass + # difference. This avoids splitting fine-structure isotope patterns into + # multiple clusters. + cluster = {start_idx} + queue = deque([start_idx]) + + while queue: + current_idx = queue.popleft() + current_mz = mzs[current_idx] + current_intensity = intensities[current_idx] + + for diff in self.isotope_mass_diffs: + target = current_mz + diff / charge + match_idx = self._find_peak(mzs, target, current_idx + 1) + if match_idx is None or match_idx in cluster: + continue + + max_increase = self.max_relative_intensity_increase + if diff >= self.heavy_isotope_threshold: + max_increase = self.max_relative_intensity_increase_heavy + if intensities[match_idx] > current_intensity * max_increase: + continue + + cluster.add(match_idx) + queue.append(match_idx) + + return sorted(cluster) def _find_isotope_peak( self, mzs: np.ndarray, start_idx: int, base_mz: float, charge: int, isotope_idx: int @@ -132,13 +156,21 @@ def _find_peak(self, mzs: np.ndarray, target: float, start_idx: int) -> int | No else: right = mid - 1 candidates = [] - for idx in (left - 1, left, left + 1): - if 0 <= idx < len(mzs): + for idx in (left - 2, left - 1, left, left + 1, left + 2): + if start_idx <= idx < len(mzs): candidates.append(idx) + + best_idx = None + best_error = None for idx in candidates: - if self._ppm_error(mzs[idx], target) <= self.ppm_tolerance: - return idx - return None + error = self._ppm_error(mzs[idx], target) + if error > self.ppm_tolerance: + continue + if best_error is None or error < best_error: + best_error = error + best_idx = idx + + return best_idx @staticmethod def _ppm_error(mz: float, target: float) -> float: @@ -146,7 +178,7 @@ def _ppm_error(mz: float, target: float) -> float: @staticmethod def _default_isotope_mass_diffs( - min_abundance: float = 0.0005, max_shift: float = 4.0 + min_abundance: float = 0.0001, max_shift: float = 4.0 ) -> Tuple[float, ...]: diffs = {round(C13_MZ_DIFF, 6)} for isotopes in NATURAL_ISOTOPES.values(): From f3f4955a2ee67a7b77e841251f7e5821441e46a4 Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Wed, 28 Jan 2026 17:52:58 +0000 Subject: [PATCH 03/10] Avoid duplicate NH4 adduct in defaults --- vimms/Common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vimms/Common.py b/vimms/Common.py index 56a3f09..f20f123 100644 --- a/vimms/Common.py +++ b/vimms/Common.py @@ -80,7 +80,6 @@ ADDUCT_NAMES_POS = [ "M+H", "M+NH4", - "[M+NH3]+H", "M+Na", "M+K", "M+ACN+Na", @@ -123,7 +122,6 @@ ADDUCT_PRIOR_POS = { "M+H": 1.0, "M+NH4": 0.3, - "[M+NH3]+H": 0.15, "M+Na": 0.25, "M+K": 0.15, "M+ACN+Na": 0.05, From 0b95a4195fb757409f98c4ac427b7fb65b1279e8 Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Thu, 29 Jan 2026 11:15:09 +0000 Subject: [PATCH 04/10] Keep adduct scaling relative to dominant --- vimms/Chemicals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vimms/Chemicals.py b/vimms/Chemicals.py index d167513..6f66815 100644 --- a/vimms/Chemicals.py +++ b/vimms/Chemicals.py @@ -277,7 +277,7 @@ def _get_adduct_proportions(self): proportions[k] = np.zeros_like(proportions[k]) proportions[k][np.argmax(alpha)] = 1.0 else: - proportions[k] = proportions[k] / proportions[k].sum() + proportions[k] = proportions[k] / proportions[k].max() assert len(proportions[k]) == len(self.adduct_names[k]) return proportions From 828602f77626f53a262666267b5238231e28a225 Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Thu, 29 Jan 2026 11:28:56 +0000 Subject: [PATCH 05/10] Preserve monoisotope when pruning isotope distribution --- tests/test_deisotoping.py | 8 ++++++++ vimms/Chemicals.py | 16 +++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/test_deisotoping.py b/tests/test_deisotoping.py index b5415b2..444be7d 100644 --- a/tests/test_deisotoping.py +++ b/tests/test_deisotoping.py @@ -32,6 +32,14 @@ def test_isotope_distribution_chlorine_m2_peak(): assert any(np.isclose(delta, 1.997, atol=0.01) for delta in deltas) +def test_isotope_distribution_preserves_mono_when_filtered(): + formula = Formula("C500H1000") + isotopes = Isotopes(formula) + peaks = isotopes.get_isotopes(total_proportion=0.99, min_prob=0.01) + + assert np.isclose(peaks[0][0], formula.mass, atol=1e-6) + + def test_deisotoper_recovers_mono(): formula = Formula("C10H16N2O2S") isotopes = Isotopes(formula) diff --git a/vimms/Chemicals.py b/vimms/Chemicals.py index 6f66815..5c0ee6a 100644 --- a/vimms/Chemicals.py +++ b/vimms/Chemicals.py @@ -110,12 +110,14 @@ def _get_isotope_distribution( self, total_proportion, min_prob=1e-12, max_peaks=20, max_states=4000, mass_precision=8 ): distribution = [(0.0, 1.0)] + monoisotope_log_prob = 0.0 for element, count in self.formula.atoms.items(): if count <= 0: continue isotopes = NATURAL_ISOTOPES.get(element) if not isotopes or len(isotopes) == 1: continue + monoisotope_log_prob += count * np.log(isotopes[0][1]) mono_mass = isotopes[0][0] base_distribution = [(mass - mono_mass, abundance) for mass, abundance in isotopes] element_distribution = self._power_distribution( @@ -133,7 +135,19 @@ def _get_isotope_distribution( mass_precision=mass_precision, ) - distribution = [(shift, prob) for shift, prob in distribution if prob >= min_prob] + # Ensure the monoisotopic (zero-shift) peak is always present, even if + # truncation/pruning would otherwise drop it. + monoisotope_shift = round(0.0, mass_precision) + monoisotope_prob = float(np.exp(monoisotope_log_prob)) + distribution_dict = dict(distribution) + distribution_dict[monoisotope_shift] = monoisotope_prob + distribution = list(distribution_dict.items()) + + distribution = [ + (shift, prob) + for shift, prob in distribution + if shift == monoisotope_shift or prob >= min_prob + ] distribution.sort(key=lambda x: x[0]) selected = [] From dafc287a335e5d7ebdfa2ab95cf95c3fb989523d Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Sat, 7 Feb 2026 00:30:53 +0000 Subject: [PATCH 06/10] Add more pyopenms helper tests --- pyproject.toml | 2 + tests/test_deisotoping.py | 55 ++++++++++++++++++ vimms/Deisotoping.py | 119 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 892312c..ebddc10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,8 @@ pysmiles = "^2.0.0" numba = "^0.61.2" numba-stats = "^1.10.1" optuna = "^4.3.0" +ms-deisotope = "^0.0.60" +pyopenms = "^3.1.0" [tool.poetry.group.dev.dependencies] jupyterlab = "^4.4.3" diff --git a/tests/test_deisotoping.py b/tests/test_deisotoping.py index 444be7d..71a21cc 100644 --- a/tests/test_deisotoping.py +++ b/tests/test_deisotoping.py @@ -1,6 +1,7 @@ # test deisotoping and isotope generation import numpy as np +import pytest from vimms.Chemicals import Isotopes, Adducts from vimms.Common import Formula, ADDUCT_TERMS, POSITIVE, PROTON_MASS @@ -67,3 +68,57 @@ def test_deisotoper_handles_m_plus_2_only(): assert len(clusters) == 1 assert np.isclose(clusters[0].monoisotopic_mz, 100.0, atol=1e-6) + + +def test_pyopenms_deisotope_helper(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + peaks = [(100.0, 1e5), (101.003355, 5e4), (102.00671, 2e4)] + _, mzs, intensities = deisotope_with_pyopenms(peaks, min_isopeaks=2, max_isopeaks=3) + + assert len(mzs) <= len(peaks) + assert len(mzs) == len(intensities) + + +def test_pyopenms_deadduct_helper(): + pytest.importorskip("pyopenms") + import pyopenms as oms + + from vimms.Deisotoping import deadduct_with_pyopenms + + fmap = oms.FeatureMap() + feature = oms.Feature() + feature.setMZ(100.0) + feature.setIntensity(1e5) + fmap.push_back(feature) + + output = deadduct_with_pyopenms(fmap, adducts=["[M+H]+"]) + assert output.size() >= 1 + + +def test_pyopenms_deadduct_multiple_features(): + pytest.importorskip("pyopenms") + import pyopenms as oms + + from vimms.Deisotoping import deadduct_with_pyopenms + + fmap = oms.FeatureMap() + for mz, intensity in [(100.0, 1e5), (122.989218, 4e4)]: + feature = oms.Feature() + feature.setMZ(mz) + feature.setIntensity(intensity) + fmap.push_back(feature) + + output = deadduct_with_pyopenms(fmap, adducts=["[M+H]+", "[M+Na]+"]) + assert output.size() >= 1 + + +def test_pyopenms_deisotope_empty_peaks(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + spectrum, mzs, intensities = deisotope_with_pyopenms([]) + assert spectrum is None + assert mzs.size == 0 + assert intensities.size == 0 diff --git a/vimms/Deisotoping.py b/vimms/Deisotoping.py index e73893d..0853ea8 100644 --- a/vimms/Deisotoping.py +++ b/vimms/Deisotoping.py @@ -192,3 +192,122 @@ def _default_isotope_mass_diffs( if 0 < diff <= max_shift: diffs.add(round(diff, 6)) return tuple(sorted(diffs)) + + +def deisotope_with_ms_deisotope( + peaks: Iterable[Tuple[float, float]], + charge_range: Tuple[int, int] = (1, 3), + averagine: str = "peptide", + ms1_tolerance: float = 10.0, +): + """ + Deisotope peaks using the optional ms_deisotope package. + + Args: + peaks: iterable of (mz, intensity) pairs. + charge_range: inclusive min/max charge range. + averagine: averagine model name used by ms_deisotope. + ms1_tolerance: ppm tolerance for isotopic matching. + + Returns: + The ms_deisotope deconvolution result object. + """ + from ms_deisotope.deconvolution import deconvolute_peaks + + peaks_array = np.array(list(peaks), dtype=float) + if peaks_array.size == 0: + return [] + + return deconvolute_peaks( + peaks_array, + charge_range=charge_range, + averagine=averagine, + ms1_tolerance=ms1_tolerance, + ) + + +def deisotope_with_pyopenms( + peaks: Iterable[Tuple[float, float]], + fragment_tolerance: float = 10.0, + fragment_unit_ppm: bool = True, + min_charge: int = 1, + max_charge: int = 3, + keep_only_deisotoped: bool = True, + min_isopeaks: int = 2, + max_isopeaks: int = 10, + make_single_charged: bool = False, +): + """ + Deisotope peaks using pyopenms Deisotoper on an MS1-like spectrum. + + Args: + peaks: iterable of (mz, intensity) pairs. + fragment_tolerance: m/z tolerance for matching isotopic peaks. + fragment_unit_ppm: whether the tolerance is in ppm. + min_charge: minimum charge to consider. + max_charge: maximum charge to consider. + keep_only_deisotoped: keep only deisotoped peaks in the output. + min_isopeaks: minimum number of isotopic peaks in a cluster. + max_isopeaks: maximum number of isotopic peaks in a cluster. + make_single_charged: convert all features to single charge if True. + + Returns: + Tuple of (spectrum, peak_mzs, peak_intensities) after deisotoping. + """ + import pyopenms as oms + + peaks_array = np.array(list(peaks), dtype=float) + if peaks_array.size == 0: + return None, np.array([]), np.array([]) + + spectrum = oms.MSSpectrum() + spectrum.set_peaks((peaks_array[:, 0], peaks_array[:, 1])) + + oms.Deisotoper.deisotopeAndSingleCharge( + spectrum, + fragment_tolerance, + fragment_unit_ppm, + min_charge, + max_charge, + keep_only_deisotoped, + min_isopeaks, + max_isopeaks, + make_single_charged, + ) + + mzs, intensities = spectrum.get_peaks() + return spectrum, np.array(mzs), np.array(intensities) + + +def deadduct_with_pyopenms( + feature_map, + adducts: List[str] | None = None, + max_charge: int = 3, + ppm_tolerance: float = 10.0, +): + """ + De-adduct features using pyopenms MetaboliteAdductDecharger. + + Args: + feature_map: pyopenms FeatureMap containing detected features. + adducts: list of adduct strings (e.g. ["[M+H]+", "[M+Na]+"]) or None for defaults. + max_charge: maximum charge to consider. + ppm_tolerance: mass tolerance used for matching (ppm). + + Returns: + De-adducted pyopenms FeatureMap. + """ + import pyopenms as oms + + decharger = oms.MetaboliteAdductDecharger() + params = decharger.getParameters() + params.setValue("mass_error_ppm", ppm_tolerance) + params.setValue("charge_max", max_charge) + if adducts: + adduct_info = oms.AdductInfo() + adduct_info.setAdducts(adducts) + decharger.setAdducts(adduct_info) + decharger.setParameters(params) + output = oms.FeatureMap() + decharger.decharge(feature_map, output) + return output From 0d87851daa8ae2b19d316f936bd1c1cb547c61ca Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Sat, 7 Feb 2026 20:07:52 +0000 Subject: [PATCH 07/10] Improve OpenMS deadducting and e2e tests --- poetry.lock | 332 +++++++++++++++++++++++++++++++- tests/test_deisotoping.py | 384 +++++++++++++++++++++++++++++++++++++- vimms/Deisotoping.py | 218 ++++++++++++++++++++-- 3 files changed, 913 insertions(+), 21 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7daeb4a..7babbbf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "alembic" @@ -206,6 +206,131 @@ files = [ [package.extras] dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] +[[package]] +name = "backports-zstd" +version = "1.3.0" +description = "Backport of compression.zstd" +optional = false +python-versions = "<3.14,>=3.9" +groups = ["main"] +files = [ + {file = "backports_zstd-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a2db17a6d9bf6b4dc223b3f6414aa9db6d1afe9de9bff61d582c2934ca456a0"}, + {file = "backports_zstd-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a7f16b98ba81780a9517ce6c493e1aea9b7d72de2b1efa08375136c270e1ecba"}, + {file = "backports_zstd-1.3.0-cp310-cp310-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:1124a169a647671ccb4654a0ef1d0b42d6735c45ce3d0adf609df22fb1f099db"}, + {file = "backports_zstd-1.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8410fda08b36202d01ab4503f6787c763898888cb1a48c19fce94711563d3ee3"}, + {file = "backports_zstd-1.3.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab139d1fc0e91a697e82fa834e6404098802f11b6035607174776173ded9a2cc"}, + {file = "backports_zstd-1.3.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6f3115d203f387f77c23b5461fb6678d282d4f276f9f39298ad242b00120afc7"}, + {file = "backports_zstd-1.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:116f65cce84e215dfac0414924b051faf8d29dc7188cf3944dd1e5be8dd15a32"}, + {file = "backports_zstd-1.3.0-cp310-cp310-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:04def169e4a9ae291298124da4e097c6d6545d0e93164f934b716da04d24630a"}, + {file = "backports_zstd-1.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:481b586291ef02a250f03d4c31a37c9881e5e93556568abbd20ca1ad720d443f"}, + {file = "backports_zstd-1.3.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0290979eea67f7275fa42d5859cc5bea94f2c08cca6bc36396673476773d2bad"}, + {file = "backports_zstd-1.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:01c699d8c803dc9f9c9d6ede21b75ec99f45c3b411821011692befca538928cb"}, + {file = "backports_zstd-1.3.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:2c662912cfc1a5ebd1d2162ac651549d58bd3c97a8096130ec13c703fca355f2"}, + {file = "backports_zstd-1.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:3180c8eb085396928e9946167e610aa625922b82c3e2263c5f17000556370168"}, + {file = "backports_zstd-1.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5b9a8c75a294e7ffa18fc8425a763facc366435a8b442e4dffdc19fa9499a22c"}, + {file = "backports_zstd-1.3.0-cp310-cp310-win32.whl", hash = "sha256:845defdb172385f17123d92a00d2e952d341e9ae310bfa2410c292bf03846034"}, + {file = "backports_zstd-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:43a9fea6299c801da85221e387b32d90a9ad7c62aa2a34edf525359ce5ad8f3a"}, + {file = "backports_zstd-1.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:df8473cb117e1316e6c6101f2724e025bd8f50af2dc009d0001c0aabfb5eb57c"}, + {file = "backports_zstd-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:249f90b39d3741c48620021a968b35f268ca70e35f555abeea9ff95a451f35f9"}, + {file = "backports_zstd-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b0e71e83e46154a9d3ced6d4de9a2fea8207ee1e4832aeecf364dc125eda305c"}, + {file = "backports_zstd-1.3.0-cp311-cp311-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:cbc6193acd21f96760c94dd71bf32b161223e8503f5277acb0a5ab54e5598957"}, + {file = "backports_zstd-1.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1df583adc0ae84a8d13d7139f42eade6d90182b1dd3e0d28f7df3c564b9fd55d"}, + {file = "backports_zstd-1.3.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d833fc23aa3cc2e05aeffc7cfadd87b796654ad3a7fb214555cda3f1db2d4dc2"}, + {file = "backports_zstd-1.3.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:142178fe981061f1d2a57c5348f2cd31a3b6397a35593e7a17dbda817b793a7f"}, + {file = "backports_zstd-1.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5eed0a09a163f3a8125a857cb031be87ed052e4a47bc75085ed7fca786e9bb5b"}, + {file = "backports_zstd-1.3.0-cp311-cp311-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:60aa483fef5843749e993dde01229e5eedebca8c283023d27d6bf6800d1d4ce3"}, + {file = "backports_zstd-1.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ea0886c1b619773544546e243ed73f6d6c2b1ae3c00c904ccc9903a352d731e1"}, + {file = "backports_zstd-1.3.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5e137657c830a5ce99be40a1d713eb1d246bae488ada28ff0666ac4387aebdd5"}, + {file = "backports_zstd-1.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:94048c8089755e482e4b34608029cf1142523a625873c272be2b1c9253871a72"}, + {file = "backports_zstd-1.3.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:d339c1ec40485e97e600eb9a285fb13169dbf44c5094b945788a62f38b96e533"}, + {file = "backports_zstd-1.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:8aeee9210c54cf8bf83f4d263a6d0d6e7a0298aeb5a14a0a95e90487c5c3157c"}, + {file = "backports_zstd-1.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ba7114a3099e5ea05cbb46568bd0e08bca2ca11e12c6a7b563a24b86b2b4a67f"}, + {file = "backports_zstd-1.3.0-cp311-cp311-win32.whl", hash = "sha256:08dfdfb85da5915383bfae680b6ac10ab5769ab22e690f9a854320720011ae8e"}, + {file = "backports_zstd-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d8aac2e7cdcc8f310c16f98a0062b48d0a081dbb82862794f4f4f5bdafde30a4"}, + {file = "backports_zstd-1.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:440ef1be06e82dc0d69dbb57177f2ce98bbd2151013ee7e551e2f2b54caa6120"}, + {file = "backports_zstd-1.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f4a292e357f3046d18766ce06d990ccbab97411708d3acb934e63529c2ea7786"}, + {file = "backports_zstd-1.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fb4c386f38323698991b38edcc9c091d46d4713f5df02a3b5c80a28b40e289ea"}, + {file = "backports_zstd-1.3.0-cp312-cp312-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f52523d2bdada29e653261abdc9cfcecd9e5500d305708b7e37caddb24909d4e"}, + {file = "backports_zstd-1.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3321d00beaacbd647252a7f581c1e1cdbdbda2407f2addce4bfb10e8e404b7c7"}, + {file = "backports_zstd-1.3.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:88f94d238ef36c639c0ae17cf41054ce103da9c4d399c6a778ce82690d9f4919"}, + {file = "backports_zstd-1.3.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:97d8c78fe20c7442c810adccfd5e3ea6a4e6f4f1fa4c73da2bc083260ebead17"}, + {file = "backports_zstd-1.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eefda80c3dbfbd924f1c317e7b0543d39304ee645583cb58bae29e19f42948ed"}, + {file = "backports_zstd-1.3.0-cp312-cp312-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2ab5d3b5a54a674f4f6367bb9e0914063f22cd102323876135e9cc7a8f14f17e"}, + {file = "backports_zstd-1.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7558fb0e8c8197c59a5f80c56bf8f56c3690c45fd62f14e9e2081661556e3e64"}, + {file = "backports_zstd-1.3.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:27744870e38f017159b9c0241ea51562f94c7fefcfa4c5190fb3ec4a65a7fc63"}, + {file = "backports_zstd-1.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b099750755bb74c280827c7d68de621da0f245189082ab48ff91bda0ec2db9df"}, + {file = "backports_zstd-1.3.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5434e86f2836d453ae3e19a2711449683b7e21e107686838d12a255ad256ca99"}, + {file = "backports_zstd-1.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:407e451f64e2f357c9218f5be4e372bb6102d7ae88582d415262a9d0a4f9b625"}, + {file = "backports_zstd-1.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:58a071f3c198c781b2df801070290b7174e3ff61875454e9df93ab7ea9ea832b"}, + {file = "backports_zstd-1.3.0-cp312-cp312-win32.whl", hash = "sha256:21a9a542ccc7958ddb51ae6e46d8ed25d585b54d0d52aaa1c8da431ea158046a"}, + {file = "backports_zstd-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:89ea8281821123b071a06b30b80da8e4d8a2b40a4f57315a19850337a21297ac"}, + {file = "backports_zstd-1.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:f6843ecb181480e423b02f60fe29e393cbc31a95fb532acdf0d3a2c87bd50ce3"}, + {file = "backports_zstd-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e86e03e3661900955f01afed6c59cae9baa63574e3b66896d99b7de97eaffce9"}, + {file = "backports_zstd-1.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:41974dcacc9824c1effe1c8d2f9d762bcf47d265ca4581a3c63321c7b06c61f0"}, + {file = "backports_zstd-1.3.0-cp313-cp313-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:3090a97738d6ce9545d3ca5446df43370928092a962cbc0153e5445a947e98ed"}, + {file = "backports_zstd-1.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ddc874638abf03ea1ff3b0525b4a26a8d0adf7cb46a448c3449f08e4abc276b3"}, + {file = "backports_zstd-1.3.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:db609e57b8ed88b3472930c87e93c08a4bbd5ffeb94608cd9c7c6f0ac0e166c6"}, + {file = "backports_zstd-1.3.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5f13033a3dd95f323c067199f2e61b4589a7880188ef4ef356c7ffbdb78a9f11"}, + {file = "backports_zstd-1.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c4c7bcda5619a754726e7f5b391827f5efbe4bed8e62e9ec7490d42bff18aa6"}, + {file = "backports_zstd-1.3.0-cp313-cp313-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:884a94c40f27affe986f394f219a4fd3cbbd08e1cff2e028d29d467574cd266e"}, + {file = "backports_zstd-1.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497f5765126f11a5b3fd8fedfdae0166d1dd867e7179b8148370a3313d047197"}, + {file = "backports_zstd-1.3.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a6ff6769948bb29bba07e1c2e8582d5a9765192a366108e42d6581a458475881"}, + {file = "backports_zstd-1.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:1623e5bff1acd9c8ef90d24fc548110f20df2d14432bfe5de59e76fc036824ef"}, + {file = "backports_zstd-1.3.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:622c28306dcc429c8f2057fc4421d5722b1f22968d299025b35d71b50cfd4e03"}, + {file = "backports_zstd-1.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09a2785e410ed2e812cb39b684ef5eb55083a5897bfd0e6f5de3bbd2c6345f70"}, + {file = "backports_zstd-1.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ade1f4127fdbe36a02f8067d75aa79c1ea1c8a306bf63c7b818bb7b530e1beaa"}, + {file = "backports_zstd-1.3.0-cp313-cp313-win32.whl", hash = "sha256:668e6fb1805b825cb7504c71436f7b28d4d792bb2663ee901ec9a2bb15804437"}, + {file = "backports_zstd-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:385bdadf0ea8fe6ba780a95e4c7d7f018db7bafdd630932f0f9f0fad05d608ff"}, + {file = "backports_zstd-1.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:4321a8a367537224b3559fe7aeb8012b98aea2a60a737e59e51d86e2e856fe0a"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:10057d66fa4f0a7d3f6419ffb84b4fe61088da572e3ac4446134a1c8089e4166"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4abf29d706ba05f658ca0247eb55675bcc00e10f12bca15736e45b05f1f2d2dc"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:127b0d73c745b0684da3d95c31c0939570810dad8967dfe8231eea8f0e047b2f"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0205ef809fb38bb5ca7f59fa03993596f918768b9378fb7fbd8a68889a6ce028"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1c389b667b0b07915781aa28beabf2481f11a6062a1a081873c4c443b98601a7"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8e7ac5ef693d49d6fb35cd7bbb98c4762cfea94a8bd2bf2ab112027004f70b11"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5d5543945aae2a76a850b23f283249424f535de6a622d6002957b7d971e6a36d"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e38be15ebce82737deda2c9410c1f942f1df9da74121049243a009810432db75"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3e3f58c76f4730607a4e0130d629173aa114ae72a5c8d3d5ad94e1bf51f18d8"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:b808bf889722d889b792f7894e19c1f904bb0e9092d8c0eb0787b939b08bad9a"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f7be27d56f2f715bcd252d0c65c232146d8e1e039c7e2835b8a3ad3dc88bc508"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:cbe341c7fcc723893663a37175ba859328b907a4e6d2d40a4c26629cc55efb67"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:b4116a9e12dfcd834dd9132cf6a94657bf0d328cba5b295f26de26ea0ae1adc8"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1049e804cc8754290b24dab383d4d6ed0b7f794ad8338813ddcb3907d15a89d0"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-win32.whl", hash = "sha256:7d3f0f2499d2049ec53d2674c605a4b3052c217cc7ee49c05258046411685adc"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eb2f8fab0b1ea05148394cb34a9e543a43477178765f2d6e7c84ed332e34935e"}, + {file = "backports_zstd-1.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:c66ad9eb5bfbe28c2387b7fc58ddcdecfb336d6e4e60bcba1694a906c1f21a6c"}, + {file = "backports_zstd-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cab7dc828e19d8871935f3061e0550713aacb230fc3a3919bed0440a1295c255"}, + {file = "backports_zstd-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef2a0bfb7aa590134ef43479cda439de054d5503b1be4756aca0afa9181cc3a5"}, + {file = "backports_zstd-1.3.0-cp39-cp39-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:78693e344544bceddc6f475873e2353b5990d74a836b4f1b8a182e1c55c8ae05"}, + {file = "backports_zstd-1.3.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9d75cca9bed9da91c6e8bfdd4807fc1af08c8b25716cfdc5d50c119071641cf"}, + {file = "backports_zstd-1.3.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c3d777a0cacca20fa8ea3a24178e7cae872fcec26cc84ebe3250b374f9127a21"}, + {file = "backports_zstd-1.3.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:82332651e737b16025397af59405a355e354254483fa93c585613d314c7ac199"}, + {file = "backports_zstd-1.3.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:59b52ad18326c0f9473906de3caf47ade68a063dcbe1663b0351638421fd5458"}, + {file = "backports_zstd-1.3.0-cp39-cp39-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:472f590cf3270d79dae699c9641db9400e794a7ebe8574da7edc3ca3abf342cc"}, + {file = "backports_zstd-1.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1f215062302f450ac61ff23991ee6619f07add6c20e1f4659bf9a500b37fc7c2"}, + {file = "backports_zstd-1.3.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:102392989442094f3cf1a4bf01fdd4db746d0e755341888998ffbbffdf76a207"}, + {file = "backports_zstd-1.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:88961d8c5760a4febeba78d2cdff2e380a05d18cbc2089d985684fc3d6b3b836"}, + {file = "backports_zstd-1.3.0-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:3ddebc1b6f8a37d63cdf18bf98854c62ff2710aeba7057cb5d2bda58c885bbd2"}, + {file = "backports_zstd-1.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:79efb1ddb7d22e3eabdee8ab9fb0020fce951dafcac787fdb7ec2d2cbc4f170a"}, + {file = "backports_zstd-1.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f6d7aa2caa38b9e0d68004f0618290a4e4b0eb26afc482bd5e5c5fba6e40fd94"}, + {file = "backports_zstd-1.3.0-cp39-cp39-win32.whl", hash = "sha256:975ba1c52200f8d01adf66ea4c353da8e0f967687406ac1bf1d9051a088242fe"}, + {file = "backports_zstd-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:f5fca92a20e6ef22702914237c4f99f50d5450941529100ef3f5351f5e1e9eb6"}, + {file = "backports_zstd-1.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:3895857d06ba58a2bea21019843bc53b0b4df1ce64b55a184c5fb6236b798947"}, + {file = "backports_zstd-1.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3ab0d5632b84eff4355c42a04668cfe6466f7d390890f718978582bd1ff36949"}, + {file = "backports_zstd-1.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6b97cea95dbb1a97c02afd718155fad93f747815069722107a429804c355e206"}, + {file = "backports_zstd-1.3.0-pp310-pypy310_pp73-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:477895f2642f9397aeba69618df2c91d7f336e02df83d1e623ac37c5d3a5115e"}, + {file = "backports_zstd-1.3.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:330172aaf5fd3bfa53f49318abc6d1d4238cb043c384cf71f7b8f0fe2fb7ce31"}, + {file = "backports_zstd-1.3.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32974e71eff15897ed3f8b7766a753d9f3197ea4f1c9025d80f8de099a691b99"}, + {file = "backports_zstd-1.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:993e3a34eaba5928a2065545e34bf75c65b9c34ecb67e43d5ef49b16cc182077"}, + {file = "backports_zstd-1.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:968167d29f012cee7b112ad031a8925e484e97e99288e55e4d62962c3a1013e3"}, + {file = "backports_zstd-1.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8f6fc7d62b71083b574193dd8fb3a60e6bb34880cc0132aad242943af301f7a"}, + {file = "backports_zstd-1.3.0-pp311-pypy311_pp73-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:e0f2eca6aac280fdb77991ad3362487ee91a7fb064ad40043fb5a0bf5a376943"}, + {file = "backports_zstd-1.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:676eb5e177d4ef528cf3baaeea4fffe05f664e4dd985d3ac06960ef4619c81a9"}, + {file = "backports_zstd-1.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:199eb9bd8aca6a9d489c41a682fad22c587dffe57b613d0fe6d492d0d38ce7c5"}, + {file = "backports_zstd-1.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:2524bd6777a828d5e7ccd7bd1a57f9e7007ae654fc2bd1bc1a207f6428674e4a"}, + {file = "backports_zstd-1.3.0.tar.gz", hash = "sha256:e8b2d68e2812f5c9970cabc5e21da8b409b5ed04e79b4585dbffa33e9b45ebe2"}, +] + [[package]] name = "backrefs" version = "5.9" @@ -313,6 +438,32 @@ webencodings = "*" [package.extras] css = ["tinycss2 (>=1.1.0,<1.5)"] +[[package]] +name = "brain-isotopic-distribution" +version = "1.5.19" +description = "Fast and efficient theoretical isotopic profile generation" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "brain_isotopic_distribution-1.5.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9cb8a5009177eacb43c8323f88ecd0b58385db4293a38593ccfb6caf52fe2e56"}, + {file = "brain_isotopic_distribution-1.5.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd6675cdc9d4d815df8b1571c209d35dccb02fc95492e55ca3ab49825073dfdb"}, + {file = "brain_isotopic_distribution-1.5.19-cp310-cp310-win_amd64.whl", hash = "sha256:855c040f3ee967202e0c55c88c2bfca0050c7ca7b99c5001487be68155eac8c8"}, + {file = "brain_isotopic_distribution-1.5.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:417faed1d00ed8a973241f992895d728ede46bd8778e4bce7398fdb6b9467f5b"}, + {file = "brain_isotopic_distribution-1.5.19-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f4b9f6ac714e958ebf263276a484a8368778e3e1ade8fdf4a87e0576e1384cf"}, + {file = "brain_isotopic_distribution-1.5.19-cp311-cp311-win_amd64.whl", hash = "sha256:42b19145326fc15362a9ce943d69448b766b4c1f6cf046fa84cb40d428d9c698"}, + {file = "brain_isotopic_distribution-1.5.19-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7e59fe63c4a57f2db17b65d5aab98ae470ae7adeaef6a8c6c7dc52116b9d7925"}, + {file = "brain_isotopic_distribution-1.5.19-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0651e9490880c5d699f573277705b2ef0972d32a2b752ec3e49b768a7f75b597"}, + {file = "brain_isotopic_distribution-1.5.19-cp312-cp312-win_amd64.whl", hash = "sha256:5bca2d1dd1e7bdbe760456583cf158444686566f30025630fc685308af57cea6"}, + {file = "brain_isotopic_distribution-1.5.19-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c894611c3756226ca7bc962fbca8154dbe430501ed8c191d995ddb7d2d935994"}, + {file = "brain_isotopic_distribution-1.5.19-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13926c14efcc7eb13d7d8a61d30985f7eaa866b8d58dc847da724d48d560ca60"}, + {file = "brain_isotopic_distribution-1.5.19-cp38-cp38-win_amd64.whl", hash = "sha256:6005068203fda8438724d20c01cb1b717095211939f56ea1bac09753106ff1a9"}, + {file = "brain_isotopic_distribution-1.5.19-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fa751b98ef59981308d42ea40102dc8c3cddad038b7c93b791adbceddb9f081e"}, + {file = "brain_isotopic_distribution-1.5.19-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb622c81e8d5633bf334fe151bba54abf76889251ccb19e43660d5c3db3544af"}, + {file = "brain_isotopic_distribution-1.5.19-cp39-cp39-win_amd64.whl", hash = "sha256:a4bc7da61d21a8d3e7a5769665b1edbf4e20c7ed3a7da604d26c478e4296309a"}, + {file = "brain_isotopic_distribution-1.5.19.tar.gz", hash = "sha256:cad44fcb1ddbd26787dafb65345558df5119a7928b6e773fbb9dd5fd4f3334de"}, +] + [[package]] name = "certifi" version = "2025.4.26" @@ -817,6 +968,22 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "dill" +version = "0.4.1" +description = "serialize all of Python" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "dill-0.4.1-py3-none-any.whl", hash = "sha256:1e1ce33e978ae97fcfcff5638477032b801c46c7c65cf717f95fbc2248f79a9d"}, + {file = "dill-0.4.1.tar.gz", hash = "sha256:423092df4182177d4d8ba8290c8a5b640c66ab35ec7da59ccfa00f6fa3eea5fa"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] +profile = ["gprof2dot (>=2022.7.29)"] + [[package]] name = "distlib" version = "0.3.9" @@ -2423,6 +2590,75 @@ files = [ all = ["Flask", "pandas"] gui = ["wxPython (>=4.0)"] +[[package]] +name = "ms-deisotope" +version = "0.0.60" +description = "Access, Deisotope, and Charge Deconvolute Mass Spectra" +optional = false +python-versions = ">3.8" +groups = ["main"] +files = [ + {file = "ms_deisotope-0.0.60-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3b0d92490b22b5013205307dac5d67ec793bdef8064af8c9ba719ae811774d80"}, + {file = "ms_deisotope-0.0.60-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:14a1ca8ec7bbb4208f13a7afb91b12e8f69fe9628dc11ff118ec722b21a4711c"}, + {file = "ms_deisotope-0.0.60-cp310-cp310-win_amd64.whl", hash = "sha256:ed231c25a43453ea6b0d30c947bd8213b86b7e7345669641a170d8848ea41882"}, + {file = "ms_deisotope-0.0.60-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af794e9b08ae5e25b1d1443d01ba2fbd33b9921e81ecff486dc957aa3a5be330"}, + {file = "ms_deisotope-0.0.60-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:27b9349a09119293dfc82b9b03768c2c6965cb632de3782dff3db81292e3686c"}, + {file = "ms_deisotope-0.0.60-cp311-cp311-win_amd64.whl", hash = "sha256:d384c6a7ea8dabb8a3ebf228b758208b49367756076538a3e07edc8cae47718c"}, + {file = "ms_deisotope-0.0.60-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7ea518852ec9ce0296491d1789fd86b90c6d7910f0f29880f19b9f6d40bbd912"}, + {file = "ms_deisotope-0.0.60-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f171a8b76e0217b46e3ef9b08811c965172d5e21a348128e3ece5adec522d9d1"}, + {file = "ms_deisotope-0.0.60-cp312-cp312-win_amd64.whl", hash = "sha256:bae54fe9100a0677761e2c2d9a463661b9352dd8799dcb2918bd9c21a4795b4c"}, + {file = "ms_deisotope-0.0.60.tar.gz", hash = "sha256:a7141ad90e60e67e843d14c7396e925ea1f97e343248515452ecc40dff79b23c"}, +] + +[package.dependencies] +brain-isotopic-distribution = ">=1.5.8" +dill = "*" +lxml = "*" +ms_peak_picker = ">=0.1.46" +numpy = {version = ">=2.0.0", markers = "python_version >= \"3.9\""} +psims = ">=1.3.0" +pyteomics = ">=4.6.2" +python-idzip = ">=0.3.2" +pyzstd = "*" +scipy = "*" +six = "*" + +[package.extras] +all = ["click (>=8.2)", "comtypes", "h5py", "hdf5plugin", "matplotlib", "pythonnet"] +cli = ["click (>=8.2)"] +com = ["comtypes"] +mzmlb = ["h5py", "hdf5plugin"] +net = ["pythonnet"] +plot = ["matplotlib"] + +[[package]] +name = "ms-peak-picker" +version = "0.1.46" +description = "A library to pick peaks from mass spectral data" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "ms_peak_picker-0.1.46-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f3a826b379b36d174a5caeb23cb3ecbac416fe35ac2f614a0eda5083e417648c"}, + {file = "ms_peak_picker-0.1.46-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc53870147553286710ae250f8f8565b73a523da92b9cef1b32f326ef4134302"}, + {file = "ms_peak_picker-0.1.46-cp310-cp310-win_amd64.whl", hash = "sha256:74b471a69b8bf0a6e6444eef18f7da35d3ac1cbefadb45f5666c3e39fe969c5d"}, + {file = "ms_peak_picker-0.1.46-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a04066ddf32d204db164c50a5bae93932815464d603af8a4fa39a92ffad16c38"}, + {file = "ms_peak_picker-0.1.46-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b89e8e21490ceffcc5a8256ca03a8fd501a40d9b4ec9bcf85dc7b51a39a9f882"}, + {file = "ms_peak_picker-0.1.46-cp311-cp311-win_amd64.whl", hash = "sha256:a9e1378a452d1f674746c1e27a6a766b409d99aa2040b0bdd1db8113575b7575"}, + {file = "ms_peak_picker-0.1.46-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ae12412f254e5d69e406503a0ca6bfcebf586f673b3cb053c304859c3641a012"}, + {file = "ms_peak_picker-0.1.46-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:772ce0b7ba4b88ca4c8ce2321a7dd19c7b47a81a43d2606b587dce141d1cf94f"}, + {file = "ms_peak_picker-0.1.46-cp38-cp38-win_amd64.whl", hash = "sha256:10550cf91369906badd0723117be9a37bcbd97df89d500aa2c99222d976eaf32"}, + {file = "ms_peak_picker-0.1.46-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3c504cf0657054ae14a4b8e043ce2754872e5e1065432220698fd7b16ff1bd3a"}, + {file = "ms_peak_picker-0.1.46-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fc65d60ec8c6e71efda3dc2d65c815701e26de36c59fa00b92a0f6491e99a6c"}, + {file = "ms_peak_picker-0.1.46-cp39-cp39-win_amd64.whl", hash = "sha256:fa663da2bd09ff407091b89e552f30612a50a594f7e28f4b97b70b485cece1ac"}, + {file = "ms_peak_picker-0.1.46.tar.gz", hash = "sha256:3fee82ede3ea53a3b266d514fe26da68b48acbdfdf01f25870b51c5754f35670"}, +] + +[package.dependencies] +numpy = "*" +scipy = "*" +six = "*" + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3339,6 +3575,46 @@ full = ["ms_deisotope", "plotly (<5.0)", "pynumpress (>=0.0.4)"] plot = ["plotly (<5.0)"] pynumpress = ["pynumpress (>=0.0.4)"] +[[package]] +name = "pyopenms" +version = "3.5.0" +description = "Python wrapper for C++ LC-MS library OpenMS" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pyopenms-3.5.0-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:0f906fbe6d52d5d00a948fa9cd3f56846125e9c7fe1ca1fd50275c1790428779"}, + {file = "pyopenms-3.5.0-cp310-cp310-macosx_15_0_x86_64.whl", hash = "sha256:5fb116a51d4b9c7e5954d25855a68306e01bbefff543223c95c7ef58b0be5817"}, + {file = "pyopenms-3.5.0-cp310-cp310-manylinux_2_34_aarch64.whl", hash = "sha256:c593f8a5fd5540ef9f536b0e0b73d4b3189010d6b4562d91860265db46f4a759"}, + {file = "pyopenms-3.5.0-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:3efaf1e33caf8042285c239cd292cbdd0f58309221c1c9eaf1c5ab3867a1abb8"}, + {file = "pyopenms-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:569c1a9a82ab56fd5d532932916f86596da70fc103ec2f126b245f90312c2010"}, + {file = "pyopenms-3.5.0-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:be4af8bfb6af8184af7d1cce1d2fb8514d26e7137692f5e9f59d64df8d112499"}, + {file = "pyopenms-3.5.0-cp311-cp311-macosx_15_0_x86_64.whl", hash = "sha256:809062158ed9ab6fd050ec31f544baacfe35a4182f89ab0f469edf7fe941ae06"}, + {file = "pyopenms-3.5.0-cp311-cp311-manylinux_2_34_aarch64.whl", hash = "sha256:0163fd9c8de8793bbe91b2bd341c6f6dcdd69a6160fcf763d3089adb18585e7b"}, + {file = "pyopenms-3.5.0-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:6ed007843237436714dd1d5f55a418bd81300e9316853492c3fd4a619641e90d"}, + {file = "pyopenms-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:741f385677fddf3e0f8ec9dace72035fb39a74d5f28cd5c4c5d2893488192d73"}, + {file = "pyopenms-3.5.0-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:fc31741cd079d157dd7fe8e4d59078c10d488cc0907c3a21cfcfbac5ba0b3692"}, + {file = "pyopenms-3.5.0-cp312-cp312-macosx_15_0_x86_64.whl", hash = "sha256:7fa3681d79e86132a78aa7f3947df9f494ef853768f67ce71c3e9a9828eef47b"}, + {file = "pyopenms-3.5.0-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:38585df8b4d11fe5134ea49820905f8e0fd2bcebe8f104031abff727902041ee"}, + {file = "pyopenms-3.5.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:41b5326266cb9336bff8d0d418d9fcdd43f17801fd35129d98a1efe1c416a279"}, + {file = "pyopenms-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:b94d86dd1ebd1b1cdd5ddeeb1978a3d1e3902c80241195da612a8664a4b44a61"}, + {file = "pyopenms-3.5.0-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:1169fda7d073e44679b14cf8676adbeb097ede1eec0b508e199c4b4fa77686df"}, + {file = "pyopenms-3.5.0-cp313-cp313-macosx_15_0_x86_64.whl", hash = "sha256:e8762f7121671cbf9a1a288b022ab073edf25a3735e748dca03e32abcd2115d6"}, + {file = "pyopenms-3.5.0-cp313-cp313-manylinux_2_34_aarch64.whl", hash = "sha256:07b45b4ee695b4ccc56463bb867efb908fcda8a8a90ab76f67c8959f488a4098"}, + {file = "pyopenms-3.5.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:624347c140c5df8757733d2d3adee31d1b844ac104a5dde4ac4111865264c388"}, + {file = "pyopenms-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:51c5944366a2efb4a9f393da875f91e390dcd2dba5ccf736421ab322e4345747"}, + {file = "pyopenms-3.5.0-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:8bdd98583496a0a2ecb774b5d46a4db0e5d2f3dbd58af18cda401d45d3152003"}, + {file = "pyopenms-3.5.0-cp314-cp314-macosx_15_0_x86_64.whl", hash = "sha256:e0a694d2c2d0cdb54d345dee5128e9e26ec5babc64623aab900fd89b5346ec8e"}, + {file = "pyopenms-3.5.0-cp314-cp314-manylinux_2_34_aarch64.whl", hash = "sha256:2c92b6dc46d77d26552c8f727382b4358b266f8f74131d2bb5cdaf6c05920229"}, + {file = "pyopenms-3.5.0-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:d9fb8fa5b3d9e11111f08c1f4713f3c18b1f5d6469bc0a1c592905fe9b296bc4"}, + {file = "pyopenms-3.5.0-cp314-cp314-win_amd64.whl", hash = "sha256:b1184710d15cb3cec703f2442bf49c24390b1310a40841fc22ea6b63d15be62a"}, +] + +[package.dependencies] +matplotlib = ">=3.5" +numpy = ">=1.25.0" +pandas = "*" + [[package]] name = "pyparsing" version = "3.2.3" @@ -3370,6 +3646,30 @@ files = [ networkx = "*" pbr = "*" +[[package]] +name = "pyteomics" +version = "4.7.5" +description = "A framework for proteomics data analysis." +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pyteomics-4.7.5-py2.py3-none-any.whl", hash = "sha256:9b8008ad8d8bbbc6856c4e804bc88e018df44809cd9a86900862b311e760862d"}, + {file = "pyteomics-4.7.5-py3-none-any.whl", hash = "sha256:5155e1d2581845926e49b0abd0be8cfd6ea45ffd3511958b805347037c5934c8"}, + {file = "pyteomics-4.7.5.tar.gz", hash = "sha256:382aeaa8b921bdd2a7e5b4aa9fe46c6184bb43701205a845b4b861ee3e88f46a"}, +] + +[package.extras] +all = ["h5py", "hdf5plugin", "lxml", "matplotlib", "numpy", "pandas (>=0.17)", "psims (>v0.1.42)", "pynumpress", "scikit-learn", "sqlalchemy"] +df = ["pandas (>=0.17)"] +graphics = ["matplotlib"] +mzmlb = ["h5py", "hdf5plugin"] +numpress = ["pynumpress"] +proforma = ["psims (>v0.1.42)"] +tda = ["numpy"] +unimod = ["lxml", "sqlalchemy"] +xml = ["lxml", "numpy"] + [[package]] name = "pytest" version = "8.4.0" @@ -3426,6 +3726,18 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-idzip" +version = "0.3.10" +description = "DictZip - Random Access gzip files" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "python_idzip-0.3.10-py3-none-any.whl", hash = "sha256:7b0dfc782b6d33382f85f95a86ac8cb586659d0951303ed3f02a748c0969280b"}, + {file = "python_idzip-0.3.10.tar.gz", hash = "sha256:dd6f688225b0ba94e4c58e2c00aab807ec1206a37f90b04ccf161345eec39837"}, +] + [[package]] name = "python-json-logger" version = "3.3.0" @@ -3682,6 +3994,22 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} +[[package]] +name = "pyzstd" +version = "0.19.1" +description = "Support for Zstandard (zstd) compression" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "pyzstd-0.19.1-py3-none-any.whl", hash = "sha256:267e2eb0de0291dfcb7ccfebc4ffe75b2f9d84e7007ac38844f6ccafc6dce081"}, + {file = "pyzstd-0.19.1.tar.gz", hash = "sha256:36723d3c915b3981de9198d0a2c82b2f5fe3eaa36e4d8d586937830a8afc7d72"}, +] + +[package.dependencies] +backports-zstd = {version = ">=1.0.0", markers = "python_version < \"3.14\""} +typing-extensions = {version = ">=4.13.2", markers = "python_version < \"3.13\""} + [[package]] name = "referencing" version = "0.36.2" @@ -4719,4 +5047,4 @@ dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "bec8134931993bede996136f67e48ce7c94b80f9c9db22c86ab52241bb35b0f5" +content-hash = "5afba1e135d061d1ca0cff6c57c4afcb7ac491f44a0f2af555b7f69a73073217" diff --git a/tests/test_deisotoping.py b/tests/test_deisotoping.py index 71a21cc..7e239b4 100644 --- a/tests/test_deisotoping.py +++ b/tests/test_deisotoping.py @@ -4,11 +4,37 @@ import pytest from vimms.Chemicals import Isotopes, Adducts -from vimms.Common import Formula, ADDUCT_TERMS, POSITIVE, PROTON_MASS +from vimms.Common import C13_MZ_DIFF, Formula, ADDUCT_TERMS, POSITIVE, PROTON_MASS from vimms.Deisotoping import Deisotoper from vimms.MassSpecUtils import adduct_transformation +def _fmap_to_merged_peaks(feature_map): + # Consolidate near-duplicate neutral masses emitted by OpenMS MFD. + # We use an absolute tolerance rather than ppm because these duplicates + # are typically micro-Da artifacts (e.g. atomic-vs-ion mass handling). + abs_tol = 1e-3 + + peaks = sorted( + ((float(feature.getMZ()), float(feature.getIntensity())) for feature in feature_map), + key=lambda x: x[0], + ) + if not peaks: + return [] + + merged: list[tuple[float, float]] = [] + current_mz, current_intensity = peaks[0] + for mz, intensity in peaks[1:]: + if abs(mz - current_mz) <= abs_tol: + current_intensity += intensity + continue + merged.append((current_mz, current_intensity)) + current_mz, current_intensity = mz, intensity + merged.append((current_mz, current_intensity)) + + return merged + + def test_isotope_distribution_multi_element(): formula = Formula("C10H16N2O2S") isotopes = Isotopes(formula) @@ -122,3 +148,359 @@ def test_pyopenms_deisotope_empty_peaks(): assert spectrum is None assert mzs.size == 0 assert intensities.size == 0 + + +def test_pyopenms_deisotope_sorts_input(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + peaks = [(102.00671, 2e4), (100.0, 1e5), (101.003355, 5e4)] + _, mzs, intensities = deisotope_with_pyopenms(peaks, min_isopeaks=2, max_isopeaks=3) + + assert mzs.size == intensities.size + assert all(mzs[i] <= mzs[i + 1] for i in range(len(mzs) - 1)) + + +def test_pyopenms_deisotope_keeps_non_isotopic_peaks_by_default(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + peaks = [(100.0, 1e5)] + _, mzs, intensities = deisotope_with_pyopenms(peaks) + + assert mzs.size == 1 + assert intensities.size == 1 + assert np.isclose(mzs[0], 100.0, atol=1e-12) + + +def test_pyopenms_deisotope_keep_only_deisotoped_returns_mono_peak(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + peaks = [(100.0, 1e5), (101.003355, 5e4), (102.00671, 2e4)] + _, mzs, intensities = deisotope_with_pyopenms( + peaks, + keep_only_deisotoped=True, + min_charge=1, + max_charge=1, + min_isopeaks=3, + max_isopeaks=3, + make_single_charged=False, + ) + + assert mzs.size == 1 + assert intensities.size == 1 + assert np.isclose(mzs[0], 100.0, atol=1e-6) + assert np.isclose(intensities[0], 1e5, atol=1e-6) + + +def test_pyopenms_deisotope_add_up_intensity_sums_cluster(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + peaks = [(100.0, 1e5), (101.003355, 5e4), (102.00671, 2e4)] + _, mzs, intensities = deisotope_with_pyopenms( + peaks, + keep_only_deisotoped=True, + min_charge=1, + max_charge=1, + min_isopeaks=3, + max_isopeaks=3, + make_single_charged=False, + add_up_intensity=True, + ) + + assert mzs.size == 1 + assert np.isclose(intensities[0], 1e5 + 5e4 + 2e4, atol=1e-6) + + +def test_pyopenms_deisotope_annotation_arrays_present_and_consistent(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + peaks = [(100.0, 1e5), (101.003355, 5e4), (102.00671, 2e4)] + spectrum, mzs, _ = deisotope_with_pyopenms( + peaks, + keep_only_deisotoped=True, + min_charge=1, + max_charge=1, + min_isopeaks=3, + max_isopeaks=3, + make_single_charged=False, + annotate_charge=True, + annotate_iso_peak_count=True, + annotate_features=True, + ) + + assert spectrum is not None + assert mzs.size == spectrum.size() + + integer_arrays = spectrum.getIntegerDataArrays() + names = {arr.getName() for arr in integer_arrays} + assert {"charge", "iso_peak_count", "feature_number"} <= names + assert all(len(arr) == spectrum.size() for arr in integer_arrays) + + charge_arr = next(arr for arr in integer_arrays if arr.getName() == "charge") + iso_count_arr = next(arr for arr in integer_arrays if arr.getName() == "iso_peak_count") + feature_arr = next(arr for arr in integer_arrays if arr.getName() == "feature_number") + + assert int(charge_arr[0]) == 1 + assert int(iso_count_arr[0]) == 3 + assert int(feature_arr[0]) == 0 + + +def test_pyopenms_deisotope_make_single_charged_converts_charge_two(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + mz0 = 100.0 + c13_diff = 1.003355 + peaks = [(mz0, 1e5), (mz0 + c13_diff / 2, 5e4), (mz0 + 2 * c13_diff / 2, 2e4)] + spectrum, mzs, _ = deisotope_with_pyopenms( + peaks, + keep_only_deisotoped=True, + min_charge=2, + max_charge=2, + min_isopeaks=3, + max_isopeaks=3, + make_single_charged=True, + annotate_charge=True, + ) + + assert spectrum is not None + assert mzs.size == 1 + assert np.isclose((2 * mz0) - mzs[0], PROTON_MASS, atol=1e-4) + + charge_arr = next(arr for arr in spectrum.getIntegerDataArrays() if arr.getName() == "charge") + assert int(charge_arr[0]) == 2 + + +def test_pyopenms_deisotope_start_intensity_check_controls_strictness(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + peaks = [(100.0, 1e5), (101.003355, 1.5e5)] + _, strict_mzs, _ = deisotope_with_pyopenms( + peaks, + keep_only_deisotoped=True, + min_isopeaks=2, + max_isopeaks=2, + use_decreasing_model=True, + start_intensity_check=1, + ) + _, relaxed_mzs, _ = deisotope_with_pyopenms( + peaks, + keep_only_deisotoped=True, + min_isopeaks=2, + max_isopeaks=2, + use_decreasing_model=True, + start_intensity_check=2, + ) + + assert strict_mzs.size == 0 + assert relaxed_mzs.size == 1 + assert np.isclose(relaxed_mzs[0], 100.0, atol=1e-6) + + +def test_pyopenms_deisotope_is_c13_only_drops_m_plus_2_only(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + peaks = [(100.0, 1e5), (101.997, 6e4)] + _, mzs, intensities = deisotope_with_pyopenms( + peaks, keep_only_deisotoped=True, min_isopeaks=2, max_isopeaks=2 + ) + + assert mzs.size == 0 + assert intensities.size == 0 + + +def test_pyopenms_deisotope_is_c13_only_on_multi_element_generator(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + formula = Formula("Cl2") + isotopes = Isotopes(formula) + isotope_peaks = isotopes.get_isotopes(total_proportion=0.99) + peaks = [(mz, proportion * 1e5) for mz, proportion, _ in isotope_peaks] + + mono_mz = isotope_peaks[0][0] + deltas = [mz - mono_mz for mz, _, _ in isotope_peaks[1:]] + assert any(np.isclose(delta, 1.997, atol=0.01) for delta in deltas) + assert not any(np.isclose(delta, 1.003355, atol=0.01) for delta in deltas) + + homegrown_clusters = Deisotoper(ppm_tolerance=10.0, max_charge=1, min_isotopes=2).deisotope( + peaks + ) + assert len(homegrown_clusters) == 1 + + _, mzs, _ = deisotope_with_pyopenms( + peaks, + keep_only_deisotoped=True, + min_charge=1, + max_charge=1, + min_isopeaks=2, + max_isopeaks=10, + ) + assert mzs.size == 0 + + +def test_pyopenms_deisotope_recovers_mono_from_generated_adducted_isotopes(): + pytest.importorskip("pyopenms") + from vimms.Deisotoping import deisotope_with_pyopenms + + formula = Formula("C10H16N2O2S") + isotopes = Isotopes(formula) + adducts = Adducts(formula, adduct_prior_dict={POSITIVE: {"M+H": 1.0}}) + adduct_name = adducts.get_adducts()[POSITIVE][0][0] + mul, add = ADDUCT_TERMS[adduct_name] + + peaks = [] + for mz, proportion, _ in isotopes.get_isotopes(total_proportion=0.99): + adducted_mz = adduct_transformation(mz, mul, add) + peaks.append((adducted_mz, proportion * 1e5)) + + _, mzs, _ = deisotope_with_pyopenms( + peaks, + keep_only_deisotoped=True, + min_charge=1, + max_charge=1, + min_isopeaks=2, + max_isopeaks=10, + make_single_charged=False, + ) + + expected_mz = formula.mass + PROTON_MASS + assert mzs.size >= 1 + assert np.any(np.isclose(mzs, expected_mz, atol=1e-3)) + assert np.isclose(mzs.min(), expected_mz, atol=1e-3) + + +def test_end_to_end_deadduct_then_homegrown_deisotope_recovers_neutral_mono(): + pytest.importorskip("pyopenms") + import pyopenms as oms + + from vimms.Deisotoping import deadduct_with_pyopenms + + formula = Formula("C10H16N2O2S") + isotopes = Isotopes(formula) + isotope_peaks = isotopes.get_isotopes(total_proportion=0.99)[:8] + + fmap = oms.FeatureMap() + uid = 1 + for mz, proportion, _ in isotope_peaks: + for adduct_name, weight in (("M+H", 1.0), ("M+Na", 0.6)): + mul, add = ADDUCT_TERMS[adduct_name] + feature = oms.Feature() + feature.setMZ(adduct_transformation(mz, mul, add)) + feature.setIntensity(float(proportion) * 1e5 * weight) + feature.setRT(0.0) + feature.setCharge(1) + feature.setUniqueId(uid) + uid += 1 + fmap.push_back(feature) + + neutral = deadduct_with_pyopenms( + fmap, + adducts=["[M+H]+", "[M+Na]+"], + ppm_tolerance=10.0, + keep_only_backbone=False, + ) + assert neutral.size() > 0 + + merged = _fmap_to_merged_peaks(neutral) + assert len(merged) <= len(isotope_peaks) + + deisotoper = Deisotoper(ppm_tolerance=10.0, max_charge=1, min_isotopes=2) + clusters = deisotoper.deisotope(merged) + + assert any(np.isclose(c.monoisotopic_mz, formula.mass, atol=1e-2) for c in clusters) + + +def test_end_to_end_deadduct_then_pyopenms_deisotope_recovers_neutral_mono(): + pytest.importorskip("pyopenms") + import pyopenms as oms + + from vimms.Deisotoping import deadduct_with_pyopenms, deisotope_with_pyopenms + + neutral_mz = 200.0 + isotope_peaks = [ + (neutral_mz, 1e5), + (neutral_mz + C13_MZ_DIFF, 5e4), + (neutral_mz + 2 * C13_MZ_DIFF, 2e4), + ] + + fmap = oms.FeatureMap() + uid = 1 + for mz, intensity in isotope_peaks: + for adduct_name, weight in (("M+H", 1.0), ("M+Na", 0.5)): + mul, add = ADDUCT_TERMS[adduct_name] + feature = oms.Feature() + feature.setMZ(adduct_transformation(mz, mul, add)) + feature.setIntensity(float(intensity) * weight) + feature.setRT(0.0) + feature.setCharge(1) + feature.setUniqueId(uid) + uid += 1 + fmap.push_back(feature) + + neutral = deadduct_with_pyopenms( + fmap, + adducts=["[M+H]+", "[M+Na]+"], + ppm_tolerance=10.0, + keep_only_backbone=False, + ) + merged = _fmap_to_merged_peaks(neutral) + + _, mzs, _ = deisotope_with_pyopenms( + merged, + keep_only_deisotoped=True, + min_charge=1, + max_charge=1, + min_isopeaks=3, + max_isopeaks=3, + make_single_charged=False, + ) + assert mzs.size == 1 + assert np.isclose(mzs[0], neutral_mz, atol=1e-3) + + +def test_end_to_end_deadduct_negative_then_homegrown_deisotope_recovers_neutral_mono(): + pytest.importorskip("pyopenms") + import pyopenms as oms + + from vimms.Deisotoping import deadduct_with_pyopenms + + cl_ion_mass = 34.969402 # [M+Cl]- shift in observed m/z + + formula = Formula("C10H16N2O2S") + isotopes = Isotopes(formula) + isotope_peaks = isotopes.get_isotopes(total_proportion=0.99)[:6] + + fmap = oms.FeatureMap() + uid = 1 + for mz, proportion, _ in isotope_peaks: + for shift, weight in ((-PROTON_MASS, 1.0), (cl_ion_mass, 0.5)): + feature = oms.Feature() + feature.setMZ(float(mz + shift)) + feature.setIntensity(float(proportion) * 1e5 * weight) + feature.setRT(0.0) + feature.setCharge(-1) + feature.setUniqueId(uid) + uid += 1 + fmap.push_back(feature) + + neutral = deadduct_with_pyopenms( + fmap, + adducts=["[M-H]-", "[M+Cl]-"], + ppm_tolerance=10.0, + max_charge=1, + keep_only_backbone=False, + ) + assert neutral.size() > 0 + + merged = _fmap_to_merged_peaks(neutral) + deisotoper = Deisotoper(ppm_tolerance=10.0, max_charge=1, min_isotopes=2) + clusters = deisotoper.deisotope(merged) + + assert any(np.isclose(c.monoisotopic_mz, formula.mass, atol=1e-2) for c in clusters) diff --git a/vimms/Deisotoping.py b/vimms/Deisotoping.py index 0853ea8..ff0d701 100644 --- a/vimms/Deisotoping.py +++ b/vimms/Deisotoping.py @@ -232,10 +232,16 @@ def deisotope_with_pyopenms( fragment_unit_ppm: bool = True, min_charge: int = 1, max_charge: int = 3, - keep_only_deisotoped: bool = True, - min_isopeaks: int = 2, + keep_only_deisotoped: bool = False, + min_isopeaks: int = 3, max_isopeaks: int = 10, - make_single_charged: bool = False, + make_single_charged: bool = True, + annotate_charge: bool = False, + annotate_iso_peak_count: bool = False, + use_decreasing_model: bool = True, + start_intensity_check: int = 2, + add_up_intensity: bool = False, + annotate_features: bool = False, ): """ Deisotope peaks using pyopenms Deisotoper on an MS1-like spectrum. @@ -250,6 +256,12 @@ def deisotope_with_pyopenms( min_isopeaks: minimum number of isotopic peaks in a cluster. max_isopeaks: maximum number of isotopic peaks in a cluster. make_single_charged: convert all features to single charge if True. + annotate_charge: annotate charge in the output if True. + annotate_iso_peak_count: annotate isotope peak count in the output if True. + use_decreasing_model: enforce decreasing intensity model if True. + start_intensity_check: isotope index at which intensity check starts. + add_up_intensity: add up intensities of isotope peaks if True. + annotate_features: annotate features in the output if True. Returns: Tuple of (spectrum, peak_mzs, peak_intensities) after deisotoping. @@ -262,6 +274,7 @@ def deisotope_with_pyopenms( spectrum = oms.MSSpectrum() spectrum.set_peaks((peaks_array[:, 0], peaks_array[:, 1])) + spectrum.sortByPosition() oms.Deisotoper.deisotopeAndSingleCharge( spectrum, @@ -273,6 +286,12 @@ def deisotope_with_pyopenms( min_isopeaks, max_isopeaks, make_single_charged, + annotate_charge, + annotate_iso_peak_count, + use_decreasing_model, + start_intensity_check, + add_up_intensity, + annotate_features, ) mzs, intensities = spectrum.get_peaks() @@ -284,30 +303,193 @@ def deadduct_with_pyopenms( adducts: List[str] | None = None, max_charge: int = 3, ppm_tolerance: float = 10.0, + keep_only_backbone: bool = True, ): """ - De-adduct features using pyopenms MetaboliteAdductDecharger. + De-adduct features using pyopenms MetaboliteFeatureDeconvolution. Args: feature_map: pyopenms FeatureMap containing detected features. - adducts: list of adduct strings (e.g. ["[M+H]+", "[M+Na]+"]) or None for defaults. + adducts: list of adduct strings or OpenMS potential_adducts strings. + Supported bracket forms: "[M+H]+", "[M+Na]+", "[M+K]+", "[M+NH4]+". + OpenMS form examples: "H:+:0.4", "Na:+:0.25". max_charge: maximum charge to consider. - ppm_tolerance: mass tolerance used for matching (ppm). + ppm_tolerance: approximate mass tolerance used for matching (ppm). + OpenMS MetaboliteFeatureDeconvolution uses a global Da tolerance; we approximate it at the + median m/z of the input features. + keep_only_backbone: if True, return only the representative (backbone) features. Returns: - De-adducted pyopenms FeatureMap. + De-adducted pyopenms FeatureMap with neutral masses (charge set to 0). """ import pyopenms as oms - decharger = oms.MetaboliteAdductDecharger() - params = decharger.getParameters() - params.setValue("mass_error_ppm", ppm_tolerance) - params.setValue("charge_max", max_charge) + # OpenMS reports adduct masses using *atomic* masses (neutral species), + # while MS m/z shifts correspond to charged species (atomic +/- e- mass). + # Correcting by the electron mass ensures we recover a chemically-sensible + # neutral mass from observed m/z values. + electron_mass = 0.000548579909065 # u + + def infer_negative_mode() -> bool: + charges = [] + for feature in feature_map: + try: + charges.append(int(feature.getCharge())) + except Exception: + continue + nonzero = [c for c in charges if c != 0] + if nonzero: + has_pos = any(c > 0 for c in nonzero) + has_neg = any(c < 0 for c in nonzero) + if has_pos and has_neg: + raise ValueError("Mixed positive/negative charges in FeatureMap are not supported.") + return has_neg + + if not adducts: + return False + + inferred = set() + for value in adducts: + value = value.strip() + if not value: + continue + if value.startswith("[") and value.endswith("]-"): + inferred.add("-") + elif value.startswith("[") and value.endswith("]+"): + inferred.add("+") + else: + parts = value.split(":") + if len(parts) == 3: + inferred.add(parts[1]) + if inferred == {"-"}: + return True + if inferred == {"+"}: + return False + if inferred: + raise ValueError( + f"Unable to infer polarity from adducts: mixed charge signs {sorted(inferred)}" + ) + return False + + def to_openms_potential_adducts(values: List[str]) -> List[bytes]: + parsed = [] + for value in values: + value = value.strip() + if not value: + continue + + if value.startswith("["): + if value.startswith("[M+") and value.endswith("]+"): + name = value[len("[M+") : -len("]+")] + parsed.append((name, "+", 1.0)) + continue + if value == "[M-H]-": + parsed.append(("H-1", "-", 1.0)) + continue + if value == "[M+Cl]-": + parsed.append(("Cl", "-", 1.0)) + continue + raise ValueError(f"Unsupported adduct string '{value}' for pyopenms") + + parts = value.split(":") + if len(parts) != 3: + raise ValueError(f"Unsupported OpenMS potential_adduct string '{value}'") + name, charge, prob = parts + parsed.append((name, charge, float(prob))) + + # OpenMS requires charged adduct probabilities to sum to 1.0, so normalise by charge sign. + for charge_sign in ("+", "-"): + indices = [i for i, (_, charge, _) in enumerate(parsed) if charge == charge_sign] + if not indices: + continue + total = sum(parsed[i][2] for i in indices) + if total <= 0: + raise ValueError( + f"Invalid OpenMS potential_adduct probabilities for charge '{charge_sign}'" + ) + for i in indices: + name, charge, prob = parsed[i] + parsed[i] = (name, charge, prob / total) + + potential = [] + for name, charge, prob in parsed: + potential.append(f"{name}:{charge}:{prob:.6g}".encode()) + return potential + + mzs = [] + for feature in feature_map: + try: + mzs.append(float(feature.getMZ())) + except Exception: + continue + reference_mz = float(np.median(mzs)) if mzs else 100.0 + negative_mode = infer_negative_mode() + + feature_map_in = oms.FeatureMap() + for feature in feature_map: + new_feature = oms.Feature(feature) + if new_feature.getCharge() == 0: + new_feature.setCharge(-1 if negative_mode else 1) + feature_map_in.push_back(new_feature) + + deconvolver = oms.MetaboliteFeatureDeconvolution() + params = deconvolver.getParameters() + if params.exists(b"unit"): + params.setValue(b"unit", b"Da") + if params.exists(b"negative_mode"): + params.setValue(b"negative_mode", b"true" if negative_mode else b"false") + if negative_mode: + params.setValue(b"charge_min", -int(max_charge)) + params.setValue(b"charge_max", -1) + else: + params.setValue(b"charge_min", 1) + params.setValue(b"charge_max", int(max_charge)) + params.setValue(b"mass_max_diff", float(max(ppm_tolerance * reference_mz / 1e6, 0.002))) if adducts: - adduct_info = oms.AdductInfo() - adduct_info.setAdducts(adducts) - decharger.setAdducts(adduct_info) - decharger.setParameters(params) - output = oms.FeatureMap() - decharger.decharge(feature_map, output) - return output + params.setValue(b"potential_adducts", to_openms_potential_adducts(adducts)) + deconvolver.setParameters(params) + + annotated = oms.FeatureMap() + cons_map = oms.ConsensusMap() + cons_map_p = oms.ConsensusMap() + deconvolver.compute(feature_map_in, annotated, cons_map, cons_map_p) + + neutral = oms.FeatureMap() + for feature in annotated: + if keep_only_backbone: + try: + if int(feature.getMetaValue(b"is_backbone")) != 1: + continue + except Exception: + # If OpenMS didn't annotate backbone status, keep the feature. + pass + + charge = int(feature.getCharge()) + if charge == 0: + charge = 1 + + has_adduct_mass = False + try: + adduct_mass = float(feature.getMetaValue(b"dc_charge_adduct_mass")) + has_adduct_mass = True + except Exception: + adduct_mass = 0.0 + + neutral_mass = float(feature.getMZ()) * abs(charge) + if has_adduct_mass: + neutral_mass = neutral_mass - adduct_mass + electron_mass * charge + + neutral_feature = oms.Feature() + neutral_feature.setMZ(neutral_mass) + neutral_feature.setRT(float(feature.getRT())) + neutral_feature.setIntensity(float(feature.getIntensity())) + neutral_feature.setCharge(0) + + try: + neutral_feature.setMetaValue(b"Group", feature.getMetaValue(b"Group")) + except Exception: + pass + + neutral.push_back(neutral_feature) + + return neutral From 7b4dea818b3074750b3265e91692bb5239de2400 Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Sat, 7 Feb 2026 20:25:24 +0000 Subject: [PATCH 08/10] Fix chloride adduct mass --- tests/test_deisotoping.py | 9 +++++++++ vimms/Common.py | 4 +++- vimms/Deisotoping.py | 4 ++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/test_deisotoping.py b/tests/test_deisotoping.py index 7e239b4..a6f095c 100644 --- a/tests/test_deisotoping.py +++ b/tests/test_deisotoping.py @@ -59,6 +59,15 @@ def test_isotope_distribution_chlorine_m2_peak(): assert any(np.isclose(delta, 1.997, atol=0.01) for delta in deltas) +def test_adduct_terms_chloride_uses_chloride_anion_mass(): + # [M+Cl]- uses Cl- (atomic mass + electron mass), not neutral Cl. + from vimms.Common import ELECTRON_MASS, NATURAL_ISOTOPES + + _, cl_shift = ADDUCT_TERMS["M+Cl"] + cl_atomic = NATURAL_ISOTOPES["Cl"][0][0] + assert np.isclose(cl_shift - cl_atomic, ELECTRON_MASS, atol=1e-12) + + def test_isotope_distribution_preserves_mono_when_filtered(): formula = Formula("C500H1000") isotopes = Isotopes(formula) diff --git a/vimms/Common.py b/vimms/Common.py index f20f123..ac8f975 100644 --- a/vimms/Common.py +++ b/vimms/Common.py @@ -72,6 +72,7 @@ DEFAULT_SOURCE_CID_ENERGY = 0 PROTON_MASS = 1.00727645199076 +ELECTRON_MASS = 0.000548579909065 # u CHROM_TYPE_EMPIRICAL = "empirical" CHROM_TYPE_CONSTANT = "constant" @@ -110,7 +111,8 @@ "2M+H": (2, 1.007276), "2M+NH4": (2, 18), "M-H": (1, -PROTON_MASS), - "M+Cl": (1, 34.96885268), + # [M+Cl]- adds Cl- (atomic mass + electron mass), not neutral Cl. + "M+Cl": (1, 34.96885268 + ELECTRON_MASS), "M+FA-H": (1, 44.998201), "M+Ac-H": (1, 59.013851), } diff --git a/vimms/Deisotoping.py b/vimms/Deisotoping.py index ff0d701..d7a5e81 100644 --- a/vimms/Deisotoping.py +++ b/vimms/Deisotoping.py @@ -4,7 +4,7 @@ import numpy as np -from vimms.Common import C13_MZ_DIFF, NATURAL_ISOTOPES +from vimms.Common import C13_MZ_DIFF, NATURAL_ISOTOPES, ELECTRON_MASS @dataclass(frozen=True) @@ -328,7 +328,7 @@ def deadduct_with_pyopenms( # while MS m/z shifts correspond to charged species (atomic +/- e- mass). # Correcting by the electron mass ensures we recover a chemically-sensible # neutral mass from observed m/z values. - electron_mass = 0.000548579909065 # u + electron_mass = ELECTRON_MASS def infer_negative_mode() -> bool: charges = [] From 9f264d55b6ae7986e8973a88a057143f3ebae688 Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Fri, 29 May 2026 14:42:13 +0100 Subject: [PATCH 09/10] Remove unsupported dimer adducts --- tests/test_deisotoping.py | 65 ++++++++++++++++++++++++++++++++++++++- vimms/Chemicals.py | 44 ++++++++++++++++++++++++-- vimms/Common.py | 12 ++------ 3 files changed, 108 insertions(+), 13 deletions(-) diff --git a/tests/test_deisotoping.py b/tests/test_deisotoping.py index a6f095c..304330f 100644 --- a/tests/test_deisotoping.py +++ b/tests/test_deisotoping.py @@ -4,7 +4,15 @@ import pytest from vimms.Chemicals import Isotopes, Adducts -from vimms.Common import C13_MZ_DIFF, Formula, ADDUCT_TERMS, POSITIVE, PROTON_MASS +from vimms.Common import ( + C13_MZ_DIFF, + Formula, + ADDUCT_NAMES_POS, + ADDUCT_PRIOR_POS, + ADDUCT_TERMS, + POSITIVE, + PROTON_MASS, +) from vimms.Deisotoping import Deisotoper from vimms.MassSpecUtils import adduct_transformation @@ -68,6 +76,61 @@ def test_adduct_terms_chloride_uses_chloride_anion_mass(): assert np.isclose(cl_shift - cl_atomic, ELECTRON_MASS, atol=1e-12) +def test_default_positive_adducts_exclude_unsupported_dimers(): + assert "2M+H" not in ADDUCT_NAMES_POS + assert "2M+NH4" not in ADDUCT_NAMES_POS + assert "2M+H" not in ADDUCT_PRIOR_POS + assert "2M+NH4" not in ADDUCT_PRIOR_POS + assert "2M+H" not in ADDUCT_TERMS + assert "2M+NH4" not in ADDUCT_TERMS + + +def test_unsupported_dimer_adducts_raise_clear_error(): + formula = Formula("C10H20") + + with pytest.raises(ValueError, match="multimer isotope envelopes"): + Adducts(formula, adduct_prior_dict={POSITIVE: {"2M+H": 1.0}}) + + with pytest.raises(ValueError, match="multimer isotope envelopes"): + Adducts(formula, adduct_prior_dict={POSITIVE: {"2M+NH4": 1.0}}) + + +def test_potassium_adduct_uses_corrected_name(): + assert "M+2K-H" in ADDUCT_NAMES_POS + assert "M+2K-H" in ADDUCT_PRIOR_POS + assert ADDUCT_TERMS["M+2K-H"] == (1, 76.919040) + assert "M+2K+H" not in ADDUCT_NAMES_POS + assert "M+2K+H" not in ADDUCT_PRIOR_POS + assert "M+2K+H" not in ADDUCT_TERMS + + formula = Formula("C10H20") + with pytest.raises(ValueError, match="use 'M\\+2K-H' instead"): + Adducts(formula, adduct_prior_dict={POSITIVE: {"M+2K+H": 1.0}}) + + +def test_isotope_distribution_keeps_prominent_halogen_high_mass_peaks(): + chlorinated = Isotopes(Formula("C30H40Cl2O5S2")).get_isotopes(total_proportion=0.99) + chlorinated_deltas = [mz - chlorinated[0][0] for mz, _, _ in chlorinated[1:]] + + brominated = Isotopes(Formula("C60H100Br2O10")).get_isotopes(total_proportion=0.99) + brominated_deltas = [mz - brominated[0][0] for mz, _, _ in brominated[1:]] + + assert len(chlorinated) > 20 + assert any(np.isclose(delta, 3.994, atol=0.02) for delta in chlorinated_deltas) + assert len(brominated) > 20 + assert any(np.isclose(delta, 6.003, atol=0.02) for delta in brominated_deltas) + + +def test_isotope_distribution_warns_when_explicit_peak_cap_truncates(): + formula = Formula("C30H40Cl2O5S2") + isotopes = Isotopes(formula) + + with pytest.warns(RuntimeWarning, match="max_peaks prevented"): + peaks = isotopes.get_isotopes(total_proportion=0.99, max_peaks=20) + + assert len(peaks) == 20 + + def test_isotope_distribution_preserves_mono_when_filtered(): formula = Formula("C500H1000") isotopes = Isotopes(formula) diff --git a/vimms/Chemicals.py b/vimms/Chemicals.py index 5c0ee6a..943808f 100644 --- a/vimms/Chemicals.py +++ b/vimms/Chemicals.py @@ -6,6 +6,7 @@ import copy import itertools import pickle +import warnings from abc import ABCMeta, abstractmethod from collections import deque @@ -31,6 +32,7 @@ ADDUCT_NAMES_NEG, ADDUCT_PRIOR_POS, ADDUCT_PRIOR_NEG, + ADDUCT_TERMS, NATURAL_ISOTOPES, ) from vimms.Noise import GaussianPeakNoise @@ -63,12 +65,27 @@ def __init__( self.inchikey = inchikey +_UNSUPPORTED_ADDUCT_MESSAGES = { + "2M+H": ( + "Adduct '2M+H' is not supported because multimer isotope envelopes " + "require adduct-specific isotope generation." + ), + "2M+NH4": ( + "Adduct '2M+NH4' is not supported because multimer isotope envelopes " + "require adduct-specific isotope generation." + ), + "M+2K+H": "Adduct 'M+2K+H' is mislabelled; use 'M+2K-H' instead.", +} + + class Isotopes: """ A class to represent an isotope of a chemical """ - def __init__(self, formula, min_prob=1e-12, max_peaks=20, max_states=4000, mass_precision=8): + def __init__( + self, formula, min_prob=1e-12, max_peaks=None, max_states=4000, mass_precision=8 + ): """ Create an Isotope object Args: @@ -107,7 +124,7 @@ def get_isotopes( return peaks def _get_isotope_distribution( - self, total_proportion, min_prob=1e-12, max_peaks=20, max_states=4000, mass_precision=8 + self, total_proportion, min_prob=1e-12, max_peaks=None, max_states=4000, mass_precision=8 ): distribution = [(0.0, 1.0)] monoisotope_log_prob = 0.0 @@ -155,9 +172,19 @@ def _get_isotope_distribution( for mass_shift, prob in distribution: selected.append((mass_shift, prob)) cumulative += prob - if cumulative >= total_proportion or len(selected) >= max_peaks: + reached_total = cumulative >= total_proportion + reached_cap = max_peaks is not None and len(selected) >= max_peaks + if reached_total or reached_cap: break + if max_peaks is not None and cumulative < total_proportion: + warnings.warn( + "max_peaks prevented isotope generation from reaching total_proportion; " + "the truncated isotope envelope will be renormalised.", + RuntimeWarning, + stacklevel=2, + ) + total = sum(prob for _, prob in selected) if total == 0: return [(0.0, 1.0)] @@ -259,6 +286,7 @@ def __init__( self.formula = formula self.adduct_proportion_cutoff = adduct_proportion_cutoff self.adduct_concentration = adduct_concentration + self._validate_adduct_names() def get_adducts(self): """ @@ -303,6 +331,16 @@ def _get_adduct_names(self): """ return self.adduct_names + def _validate_adduct_names(self): + for ionisation_mode, names in self.adduct_names.items(): + for name in names: + if name in _UNSUPPORTED_ADDUCT_MESSAGES: + raise ValueError(_UNSUPPORTED_ADDUCT_MESSAGES[name]) + if name not in ADDUCT_TERMS: + raise ValueError( + f"Unknown adduct '{name}' for ionisation mode '{ionisation_mode}'" + ) + class BaseChemical(metaclass=ABCMeta): """ diff --git a/vimms/Common.py b/vimms/Common.py index ac8f975..7074766 100644 --- a/vimms/Common.py +++ b/vimms/Common.py @@ -85,13 +85,11 @@ "M+K", "M+ACN+Na", "M+2Na-H", - "M+2K+H", + "M+2K-H", "[M+ACN]+H", "[M+CH3OH]+H", "[M+DMSO]+H", "[M+2ACN]+H", - "2M+H", - "2M+NH4", ] ADDUCT_NAMES_NEG = ["M-H", "M+Cl", "M+FA-H", "M+Ac-H"] @@ -105,11 +103,9 @@ "M+K": (1, 38.963158), "M+2Na-H": (1, 44.971160), "M+ACN+Na": (1, 64.015765), - "M+2K+H": (1, 76.919040), + "M+2K-H": (1, 76.919040), "[M+DMSO]+H": (1, 79.02122), "[M+2ACN]+H": (1, 83.060370), - "2M+H": (2, 1.007276), - "2M+NH4": (2, 18), "M-H": (1, -PROTON_MASS), # [M+Cl]- adds Cl- (atomic mass + electron mass), not neutral Cl. "M+Cl": (1, 34.96885268 + ELECTRON_MASS), @@ -128,13 +124,11 @@ "M+K": 0.15, "M+ACN+Na": 0.05, "M+2Na-H": 0.03, - "M+2K+H": 0.02, + "M+2K-H": 0.02, "[M+ACN]+H": 0.08, "[M+CH3OH]+H": 0.05, "[M+DMSO]+H": 0.03, "[M+2ACN]+H": 0.02, - "2M+H": 0.04, - "2M+NH4": 0.02, } ADDUCT_PRIOR_NEG = { "M-H": 1.0, From c02ed6c62de30023992135d2b2396633d81b31f2 Mon Sep 17 00:00:00 2001 From: Joe Wandy Date: Fri, 29 May 2026 19:44:31 +0100 Subject: [PATCH 10/10] Refactor adduct and isotope helpers --- tests/test_deisotoping.py | 52 ++++--- vimms/Chemicals.py | 143 ++---------------- vimms/Common.py | 113 +++++++------- vimms/Deisotoping.py | 286 +++++++++++++++++++---------------- vimms/IsotopeDistribution.py | 133 ++++++++++++++++ 5 files changed, 397 insertions(+), 330 deletions(-) create mode 100644 vimms/IsotopeDistribution.py diff --git a/tests/test_deisotoping.py b/tests/test_deisotoping.py index 304330f..d969809 100644 --- a/tests/test_deisotoping.py +++ b/tests/test_deisotoping.py @@ -7,11 +7,16 @@ from vimms.Common import ( C13_MZ_DIFF, Formula, + ADDUCT_DEFINITIONS, + ADDUCT_NAMES_NEG, ADDUCT_NAMES_POS, + ADDUCT_PRIOR_NEG, ADDUCT_PRIOR_POS, ADDUCT_TERMS, + NEGATIVE, POSITIVE, PROTON_MASS, + validate_adduct_name, ) from vimms.Deisotoping import Deisotoper from vimms.MassSpecUtils import adduct_transformation @@ -85,6 +90,31 @@ def test_default_positive_adducts_exclude_unsupported_dimers(): assert "2M+NH4" not in ADDUCT_TERMS +def test_derived_adduct_constants_are_consistent(): + expected_pos = [ + name + for name, definition in ADDUCT_DEFINITIONS.items() + if definition["polarity"] == POSITIVE and definition["prior"] is not None + ] + expected_neg = [ + name + for name, definition in ADDUCT_DEFINITIONS.items() + if definition["polarity"] == NEGATIVE and definition["prior"] is not None + ] + + assert ADDUCT_NAMES_POS == expected_pos + assert ADDUCT_NAMES_NEG == expected_neg + assert ADDUCT_PRIOR_POS == { + name: ADDUCT_DEFINITIONS[name]["prior"] for name in ADDUCT_NAMES_POS + } + assert ADDUCT_PRIOR_NEG == { + name: ADDUCT_DEFINITIONS[name]["prior"] for name in ADDUCT_NAMES_NEG + } + assert ADDUCT_TERMS == { + name: definition["term"] for name, definition in ADDUCT_DEFINITIONS.items() + } + + def test_unsupported_dimer_adducts_raise_clear_error(): formula = Formula("C10H20") @@ -108,6 +138,11 @@ def test_potassium_adduct_uses_corrected_name(): Adducts(formula, adduct_prior_dict={POSITIVE: {"M+2K+H": 1.0}}) +def test_adduct_validation_rejects_wrong_polarity(): + with pytest.raises(ValueError, match="defined for ionisation mode"): + validate_adduct_name("M+H", NEGATIVE) + + def test_isotope_distribution_keeps_prominent_halogen_high_mass_peaks(): chlorinated = Isotopes(Formula("C30H40Cl2O5S2")).get_isotopes(total_proportion=0.99) chlorinated_deltas = [mz - chlorinated[0][0] for mz, _, _ in chlorinated[1:]] @@ -169,7 +204,6 @@ def test_deisotoper_handles_m_plus_2_only(): def test_pyopenms_deisotope_helper(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms peaks = [(100.0, 1e5), (101.003355, 5e4), (102.00671, 2e4)] @@ -180,7 +214,6 @@ def test_pyopenms_deisotope_helper(): def test_pyopenms_deadduct_helper(): - pytest.importorskip("pyopenms") import pyopenms as oms from vimms.Deisotoping import deadduct_with_pyopenms @@ -196,7 +229,6 @@ def test_pyopenms_deadduct_helper(): def test_pyopenms_deadduct_multiple_features(): - pytest.importorskip("pyopenms") import pyopenms as oms from vimms.Deisotoping import deadduct_with_pyopenms @@ -213,7 +245,6 @@ def test_pyopenms_deadduct_multiple_features(): def test_pyopenms_deisotope_empty_peaks(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms spectrum, mzs, intensities = deisotope_with_pyopenms([]) @@ -223,7 +254,6 @@ def test_pyopenms_deisotope_empty_peaks(): def test_pyopenms_deisotope_sorts_input(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms peaks = [(102.00671, 2e4), (100.0, 1e5), (101.003355, 5e4)] @@ -234,7 +264,6 @@ def test_pyopenms_deisotope_sorts_input(): def test_pyopenms_deisotope_keeps_non_isotopic_peaks_by_default(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms peaks = [(100.0, 1e5)] @@ -246,7 +275,6 @@ def test_pyopenms_deisotope_keeps_non_isotopic_peaks_by_default(): def test_pyopenms_deisotope_keep_only_deisotoped_returns_mono_peak(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms peaks = [(100.0, 1e5), (101.003355, 5e4), (102.00671, 2e4)] @@ -267,7 +295,6 @@ def test_pyopenms_deisotope_keep_only_deisotoped_returns_mono_peak(): def test_pyopenms_deisotope_add_up_intensity_sums_cluster(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms peaks = [(100.0, 1e5), (101.003355, 5e4), (102.00671, 2e4)] @@ -287,7 +314,6 @@ def test_pyopenms_deisotope_add_up_intensity_sums_cluster(): def test_pyopenms_deisotope_annotation_arrays_present_and_consistent(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms peaks = [(100.0, 1e5), (101.003355, 5e4), (102.00671, 2e4)] @@ -322,7 +348,6 @@ def test_pyopenms_deisotope_annotation_arrays_present_and_consistent(): def test_pyopenms_deisotope_make_single_charged_converts_charge_two(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms mz0 = 100.0 @@ -348,7 +373,6 @@ def test_pyopenms_deisotope_make_single_charged_converts_charge_two(): def test_pyopenms_deisotope_start_intensity_check_controls_strictness(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms peaks = [(100.0, 1e5), (101.003355, 1.5e5)] @@ -375,7 +399,6 @@ def test_pyopenms_deisotope_start_intensity_check_controls_strictness(): def test_pyopenms_deisotope_is_c13_only_drops_m_plus_2_only(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms peaks = [(100.0, 1e5), (101.997, 6e4)] @@ -388,7 +411,6 @@ def test_pyopenms_deisotope_is_c13_only_drops_m_plus_2_only(): def test_pyopenms_deisotope_is_c13_only_on_multi_element_generator(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms formula = Formula("Cl2") @@ -418,7 +440,6 @@ def test_pyopenms_deisotope_is_c13_only_on_multi_element_generator(): def test_pyopenms_deisotope_recovers_mono_from_generated_adducted_isotopes(): - pytest.importorskip("pyopenms") from vimms.Deisotoping import deisotope_with_pyopenms formula = Formula("C10H16N2O2S") @@ -449,7 +470,6 @@ def test_pyopenms_deisotope_recovers_mono_from_generated_adducted_isotopes(): def test_end_to_end_deadduct_then_homegrown_deisotope_recovers_neutral_mono(): - pytest.importorskip("pyopenms") import pyopenms as oms from vimms.Deisotoping import deadduct_with_pyopenms @@ -490,7 +510,6 @@ def test_end_to_end_deadduct_then_homegrown_deisotope_recovers_neutral_mono(): def test_end_to_end_deadduct_then_pyopenms_deisotope_recovers_neutral_mono(): - pytest.importorskip("pyopenms") import pyopenms as oms from vimms.Deisotoping import deadduct_with_pyopenms, deisotope_with_pyopenms @@ -538,7 +557,6 @@ def test_end_to_end_deadduct_then_pyopenms_deisotope_recovers_neutral_mono(): def test_end_to_end_deadduct_negative_then_homegrown_deisotope_recovers_neutral_mono(): - pytest.importorskip("pyopenms") import pyopenms as oms from vimms.Deisotoping import deadduct_with_pyopenms diff --git a/vimms/Chemicals.py b/vimms/Chemicals.py index 943808f..31a583e 100644 --- a/vimms/Chemicals.py +++ b/vimms/Chemicals.py @@ -6,7 +6,6 @@ import copy import itertools import pickle -import warnings from abc import ABCMeta, abstractmethod from collections import deque @@ -32,9 +31,10 @@ ADDUCT_NAMES_NEG, ADDUCT_PRIOR_POS, ADDUCT_PRIOR_NEG, - ADDUCT_TERMS, - NATURAL_ISOTOPES, + ADDUCT_PROFILE_PRESETS, + validate_adduct_name, ) +from vimms.IsotopeDistribution import get_isotope_distribution from vimms.Noise import GaussianPeakNoise from vimms.Roi import make_roi, RoiBuilderParams @@ -65,19 +65,6 @@ def __init__( self.inchikey = inchikey -_UNSUPPORTED_ADDUCT_MESSAGES = { - "2M+H": ( - "Adduct '2M+H' is not supported because multimer isotope envelopes " - "require adduct-specific isotope generation." - ), - "2M+NH4": ( - "Adduct '2M+NH4' is not supported because multimer isotope envelopes " - "require adduct-specific isotope generation." - ), - "M+2K+H": "Adduct 'M+2K+H' is mislabelled; use 'M+2K-H' instead.", -} - - class Isotopes: """ A class to represent an isotope of a chemical @@ -126,113 +113,14 @@ def get_isotopes( def _get_isotope_distribution( self, total_proportion, min_prob=1e-12, max_peaks=None, max_states=4000, mass_precision=8 ): - distribution = [(0.0, 1.0)] - monoisotope_log_prob = 0.0 - for element, count in self.formula.atoms.items(): - if count <= 0: - continue - isotopes = NATURAL_ISOTOPES.get(element) - if not isotopes or len(isotopes) == 1: - continue - monoisotope_log_prob += count * np.log(isotopes[0][1]) - mono_mass = isotopes[0][0] - base_distribution = [(mass - mono_mass, abundance) for mass, abundance in isotopes] - element_distribution = self._power_distribution( - base_distribution, - count, - min_prob=min_prob, - max_states=max_states, - mass_precision=mass_precision, - ) - distribution = self._convolve_distributions( - distribution, - element_distribution, - min_prob=min_prob, - max_states=max_states, - mass_precision=mass_precision, - ) - - # Ensure the monoisotopic (zero-shift) peak is always present, even if - # truncation/pruning would otherwise drop it. - monoisotope_shift = round(0.0, mass_precision) - monoisotope_prob = float(np.exp(monoisotope_log_prob)) - distribution_dict = dict(distribution) - distribution_dict[monoisotope_shift] = monoisotope_prob - distribution = list(distribution_dict.items()) - - distribution = [ - (shift, prob) - for shift, prob in distribution - if shift == monoisotope_shift or prob >= min_prob - ] - distribution.sort(key=lambda x: x[0]) - - selected = [] - cumulative = 0.0 - for mass_shift, prob in distribution: - selected.append((mass_shift, prob)) - cumulative += prob - reached_total = cumulative >= total_proportion - reached_cap = max_peaks is not None and len(selected) >= max_peaks - if reached_total or reached_cap: - break - - if max_peaks is not None and cumulative < total_proportion: - warnings.warn( - "max_peaks prevented isotope generation from reaching total_proportion; " - "the truncated isotope envelope will be renormalised.", - RuntimeWarning, - stacklevel=2, - ) - - total = sum(prob for _, prob in selected) - if total == 0: - return [(0.0, 1.0)] - return [(shift, prob / total) for shift, prob in selected] - - def _power_distribution(self, base_distribution, count, min_prob, max_states, mass_precision): - if count == 1: - return base_distribution - result = [(0.0, 1.0)] - power = base_distribution - remaining = count - while remaining > 0: - if remaining % 2 == 1: - result = self._convolve_distributions( - result, - power, - min_prob=min_prob, - max_states=max_states, - mass_precision=mass_precision, - ) - remaining //= 2 - if remaining: - power = self._convolve_distributions( - power, - power, - min_prob=min_prob, - max_states=max_states, - mass_precision=mass_precision, - ) - return result - - def _convolve_distributions(self, left, right, min_prob, max_states, mass_precision): - new_distribution = {} - for left_shift, left_prob in left: - for right_shift, right_prob in right: - prob = left_prob * right_prob - if prob < min_prob: - continue - shift = left_shift + right_shift - key = round(shift, mass_precision) - new_distribution[key] = new_distribution.get(key, 0.0) + prob - if not new_distribution: - return [] - distribution = list(new_distribution.items()) - if len(distribution) > max_states: - distribution.sort(key=lambda x: x[1], reverse=True) - distribution = distribution[:max_states] - return distribution + return get_isotope_distribution( + self.formula, + total_proportion, + min_prob=min_prob, + max_peaks=max_peaks, + max_states=max_states, + mass_precision=mass_precision, + ) class Adducts: @@ -262,8 +150,6 @@ def __init__( adduct_prior_dict = adduct_prior_dict(formula) if adduct_prior_dict is None and adduct_profile is not None: - from vimms.Common import ADDUCT_PROFILE_PRESETS - if isinstance(adduct_profile, str): adduct_prior_dict = ADDUCT_PROFILE_PRESETS.get(adduct_profile) if adduct_prior_dict is None: @@ -334,12 +220,7 @@ def _get_adduct_names(self): def _validate_adduct_names(self): for ionisation_mode, names in self.adduct_names.items(): for name in names: - if name in _UNSUPPORTED_ADDUCT_MESSAGES: - raise ValueError(_UNSUPPORTED_ADDUCT_MESSAGES[name]) - if name not in ADDUCT_TERMS: - raise ValueError( - f"Unknown adduct '{name}' for ionisation mode '{ionisation_mode}'" - ) + validate_adduct_name(name, ionisation_mode) class BaseChemical(metaclass=ABCMeta): diff --git a/vimms/Common.py b/vimms/Common.py index 7074766..7673646 100644 --- a/vimms/Common.py +++ b/vimms/Common.py @@ -78,65 +78,76 @@ CHROM_TYPE_CONSTANT = "constant" CHROM_TYPE_FUNCTIONAL = "functional" -ADDUCT_NAMES_POS = [ - "M+H", - "M+NH4", - "M+Na", - "M+K", - "M+ACN+Na", - "M+2Na-H", - "M+2K-H", - "[M+ACN]+H", - "[M+CH3OH]+H", - "[M+DMSO]+H", - "[M+2ACN]+H", -] -ADDUCT_NAMES_NEG = ["M-H", "M+Cl", "M+FA-H", "M+Ac-H"] +ADDUCT_DEFINITIONS = { + "M+H": {"polarity": POSITIVE, "term": (1, PROTON_MASS), "prior": 1.0}, + "M+NH4": {"polarity": POSITIVE, "term": (1, 18.033823), "prior": 0.3}, + "M+Na": {"polarity": POSITIVE, "term": (1, 22.989218), "prior": 0.25}, + "M+K": {"polarity": POSITIVE, "term": (1, 38.963158), "prior": 0.15}, + "M+ACN+Na": {"polarity": POSITIVE, "term": (1, 64.015765), "prior": 0.05}, + "M+2Na-H": {"polarity": POSITIVE, "term": (1, 44.971160), "prior": 0.03}, + "M+2K-H": {"polarity": POSITIVE, "term": (1, 76.919040), "prior": 0.02}, + "[M+ACN]+H": {"polarity": POSITIVE, "term": (1, 42.033823), "prior": 0.08}, + "[M+CH3OH]+H": {"polarity": POSITIVE, "term": (1, 33.033489), "prior": 0.05}, + "[M+DMSO]+H": {"polarity": POSITIVE, "term": (1, 79.02122), "prior": 0.03}, + "[M+2ACN]+H": {"polarity": POSITIVE, "term": (1, 83.060370), "prior": 0.02}, + "[M+NH3]+H": {"polarity": POSITIVE, "term": (1, 18.033823), "prior": None}, + "M-H": {"polarity": NEGATIVE, "term": (1, -PROTON_MASS), "prior": 1.0}, + # [M+Cl]- adds Cl- (atomic mass + electron mass), not neutral Cl. + "M+Cl": {"polarity": NEGATIVE, "term": (1, 34.96885268 + ELECTRON_MASS), "prior": 0.2}, + "M+FA-H": {"polarity": NEGATIVE, "term": (1, 44.998201), "prior": 0.35}, + "M+Ac-H": {"polarity": NEGATIVE, "term": (1, 59.013851), "prior": 0.15}, +} + +UNSUPPORTED_ADDUCT_MESSAGES = { + "2M+H": ( + "Adduct '2M+H' is not supported because multimer isotope envelopes " + "require adduct-specific isotope generation." + ), + "2M+NH4": ( + "Adduct '2M+NH4' is not supported because multimer isotope envelopes " + "require adduct-specific isotope generation." + ), + "M+2K+H": "Adduct 'M+2K+H' is mislabelled; use 'M+2K-H' instead.", +} + + +def _adduct_prior_by_polarity(polarity): + return { + name: definition["prior"] + for name, definition in ADDUCT_DEFINITIONS.items() + if definition["polarity"] == polarity and definition["prior"] is not None + } + +ADDUCT_PRIOR_POS = _adduct_prior_by_polarity(POSITIVE) +ADDUCT_PRIOR_NEG = _adduct_prior_by_polarity(NEGATIVE) +ADDUCT_NAMES_POS = list(ADDUCT_PRIOR_POS.keys()) +ADDUCT_NAMES_NEG = list(ADDUCT_PRIOR_NEG.keys()) ADDUCT_TERMS = { - "M+H": (1, PROTON_MASS), - "M+NH4": (1, 18.033823), - "[M+ACN]+H": (1, 42.033823), - "[M+CH3OH]+H": (1, 33.033489), - "[M+NH3]+H": (1, 18.033823), - "M+Na": (1, 22.989218), - "M+K": (1, 38.963158), - "M+2Na-H": (1, 44.971160), - "M+ACN+Na": (1, 64.015765), - "M+2K-H": (1, 76.919040), - "[M+DMSO]+H": (1, 79.02122), - "[M+2ACN]+H": (1, 83.060370), - "M-H": (1, -PROTON_MASS), - # [M+Cl]- adds Cl- (atomic mass + electron mass), not neutral Cl. - "M+Cl": (1, 34.96885268 + ELECTRON_MASS), - "M+FA-H": (1, 44.998201), - "M+Ac-H": (1, 59.013851), + name: definition["term"] for name, definition in ADDUCT_DEFINITIONS.items() } + +def validate_adduct_name(name, ionisation_mode=None): + if name in UNSUPPORTED_ADDUCT_MESSAGES: + raise ValueError(UNSUPPORTED_ADDUCT_MESSAGES[name]) + if name not in ADDUCT_DEFINITIONS: + message = f"Unknown adduct '{name}'" + if ionisation_mode is not None: + message += f" for ionisation mode '{ionisation_mode}'" + raise ValueError(message) + + expected_mode = ADDUCT_DEFINITIONS[name]["polarity"] + if ionisation_mode is not None and ionisation_mode != expected_mode: + raise ValueError( + f"Adduct '{name}' is defined for ionisation mode '{expected_mode}', " + f"not '{ionisation_mode}'" + ) + # example prior dictionary to be passed when creating an # adducts object to only get M+H adducts out ADDUCT_DICT_POS_MH = {POSITIVE: {"M+H": 1.0}} -ADDUCT_PRIOR_POS = { - "M+H": 1.0, - "M+NH4": 0.3, - "M+Na": 0.25, - "M+K": 0.15, - "M+ACN+Na": 0.05, - "M+2Na-H": 0.03, - "M+2K-H": 0.02, - "[M+ACN]+H": 0.08, - "[M+CH3OH]+H": 0.05, - "[M+DMSO]+H": 0.03, - "[M+2ACN]+H": 0.02, -} -ADDUCT_PRIOR_NEG = { - "M-H": 1.0, - "M+Cl": 0.2, - "M+FA-H": 0.35, - "M+Ac-H": 0.15, -} - ADDUCT_PROFILE_PRESETS = { "default": {POSITIVE: ADDUCT_PRIOR_POS, NEGATIVE: ADDUCT_PRIOR_NEG}, "esi_positive": {POSITIVE: ADDUCT_PRIOR_POS}, diff --git a/vimms/Deisotoping.py b/vimms/Deisotoping.py index d7a5e81..4596df6 100644 --- a/vimms/Deisotoping.py +++ b/vimms/Deisotoping.py @@ -3,6 +3,8 @@ from typing import Iterable, List, Tuple import numpy as np +import pyopenms as oms +from ms_deisotope.deconvolution import deconvolute_peaks from vimms.Common import C13_MZ_DIFF, NATURAL_ISOTOPES, ELECTRON_MASS @@ -201,7 +203,7 @@ def deisotope_with_ms_deisotope( ms1_tolerance: float = 10.0, ): """ - Deisotope peaks using the optional ms_deisotope package. + Deisotope peaks using ms_deisotope. Args: peaks: iterable of (mz, intensity) pairs. @@ -212,8 +214,6 @@ def deisotope_with_ms_deisotope( Returns: The ms_deisotope deconvolution result object. """ - from ms_deisotope.deconvolution import deconvolute_peaks - peaks_array = np.array(list(peaks), dtype=float) if peaks_array.size == 0: return [] @@ -266,8 +266,6 @@ def deisotope_with_pyopenms( Returns: Tuple of (spectrum, peak_mzs, peak_intensities) after deisotoping. """ - import pyopenms as oms - peaks_array = np.array(list(peaks), dtype=float) if peaks_array.size == 0: return None, np.array([]), np.array([]) @@ -322,116 +320,98 @@ def deadduct_with_pyopenms( Returns: De-adducted pyopenms FeatureMap with neutral masses (charge set to 0). """ - import pyopenms as oms - - # OpenMS reports adduct masses using *atomic* masses (neutral species), - # while MS m/z shifts correspond to charged species (atomic +/- e- mass). - # Correcting by the electron mass ensures we recover a chemically-sensible - # neutral mass from observed m/z values. - electron_mass = ELECTRON_MASS - - def infer_negative_mode() -> bool: - charges = [] - for feature in feature_map: - try: - charges.append(int(feature.getCharge())) - except Exception: - continue - nonzero = [c for c in charges if c != 0] - if nonzero: - has_pos = any(c > 0 for c in nonzero) - has_neg = any(c < 0 for c in nonzero) - if has_pos and has_neg: - raise ValueError("Mixed positive/negative charges in FeatureMap are not supported.") - return has_neg - - if not adducts: - return False - - inferred = set() - for value in adducts: - value = value.strip() - if not value: - continue - if value.startswith("[") and value.endswith("]-"): - inferred.add("-") - elif value.startswith("[") and value.endswith("]+"): - inferred.add("+") - else: - parts = value.split(":") - if len(parts) == 3: - inferred.add(parts[1]) - if inferred == {"-"}: - return True - if inferred == {"+"}: - return False - if inferred: - raise ValueError( - f"Unable to infer polarity from adducts: mixed charge signs {sorted(inferred)}" - ) - return False - - def to_openms_potential_adducts(values: List[str]) -> List[bytes]: - parsed = [] - for value in values: - value = value.strip() - if not value: - continue + reference_mz = _reference_mz(feature_map) + negative_mode = _infer_negative_mode(feature_map, adducts) + feature_map_in = _feature_map_with_charge(feature_map, negative_mode) - if value.startswith("["): - if value.startswith("[M+") and value.endswith("]+"): - name = value[len("[M+") : -len("]+")] - parsed.append((name, "+", 1.0)) - continue - if value == "[M-H]-": - parsed.append(("H-1", "-", 1.0)) - continue - if value == "[M+Cl]-": - parsed.append(("Cl", "-", 1.0)) - continue - raise ValueError(f"Unsupported adduct string '{value}' for pyopenms") + annotated = oms.FeatureMap() + deconvolver = _configured_metabolite_deconvolver( + adducts=adducts, + max_charge=max_charge, + negative_mode=negative_mode, + ppm_tolerance=ppm_tolerance, + reference_mz=reference_mz, + ) + deconvolver.compute( + feature_map_in, + annotated, + oms.ConsensusMap(), + oms.ConsensusMap(), + ) - parts = value.split(":") - if len(parts) != 3: - raise ValueError(f"Unsupported OpenMS potential_adduct string '{value}'") - name, charge, prob = parts - parsed.append((name, charge, float(prob))) - - # OpenMS requires charged adduct probabilities to sum to 1.0, so normalise by charge sign. - for charge_sign in ("+", "-"): - indices = [i for i, (_, charge, _) in enumerate(parsed) if charge == charge_sign] - if not indices: - continue - total = sum(parsed[i][2] for i in indices) - if total <= 0: - raise ValueError( - f"Invalid OpenMS potential_adduct probabilities for charge '{charge_sign}'" - ) - for i in indices: - name, charge, prob = parsed[i] - parsed[i] = (name, charge, prob / total) + return _neutral_feature_map(annotated, keep_only_backbone) - potential = [] - for name, charge, prob in parsed: - potential.append(f"{name}:{charge}:{prob:.6g}".encode()) - return potential +def _reference_mz(feature_map) -> float: mzs = [] for feature in feature_map: try: mzs.append(float(feature.getMZ())) except Exception: continue - reference_mz = float(np.median(mzs)) if mzs else 100.0 - negative_mode = infer_negative_mode() + return float(np.median(mzs)) if mzs else 100.0 + + +def _infer_negative_mode(feature_map, adducts: List[str] | None) -> bool: + charges = [] + for feature in feature_map: + try: + charges.append(int(feature.getCharge())) + except Exception: + continue + nonzero = [charge for charge in charges if charge != 0] + if nonzero: + has_pos = any(charge > 0 for charge in nonzero) + has_neg = any(charge < 0 for charge in nonzero) + if has_pos and has_neg: + raise ValueError("Mixed positive/negative charges in FeatureMap are not supported.") + return has_neg + + inferred = _charge_signs_from_adducts(adducts) + if inferred == {"-"}: + return True + if inferred == {"+"} or not inferred: + return False + raise ValueError(f"Unable to infer polarity from adducts: mixed charge signs {sorted(inferred)}") + + +def _charge_signs_from_adducts(adducts: List[str] | None) -> set[str]: + if not adducts: + return set() + + inferred = set() + for value in adducts: + value = value.strip() + if not value: + continue + if value.startswith("[") and value.endswith("]-"): + inferred.add("-") + elif value.startswith("[") and value.endswith("]+"): + inferred.add("+") + else: + parts = value.split(":") + if len(parts) == 3: + inferred.add(parts[1]) + return inferred + +def _feature_map_with_charge(feature_map, negative_mode: bool): feature_map_in = oms.FeatureMap() for feature in feature_map: new_feature = oms.Feature(feature) if new_feature.getCharge() == 0: new_feature.setCharge(-1 if negative_mode else 1) feature_map_in.push_back(new_feature) + return feature_map_in + +def _configured_metabolite_deconvolver( + adducts: List[str] | None, + max_charge: int, + negative_mode: bool, + ppm_tolerance: float, + reference_mz: float, +): deconvolver = oms.MetaboliteFeatureDeconvolution() params = deconvolver.getParameters() if params.exists(b"unit"): @@ -446,50 +426,94 @@ def to_openms_potential_adducts(values: List[str]) -> List[bytes]: params.setValue(b"charge_max", int(max_charge)) params.setValue(b"mass_max_diff", float(max(ppm_tolerance * reference_mz / 1e6, 0.002))) if adducts: - params.setValue(b"potential_adducts", to_openms_potential_adducts(adducts)) + params.setValue(b"potential_adducts", _to_openms_potential_adducts(adducts)) deconvolver.setParameters(params) + return deconvolver + + +def _to_openms_potential_adducts(values: List[str]) -> List[bytes]: + parsed = [_parse_openms_adduct(value) for value in values if value.strip()] + parsed = _normalise_adduct_probabilities(parsed) + return [f"{name}:{charge}:{prob:.6g}".encode() for name, charge, prob in parsed] + + +def _parse_openms_adduct(value: str) -> tuple[str, str, float]: + value = value.strip() + if value.startswith("["): + if value.startswith("[M+") and value.endswith("]+"): + name = value[len("[M+") : -len("]+")] + return name, "+", 1.0 + if value == "[M-H]-": + return "H-1", "-", 1.0 + if value == "[M+Cl]-": + return "Cl", "-", 1.0 + raise ValueError(f"Unsupported adduct string '{value}' for pyopenms") + + parts = value.split(":") + if len(parts) != 3: + raise ValueError(f"Unsupported OpenMS potential_adduct string '{value}'") + name, charge, prob = parts + return name, charge, float(prob) + + +def _normalise_adduct_probabilities(parsed: List[tuple[str, str, float]]): + normalised = list(parsed) + for charge_sign in ("+", "-"): + indices = [i for i, (_, charge, _) in enumerate(normalised) if charge == charge_sign] + if not indices: + continue + total = sum(normalised[i][2] for i in indices) + if total <= 0: + raise ValueError( + f"Invalid OpenMS potential_adduct probabilities for charge '{charge_sign}'" + ) + for i in indices: + name, charge, prob = normalised[i] + normalised[i] = (name, charge, prob / total) + return normalised - annotated = oms.FeatureMap() - cons_map = oms.ConsensusMap() - cons_map_p = oms.ConsensusMap() - deconvolver.compute(feature_map_in, annotated, cons_map, cons_map_p) +def _neutral_feature_map(annotated, keep_only_backbone: bool): neutral = oms.FeatureMap() for feature in annotated: - if keep_only_backbone: - try: - if int(feature.getMetaValue(b"is_backbone")) != 1: - continue - except Exception: - # If OpenMS didn't annotate backbone status, keep the feature. - pass - - charge = int(feature.getCharge()) - if charge == 0: - charge = 1 - - has_adduct_mass = False - try: - adduct_mass = float(feature.getMetaValue(b"dc_charge_adduct_mass")) - has_adduct_mass = True - except Exception: - adduct_mass = 0.0 - - neutral_mass = float(feature.getMZ()) * abs(charge) - if has_adduct_mass: - neutral_mass = neutral_mass - adduct_mass + electron_mass * charge + if keep_only_backbone and not _is_backbone_feature(feature): + continue neutral_feature = oms.Feature() - neutral_feature.setMZ(neutral_mass) + neutral_feature.setMZ(_neutral_mass(feature)) neutral_feature.setRT(float(feature.getRT())) neutral_feature.setIntensity(float(feature.getIntensity())) neutral_feature.setCharge(0) + _copy_group_meta(feature, neutral_feature) + neutral.push_back(neutral_feature) + return neutral - try: - neutral_feature.setMetaValue(b"Group", feature.getMetaValue(b"Group")) - except Exception: - pass - neutral.push_back(neutral_feature) +def _is_backbone_feature(feature) -> bool: + try: + return int(feature.getMetaValue(b"is_backbone")) == 1 + except Exception: + return True - return neutral + +def _neutral_mass(feature) -> float: + charge = int(feature.getCharge()) + if charge == 0: + charge = 1 + + neutral_mass = float(feature.getMZ()) * abs(charge) + try: + adduct_mass = float(feature.getMetaValue(b"dc_charge_adduct_mass")) + except Exception: + return neutral_mass + + # OpenMS reports adduct masses using atomic masses (neutral species), while + # MS m/z shifts correspond to charged species (atomic +/- electron mass). + return neutral_mass - adduct_mass + ELECTRON_MASS * charge + + +def _copy_group_meta(source, target): + try: + target.setMetaValue(b"Group", source.getMetaValue(b"Group")) + except Exception: + pass diff --git a/vimms/IsotopeDistribution.py b/vimms/IsotopeDistribution.py new file mode 100644 index 0000000..02ff3b0 --- /dev/null +++ b/vimms/IsotopeDistribution.py @@ -0,0 +1,133 @@ +import warnings + +import numpy as np + +from vimms.Common import NATURAL_ISOTOPES + + +def get_isotope_distribution( + formula, + total_proportion, + min_prob=1e-12, + max_peaks=None, + max_states=4000, + mass_precision=8, +): + distribution = [(0.0, 1.0)] + monoisotope_log_prob = 0.0 + for element, count in formula.atoms.items(): + if count <= 0: + continue + isotopes = NATURAL_ISOTOPES.get(element) + if not isotopes or len(isotopes) == 1: + continue + monoisotope_log_prob += count * np.log(isotopes[0][1]) + mono_mass = isotopes[0][0] + base_distribution = [(mass - mono_mass, abundance) for mass, abundance in isotopes] + element_distribution = _power_distribution( + base_distribution, + count, + min_prob=min_prob, + max_states=max_states, + mass_precision=mass_precision, + ) + distribution = _convolve_distributions( + distribution, + element_distribution, + min_prob=min_prob, + max_states=max_states, + mass_precision=mass_precision, + ) + + distribution = _preserve_monoisotope( + distribution, monoisotope_log_prob, min_prob, mass_precision + ) + selected, cumulative = _select_distribution_peaks( + distribution, total_proportion, max_peaks + ) + if max_peaks is not None and cumulative < total_proportion: + warnings.warn( + "max_peaks prevented isotope generation from reaching total_proportion; " + "the truncated isotope envelope will be renormalised.", + RuntimeWarning, + stacklevel=2, + ) + + total = sum(prob for _, prob in selected) + if total == 0: + return [(0.0, 1.0)] + return [(shift, prob / total) for shift, prob in selected] + + +def _preserve_monoisotope(distribution, monoisotope_log_prob, min_prob, mass_precision): + monoisotope_shift = round(0.0, mass_precision) + monoisotope_prob = float(np.exp(monoisotope_log_prob)) + distribution_dict = dict(distribution) + distribution_dict[monoisotope_shift] = monoisotope_prob + distribution = list(distribution_dict.items()) + distribution = [ + (shift, prob) + for shift, prob in distribution + if shift == monoisotope_shift or prob >= min_prob + ] + distribution.sort(key=lambda x: x[0]) + return distribution + + +def _select_distribution_peaks(distribution, total_proportion, max_peaks): + selected = [] + cumulative = 0.0 + for mass_shift, prob in distribution: + selected.append((mass_shift, prob)) + cumulative += prob + reached_total = cumulative >= total_proportion + reached_cap = max_peaks is not None and len(selected) >= max_peaks + if reached_total or reached_cap: + break + return selected, cumulative + + +def _power_distribution(base_distribution, count, min_prob, max_states, mass_precision): + if count == 1: + return base_distribution + result = [(0.0, 1.0)] + power = base_distribution + remaining = count + while remaining > 0: + if remaining % 2 == 1: + result = _convolve_distributions( + result, + power, + min_prob=min_prob, + max_states=max_states, + mass_precision=mass_precision, + ) + remaining //= 2 + if remaining: + power = _convolve_distributions( + power, + power, + min_prob=min_prob, + max_states=max_states, + mass_precision=mass_precision, + ) + return result + + +def _convolve_distributions(left, right, min_prob, max_states, mass_precision): + new_distribution = {} + for left_shift, left_prob in left: + for right_shift, right_prob in right: + prob = left_prob * right_prob + if prob < min_prob: + continue + shift = left_shift + right_shift + key = round(shift, mass_precision) + new_distribution[key] = new_distribution.get(key, 0.0) + prob + if not new_distribution: + return [] + distribution = list(new_distribution.items()) + if len(distribution) > max_states: + distribution.sort(key=lambda x: x[1], reverse=True) + distribution = distribution[:max_states] + return distribution