From 82ce3c8319bbaa50e7ba4580bd0170d387bab5b6 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Mon, 8 Aug 2022 20:38:11 +0200 Subject: [PATCH] lDDT: limit max number of possible chain mappings Not that it is helpful to enumerate more chain mappings than atoms in the universe, but the limit is mainly in place to avoid a memory explosion. We're using a itertools.product there. From the docu: Before product() runs, it completely consumes the input iterables, keeping pools of values in memory to generate the products. Accordingly, it is only useful with finite inputs. --- modules/mol/alg/pymod/chain_mapping.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/modules/mol/alg/pymod/chain_mapping.py b/modules/mol/alg/pymod/chain_mapping.py index 43d7ac143..11cae5595 100644 --- a/modules/mol/alg/pymod/chain_mapping.py +++ b/modules/mol/alg/pymod/chain_mapping.py @@ -93,6 +93,12 @@ class ChainMapper: :param min_nuc_length: Minimal number of residues for a nucleotide chain to be considered in target and in models. :type min_nuc_length: :class:`int` + :param n_max_naive: Max possible chain mappings that are enumerated in + :func:`~GetNaivelDDTMapping` / + :func:`~GetDecomposerlDDTMapping`. A + :class:`RuntimeError` is raised in case of bigger + complexity. + :type n_max_naive: :class:`int` """ def __init__(self, target, resnum_alignments=False, pep_seqid_thr = 95., pep_gap_thr = 0.1, @@ -100,7 +106,8 @@ class ChainMapper: pep_subst_mat = seq.alg.BLOSUM100, pep_gap_open = -5, pep_gap_ext = -2, nuc_subst_mat = seq.alg.NUC44, nuc_gap_open = -4, nuc_gap_ext = -4, - min_pep_length = 10, min_nuc_length = 4): + min_pep_length = 10, min_nuc_length = 4, + n_max_naive = 1e8): # target structure preprocessing self._target, self._polypep_seqs, self._polynuc_seqs = \ @@ -122,6 +129,7 @@ class ChainMapper: self.nuc_gap_thr = nuc_gap_thr self.min_pep_length = min_pep_length self.min_nuc_length = min_nuc_length + self.n_max_naive = n_max_naive # lazy computed attributes self._chem_groups = None @@ -325,7 +333,8 @@ class ChainMapper: best_mapping = None best_lddt = -1.0 - for mapping in _ChainMappings(self.chem_groups, chem_mapping): + for mapping in _ChainMappings(self.chem_groups, chem_mapping, + self.n_max_naive): # chain_mapping and alns as input for lDDT computation lddt_chain_mapping = dict() lddt_alns = dict() @@ -389,7 +398,8 @@ class ChainMapper: best_mapping = None best_lddt = -1.0 - for mapping in _ChainMappings(self.chem_groups, chem_mapping): + for mapping in _ChainMappings(self.chem_groups, chem_mapping, + self.n_max_naive): lDDT = lddt_scorer.lDDT(self.chem_groups, mapping) if lDDT > best_lddt: @@ -1830,7 +1840,7 @@ def _ConcatIterators(iterators): yield list(item) -def _ChainMappings(ref_chains, mdl_chains): +def _ChainMappings(ref_chains, mdl_chains, n_max=None): """Returns all possible ways to map *mdl_chains* onto fixed *ref_chains* :param ref_chains: List of list of chemically equivalent chains in reference @@ -1838,6 +1848,9 @@ def _ChainMappings(ref_chains, mdl_chains): :param mdl_chains: Equally long list of list of chemically equivalent chains in model that map on those ref chains. :type mdl_chains: :class:`list` of :class:`list` + :param n_max: Aborts and raises :class:`RuntimeError` if max number of + mappings is above this threshold. + :type n_max: :class:`int` :returns: Iterator over all possible mappings of *mdl_chains* onto fixed *ref_chains*. Potentially contains None as padding when number of model chains for a certain mapping is smaller than the according @@ -1858,6 +1871,11 @@ def _ChainMappings(ref_chains, mdl_chains): [[None, 'y', 'x'], ['j', 'i']]] """ assert(len(ref_chains) == len(mdl_chains)) + + if n_max is not None: + if not _NMappingsWithin(ref_chains, mdl_chains, n_max): + raise RuntimeError(f"Too many mappings. Max allowed: {n_max}") + # one iterator per mapping representing all mdl combinations relative to # reference iterators = list() -- GitLab