diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures index 2e4c324792a614591a2aa6ae73e882b5cd5c2750..75972640de0c7dd4869470593ab00628962beb97 100644 --- a/actions/ost-compare-structures +++ b/actions/ost-compare-structures @@ -457,11 +457,15 @@ def _ParseArgs(): "--n-max-naive", dest="n_max_naive", required=False, - default=12, + default=40320, type=int, - help=("If number of chains in model and reference are below or equal " - "that number, the chain mapping will naively enumerate all " - "possible mappings. A heuristic is used otherwise.")) + help=("Parameter for chain mapping. If the number of possible " + "mappings is <= *n_max_naive*, the full " + "mapping solution space is enumerated to find the " + "the mapping with optimal QS-score. A heuristic is used " + "otherwise. The default of 40320 corresponds to an octamer " + "(8! = 40320). A structure with stoichiometry A6B2 would be " + "6!*2! = 1440 etc.")) parser.add_argument( "--dump-aligned-residues", diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst index bd5785bfd23431642f627bed679680e21fa74402..eee1181a1275a8615c6893dded821d314114bc68 100644 --- a/modules/doc/actions.rst +++ b/modules/doc/actions.rst @@ -333,12 +333,24 @@ Details on the usage (output of ``ost compare-structures --help``): --lddt-no-stereochecks Disable stereochecks for lDDT computation --n-max-naive N_MAX_NAIVE - If number of chains in model and reference are below - or equal that number, the chain mapping will naively - enumerate all possible mappings. A heuristic is used - otherwise. - - + Parameter for chain mapping. If the number of possible + mappings is <= *n_max_naive*, the full mapping + solution space is enumerated to find the mapping + with optimal QS-score. A heuristic is used otherwise. + The default of 40320 corresponds to an octamer (8! = + 40320). A structure with stoichiometry A6B2 would be + 6!*2! = 1440 etc. 
+ --dump-aligned-residues + Dump additional info on aligned model and reference + residues. + --dump-pepnuc-alns Dump alignments of mapped chains but with sequences + that did not undergo Molck preprocessing in the + scorer. Sequences are extracted from model/target + after undergoing selection for peptide and nucleotide + residues. + --dump-pepnuc-aligned-residues + Dump additional info on model and reference residues + that occur in pepnuc alignments. .. _ost compare ligand structures: diff --git a/modules/mol/alg/pymod/chain_mapping.py b/modules/mol/alg/pymod/chain_mapping.py index 7b8d2f645f9d23b9bd0a15ab8c60e38e45136512..417cfa58d59b55c1e67be4e3d3b40dd1251f53de 100644 --- a/modules/mol/alg/pymod/chain_mapping.py +++ b/modules/mol/alg/pymod/chain_mapping.py @@ -1228,24 +1228,24 @@ class ChainMapper: return MappingResult(self.target, mdl, self.chem_groups, chem_mapping, final_mapping, alns) - def GetMapping(self, model, n_max_naive = 12): + def GetMapping(self, model, n_max_naive = 40320): """ Convenience function to get mapping with currently preferred method - If number of chains in model and target are <= *n_max_naive*, a naive - QS-score mapping is performed. For anything else, a QS-score mapping - with the greedy_full strategy is performed - (greedy_prune_contact_map = True). + If number of possible chain mappings is <= *n_max_naive*, a naive + QS-score mapping is performed and optimal QS-score is guaranteed. + For anything else, a QS-score mapping with the greedy_full strategy is + performed (greedy_prune_contact_map = True). The default for + *n_max_naive* of 40320 corresponds to an octamer (8!=40320). A + structure with stoichiometry A6B2 would be 6!*2!=1440 etc. 
""" - n_trg_chains = len(self.target.chains) - res = self.GetChemMapping(model) - n_mdl_chains = len(res[2].chains) - if n_trg_chains <= n_max_naive and n_mdl_chains <= n_max_naive: + chem_mapping_res = self.GetChemMapping(model) + if _NMappingsWithin(self.chem_groups, chem_mapping_res[0], n_max_naive): return self.GetQSScoreMapping(model, strategy="naive", - chem_mapping_result=res) + chem_mapping_result=chem_mapping_res) else: return self.GetQSScoreMapping(model, strategy="greedy_full", greedy_prune_contact_map=True, - chem_mapping_result=res) + chem_mapping_result=chem_mapping_res) def GetRepr(self, substructure, model, topn=1, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], bb_only=False, diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py index af70aca279de11ec5a615d6bfa92a794a17bf665..530a881bea6121795047225984484367a784d858 100644 --- a/modules/mol/alg/pymod/scoring.py +++ b/modules/mol/alg/pymod/scoring.py @@ -129,10 +129,13 @@ class Scorer: :param lddt_no_stereochecks: Whether to compute lDDT without stereochemistry checks :type lddt_no_stereochecks: :class:`bool` - :param n_max_naive: Parameter for chain mapping. If *model* and *target* - have less or equal that number of chains, the full + :param n_max_naive: Parameter for chain mapping. If the number of possible + mappings is <= *n_max_naive*, the full mapping solution space is enumerated to find the - the optimum. A heuristic is used otherwise. + the optimum. A heuristic is used otherwise. The default + of 40320 corresponds to an octamer (8! = 40320). + A structure with stoichiometry A6B2 would be + 6!*2! = 1440 etc. :type n_max_naive: :class:`int` :param oum: Override USalign Mapping. Inject mapping of :class:`Scorer` object into USalign to compute TM-score. 
Experimental feature @@ -144,7 +147,7 @@ class Scorer: def __init__(self, model, target, resnum_alignments=False, molck_settings = None, cad_score_exec = None, custom_mapping=None, usalign_exec = None, - lddt_no_stereochecks=False, n_max_naive=12, + lddt_no_stereochecks=False, n_max_naive=40320, oum=False): self._target_orig = target diff --git a/modules/mol/alg/tests/test_chain_mapping.py b/modules/mol/alg/tests/test_chain_mapping.py index 3d4ccbde250a3f9e2f4b39e7b4b9a5010a18645f..e26ad858d36432ff8b170701b719f6760e43cc1e 100644 --- a/modules/mol/alg/tests/test_chain_mapping.py +++ b/modules/mol/alg/tests/test_chain_mapping.py @@ -289,17 +289,20 @@ class TestChainMapper(unittest.TestCase): greedy_rigid_res = mapper.GetRigidMapping(mdl, strategy="greedy_iterative_rmsd") self.assertEqual(greedy_rigid_res.mapping, [['X', 'Y'],[None],['Z']]) + # the default chain mapping + default_res = mapper.GetMapping(mdl) + # test flat mapping functionality of MappingResult - flat_map = greedy_rigid_res.GetFlatMapping() + flat_map = default_res.GetFlatMapping() self.assertEqual(len(flat_map), 3) - self.assertEqual(flat_map[greedy_rigid_res.chem_groups[0][0]], 'X') - self.assertEqual(flat_map[greedy_rigid_res.chem_groups[0][1]], 'Y') - self.assertEqual(flat_map[greedy_rigid_res.chem_groups[2][0]], 'Z') - flat_map = greedy_rigid_res.GetFlatMapping(mdl_as_key=True) + self.assertEqual(flat_map[default_res.chem_groups[0][0]], 'X') + self.assertEqual(flat_map[default_res.chem_groups[0][1]], 'Y') + self.assertEqual(flat_map[default_res.chem_groups[2][0]], 'Z') + flat_map = default_res.GetFlatMapping(mdl_as_key=True) self.assertEqual(len(flat_map), 3) - self.assertEqual(greedy_rigid_res.chem_groups[0][0], flat_map['X']) - self.assertEqual(greedy_rigid_res.chem_groups[0][1], flat_map['Y']) - self.assertEqual(greedy_rigid_res.chem_groups[2][0], flat_map['Z']) + self.assertEqual(default_res.chem_groups[0][0], flat_map['X']) + self.assertEqual(default_res.chem_groups[0][1], flat_map['Y']) + 
self.assertEqual(default_res.chem_groups[2][0], flat_map['Z']) # test Align function of ChainMapper _, mdl_polypep_seqs, mdl_polynuc_seqs = mapper.ProcessStructure(mdl)