diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures index cfaabbd373dd53b9b53c6282ae21148a59b46c25..9606e32f4b6fd1745befc027b936058482d4355d 100644 --- a/actions/ost-compare-structures +++ b/actions/ost-compare-structures @@ -57,6 +57,8 @@ results: * "cad_exec" * "usalign_exec" * "lddt_no_stereochecks" + * "min_pep_length" + * "min_nuc_length" The pairwise sequence alignments are computed with Needleman-Wunsch using BLOSUM62 (NUC44 for nucleotides). Many benchmarking scenarios preprocess the @@ -492,6 +494,36 @@ def _ParseArgs(): action="store_true", help=("Dump additional info on model and reference residues that occur " "in pepnuc alignments.")) + + parser.add_argument( + "--min-pep-length", + dest="min_pep_length", + default = 10, + type=int, + help=("Relevant parameter if short peptides are involved in scoring." + "Minimum peptide length for a chain in the target structure to " + "be considered in chain mapping. The chain mapping algorithm " + "first performs an all vs. all pairwise sequence alignment to " + "identify \"equal\" chains within the target structure. We go " + "for simple sequence identity there. Short sequences can be " + "problematic as they may produce high sequence identity " + "alignments by pure chance.") + ) + + parser.add_argument( + "--min-nuc-length", + dest="min_nuc_length", + default = 4, + type=int, + help=("Relevant parameter if short nucleotides are involved in scoring." + "Minimum nucleotide length for a chain in the target structure to " + "be considered in chain mapping. The chain mapping algorithm " + "first performs an all vs. all pairwise sequence alignment to " + "identify \"equal\" chains within the target structure. We go " + "for simple sequence identity there. 
Short sequences can be " + "problematic as they may produce high sequence identity " + "alignments by pure chance.") + ) return parser.parse_args() @@ -695,7 +727,9 @@ def _Process(model, reference, args): usalign_exec = args.usalign_exec, lddt_no_stereochecks = args.lddt_no_stereochecks, n_max_naive = args.n_max_naive, - oum = args.oum) + oum = args.oum, + min_pep_length = args.min_pep_length, + min_nuc_length = args.min_nuc_length) ir = _GetInconsistentResidues(scorer.aln) if len(ir) > 0 and args.enforce_consistency: @@ -876,6 +910,8 @@ def _Main(): out["cad_exec"] = args.cad_exec out["usalign_exec"] = args.usalign_exec out["lddt_no_stereochecks"] = args.lddt_no_stereochecks + out["min_pep_length"] = args.min_pep_length + out["min_nuc_length"] = args.min_nuc_length out["status"] = "SUCCESS" with open(args.output, 'w') as fh: json.dump(out, fh, indent=4, sort_keys=False) diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst index eee1181a1275a8615c6893dded821d314114bc68..523926fc33fea5d92baaa86587b03766dc527656 100644 --- a/modules/doc/actions.rst +++ b/modules/doc/actions.rst @@ -40,11 +40,16 @@ Details on the usage (output of ``ost compare-structures --help``): [--local-lddt] [--bb-lddt] [--bb-local-lddt] [--cad-score] [--local-cad-score] [--cad-exec CAD_EXEC] - [--usalign-exec USALIGN_EXEC] [--qs-score] - [--dockq] [--contact-scores] [--rigid-scores] + [--usalign-exec USALIGN_EXEC] + [--override-usalign-mapping] [--qs-score] + [--dockq] [--ics] [--ips] [--rigid-scores] [--patch-scores] [--tm-score] [--lddt-no-stereochecks] [--n-max-naive N_MAX_NAIVE] + [--dump-aligned-residues] [--dump-pepnuc-alns] + [--dump-pepnuc-aligned-residues] + [--min-pep-length MIN_PEP_LENGTH] + [--min-nuc-length MIN_NUC_LENGTH] Evaluate model against reference @@ -104,6 +109,8 @@ Details on the usage (output of ``ost compare-structures --help``): * "cad_exec" * "usalign_exec" * "lddt_no_stereochecks" + * "min_pep_length" + * "min_nuc_length" The pairwise sequence alignments 
are computed with Needleman-Wunsch using BLOSUM62 (NUC44 for nucleotides). Many benchmarking scenarios preprocess the @@ -351,6 +358,26 @@ Details on the usage (output of ``ost compare-structures --help``): --dump-pepnuc-aligned-residues Dump additional info on model and reference residues that occur in pepnuc alignments. + --min-pep-length MIN_PEP_LENGTH + Relevant parameter if short peptides are involved in + scoring. Minimum peptide length for a chain in the + target structure to be considered in chain mapping. + The chain mapping algorithm first performs an all vs. + all pairwise sequence alignment to identify "equal" + chains within the target structure. We go for simple + sequence identity there. Short sequences can be + problematic as they may produce high sequence identity + alignments by pure chance. + --min-nuc-length MIN_NUC_LENGTH + Relevant parameter if short nucleotides are involved + in scoring. Minimum nucleotide length for a chain in + the target structure to be considered in chain + mapping. The chain mapping algorithm first performs an + all vs. all pairwise sequence alignment to identify + "equal" chains within the target structure. We go for + simple sequence identity there. Short sequences can be + problematic as they may produce high sequence identity + alignments by pure chance. .. _ost compare ligand structures: diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py index 6aaa2276e00e2d7233c2e0e870b3f56ccba1def5..e568fdf3afab4577e258ca8bd021e678e9937b33 100644 --- a/modules/mol/alg/pymod/scoring.py +++ b/modules/mol/alg/pymod/scoring.py @@ -141,12 +141,33 @@ class Scorer: object into USalign to compute TM-score. Experimental feature with limitations. :type oum: :class:`bool` + :param min_pep_length: Relevant parameter if short peptides are involved in + scoring. Minimum peptide length for a chain in the + target structure to be considered in chain mapping. + The chain mapping algorithm first performs an all vs. 
+ all pairwise sequence alignment to identify \"equal\" + chains within the target structure. We go for simple + sequence identity there. Short sequences can be + problematic as they may produce high sequence identity + alignments by pure chance. + :type min_pep_length: :class:`int` + :param min_nuc_length: Relevant parameter if short nucleotides are involved + in scoring. Minimum nucleotide length for a chain in + the target structure to be considered in chain + mapping. The chain mapping algorithm first performs + an all vs. all pairwise sequence alignment to + identify \"equal\" chains within the target + structure. We go for simple sequence identity there. + Short sequences can be problematic as they may + produce high sequence identity alignments by pure + chance. + :type min_nuc_length: :class:`int` """ def __init__(self, model, target, resnum_alignments=False, molck_settings = None, cad_score_exec = None, custom_mapping=None, usalign_exec = None, lddt_no_stereochecks=False, n_max_naive=40320, - oum=False): + oum=False, min_pep_length = 10, min_nuc_length = 4): self._target_orig = target self._model_orig = model @@ -231,6 +252,8 @@ class Scorer: self.lddt_no_stereochecks = lddt_no_stereochecks self.n_max_naive = n_max_naive self.oum = oum + self.min_pep_length = min_pep_length + self.min_nuc_length = min_nuc_length # lazily evaluated attributes self._stereochecked_model = None @@ -491,7 +514,9 @@ class Scorer: if self._chain_mapper is None: self._chain_mapper = chain_mapping.ChainMapper(self.target, n_max_naive=1e9, - resnum_alignments=self.resnum_alignments) + resnum_alignments=self.resnum_alignments, + min_pep_length=self.min_pep_length, + min_nuc_length=self.min_nuc_length) return self._chain_mapper @property