Add dump options for aligned residues and pepnuc alignments

actions/ost-compare-structures:
  * new flags --dump-aligned-residues, --dump-pepnuc-alns and
    --dump-pepnuc-aligned-residues
  * _AlnToFastaStr: the model record now starts with a FASTA header marker
  * new helper _GetAlignedResidues

modules/mol/alg/pymod/scoring.py:
  * Scorer keeps the inputs passed at construction (model_orig/target_orig)
    and exposes pepnuc_aln, alignments built from them without Molck
  * chain names in qs/contact interface tuples are lexicographically sorted

Review notes:
  * Scorer.__init__ now runs the peptide/nucleotide selection on the
    Molck processed copies (self._model/self._target). Selecting from the
    raw model/target arguments discarded the Molck cleanup.
  * Line breaks and indentation were reconstructed from a whitespace
    collapsed copy of this patch. The index hashes below are the ones of
    the original patch and do not reflect the review changes.

diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures
index 9dacd7786f13195ca42451ac4b569a01a502cdbc..2e4c324792a614591a2aa6ae73e882b5cd5c2750 100644
--- a/actions/ost-compare-structures
+++ b/actions/ost-compare-structures
@@ -463,6 +463,31 @@ def _ParseArgs():
               "that number, the chain mapping will naively enumerate all "
               "possible mappings. A heuristic is used otherwise."))
 
+    parser.add_argument(
+        "--dump-aligned-residues",
+        dest="dump_aligned_residues",
+        default=False,
+        action="store_true",
+        help=("Dump additional info on aligned model and reference residues."))
+
+    parser.add_argument(
+        "--dump-pepnuc-alns",
+        dest="dump_pepnuc_alns",
+        default=False,
+        action="store_true",
+        help=("Dump alignments of mapped chains but with sequences that did "
+              "not undergo Molck preprocessing in the scorer. Sequences are "
+              "extracted from model/target after undergoing selection for "
+              "peptide and nucleotide residues."))
+
+    parser.add_argument(
+        "--dump-pepnuc-aligned-residues",
+        dest="dump_pepnuc_aligned_residues",
+        default=False,
+        action="store_true",
+        help=("Dump additional info on model and reference residues that occur "
+              "in pepnuc alignments."))
+
     return parser.parse_args()
 
 def _Rename(ent):
@@ -558,7 +583,7 @@ def _AlnToFastaStr(aln):
     """
     s1 = aln.GetSequence(0)
     s2 = aln.GetSequence(1)
-    return f">reference:{s1.name}\n{str(s1)}\nmodel:{s2.name}\n{str(s2)}"
+    return f">reference:{s1.name}\n{str(s1)}\n>model:{s2.name}\n{str(s2)}"
 
 def _GetInconsistentResidues(alns):
     lst = list()
@@ -609,6 +634,54 @@ def _PatchScoresToJSONList(interface_dict, score_dict):
         json_list += score_dict[ch]
     return json_list
 
+def _GetAlignedResidues(aln):
+    """ Collects per-column residue info for each alignment in *aln*
+
+    Sequence 0 of an alignment is treated as reference, sequence 1 as model.
+
+    :param aln: Iterable of alignments whose columns give access to
+                residues through GetResidue
+    :returns: :class:`list` with one :class:`dict` per alignment. The keys
+              "model" and "reference" each map to a :class:`dict` with
+              the chain name ("chain") and a list ("residues") that has
+              one entry per alignment column: None if the residue in
+              that column is invalid, a :class:`dict` with one letter
+              code ("olc") and "{num}.{ins_code}" ("num") otherwise.
+    """
+    aligned_residues = list()
+    for a in aln:
+        mdl_lst = list()
+        ref_lst = list()
+        for c in a:
+            mdl_r = c.GetResidue(1)
+            ref_r = c.GetResidue(0)
+            if mdl_r.IsValid():
+                olc = mdl_r.one_letter_code
+                num = mdl_r.GetNumber().num
+                ins_code = mdl_r.GetNumber().ins_code.strip("\u0000")
+                mdl_lst.append({"olc": olc,
+                                "num": f"{num}.{ins_code}"})
+            else:
+                mdl_lst.append(None)
+
+            if ref_r.IsValid():
+                olc = ref_r.one_letter_code
+                num = ref_r.GetNumber().num
+                ins_code = ref_r.GetNumber().ins_code.strip("\u0000")
+                ref_lst.append({"olc": olc,
+                                "num": f"{num}.{ins_code}"})
+            else:
+                ref_lst.append(None)
+
+        mdl_dct = {"chain": a.GetSequence(1).GetName(),
+                   "residues": mdl_lst}
+        ref_dct = {"chain": a.GetSequence(0).GetName(),
+                   "residues": ref_lst}
+
+        aligned_residues.append({"model": mdl_dct,
+                                 "reference": ref_dct})
+    return aligned_residues
+
 def _Process(model, reference, args):
 
     mapping = None
@@ -637,6 +710,15 @@ def _Process(model, reference, args):
     out["aln"] = [_AlnToFastaStr(aln) for aln in scorer.aln]
     out["inconsistent_residues"] = ir
 
+    if args.dump_aligned_residues:
+        out["aligned_residues"] = _GetAlignedResidues(scorer.aln)
+
+    if args.dump_pepnuc_alns:
+        out["pepnuc_aln"] = [_AlnToFastaStr(aln) for aln in scorer.pepnuc_aln]
+
+    if args.dump_pepnuc_aligned_residues:
+        out["pepnuc_aligned_residues"] = _GetAlignedResidues(scorer.pepnuc_aln)
+
     if args.lddt:
         out["lddt"] = scorer.lddt
 
diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py
index 45ccb439e860377dcc672c9fa32516de13c6c749..af70aca279de11ec5a615d6bfa92a794a17bf665 100644
--- a/modules/mol/alg/pymod/scoring.py
+++ b/modules/mol/alg/pymod/scoring.py
@@ -147,15 +147,18 @@ class Scorer:
                  lddt_no_stereochecks=False, n_max_naive=12,
                  oum=False):
 
-        if isinstance(model, mol.EntityView):
-            model = mol.CreateEntityFromView(model, False)
+        self._target_orig = target
+        self._model_orig = model
+
+        if isinstance(self._model_orig, mol.EntityView):
+            self._model = mol.CreateEntityFromView(self._model_orig, False)
         else:
-            model = model.Copy()
+            self._model = self._model_orig.Copy()
 
-        if isinstance(target, mol.EntityView):
-            target = mol.CreateEntityFromView(target, False)
+        if isinstance(self._target_orig, mol.EntityView):
+            self._target = mol.CreateEntityFromView(self._target_orig, False)
         else:
-            target = target.Copy()
+            self._target = self._target_orig.Copy()
 
         if molck_settings is None:
             molck_settings = MolckSettings(rm_unk_atoms=True,
@@ -166,8 +169,8 @@ class Scorer:
                                            colored=False,
                                            map_nonstd_res=True,
                                            assign_elem=True)
-        Molck(model, conop.GetDefaultLib(), molck_settings)
-        Molck(target, conop.GetDefaultLib(), molck_settings)
-        self._model = model.Select("peptide=True or nucleotide=True")
-        self._target = target.Select("peptide=True or nucleotide=True")
+        Molck(self._model, conop.GetDefaultLib(), molck_settings)
+        Molck(self._target, conop.GetDefaultLib(), molck_settings)
+        self._model = self._model.Select("peptide=True or nucleotide=True")
+        self._target = self._target.Select("peptide=True or nucleotide=True")
 
@@ -245,6 +248,7 @@ class Scorer:
         self._target_interface_residues = None
         self._aln = None
         self._stereochecked_aln = None
+        self._pepnuc_aln = None
 
         # lazily constructed scorer objects
         self._lddt_scorer = None
@@ -326,6 +330,14 @@ class Scorer:
         """
         return self._model
 
+    @property
+    def model_orig(self):
+        """ The original model passed at object construction
+
+        :type: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
+        """
+        return self._model_orig
+
     @property
     def target(self):
         """ Target with Molck cleanup
@@ -334,6 +346,14 @@ class Scorer:
         """
         return self._target
 
+    @property
+    def target_orig(self):
+        """ The original target passed at object construction
+
+        :type: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
+        """
+        return self._target_orig
+
     @property
     def aln(self):
         """ Alignments of :attr:`model`/:attr:`target` chains
@@ -353,12 +373,26 @@ class Scorer:
 
         The alignments may differ, as stereochecks potentially remove residues
 
-        :type: :class:``
+        :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
         """
         if self._stereochecked_aln is None:
             self._compute_stereochecked_aln()
         return self._stereochecked_aln
 
+    @property
+    def pepnuc_aln(self):
+        """ Alignments of :attr:`model_orig`/:attr:`target_orig` chains
+
+        Selects for peptide and nucleotide residues before sequence
+        extraction. Includes residues that would be removed by molck in
+        structure preprocessing.
+
+        :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
+        """
+        if self._pepnuc_aln is None:
+            self._compute_pepnuc_aln()
+        return self._pepnuc_aln
+
     @property
     def stereochecked_model(self):
         """ View of :attr:`~model` that has stereochemistry checks applied
@@ -638,11 +672,15 @@ class Scorer:
         """ Interfaces in :attr:`~target` with non-zero contribution to
         :attr:`~qs_global`/:attr:`~qs_best`
 
+        Chain names are lexicographically sorted.
+
         :type: :class:`list` of :class:`tuple` with 2 elements each:
                (trg_ch1, trg_ch2)
         """
         if self._qs_target_interfaces is None:
             self._qs_target_interfaces = self.qs_scorer.qsent1.interacting_chains
+            self._qs_target_interfaces = \
+            [(min(x[0],x[1]), max(x[0],x[1])) for x in self._qs_target_interfaces]
         return self._qs_target_interfaces
 
     @property
@@ -650,11 +688,16 @@ class Scorer:
         """ Interfaces in :attr:`~model` with non-zero contribution to
         :attr:`~qs_global`/:attr:`~qs_best`
 
+        Chain names are lexicographically sorted.
+
         :type: :class:`list` of :class:`tuple` with 2 elements each:
                (mdl_ch1, mdl_ch2)
         """
         if self._qs_model_interfaces is None:
             self._qs_model_interfaces = self.qs_scorer.qsent2.interacting_chains
+            self._qs_model_interfaces = \
+            [(min(x[0],x[1]), max(x[0],x[1])) for x in self._qs_model_interfaces]
+
         return self._qs_model_interfaces
 
     @property
@@ -662,6 +705,8 @@ class Scorer:
         """ Interfaces in :attr:`~qs_target_interfaces` that can be mapped
         to :attr:`~model`.
 
+        Target chain names are lexicographically sorted.
+
         :type: :class:`list` of :class:`tuple` with 4 elements each:
                (trg_ch1, trg_ch2, mdl_ch1, mdl_ch2)
         """
@@ -724,26 +769,32 @@ class Scorer:
     def contact_target_interfaces(self):
         """ Interfaces in :class:`target` which have at least one contact
 
-        Contact as defined in :attr:`~native_contacts`
+        Contact as defined in :attr:`~native_contacts`,
+        chain names are lexicographically sorted.
 
         :type: :class:`list` of :class:`tuple` with 2 elements each
                (trg_ch1, trg_ch2)
         """
         if self._contact_target_interfaces is None:
-            self._contact_target_interfaces = self.contact_scorer.cent1.interacting_chains
+            tmp = self.contact_scorer.cent1.interacting_chains
+            tmp = [(min(x[0],x[1]), max(x[0],x[1])) for x in tmp]
+            self._contact_target_interfaces = tmp
         return self._contact_target_interfaces
 
     @property
     def contact_model_interfaces(self):
         """ Interfaces in :class:`model` which have at least one contact
 
-        Contact as defined in :attr:`~native_contacts`
+        Contact as defined in :attr:`~native_contacts`,
+        chain names are lexicographically sorted.
 
         :type: :class:`list` of :class:`tuple` with 2 elements each
                (mdl_ch1, mdl_ch2)
         """
         if self._contact_model_interfaces is None:
-            self._contact_model_interfaces = self.contact_scorer.cent2.interacting_chains
+            tmp = self.contact_scorer.cent2.interacting_chains
+            tmp = [(min(x[0],x[1]), max(x[0],x[1])) for x in tmp]
+            self._contact_model_interfaces = tmp
         return self._contact_model_interfaces
 
     @property
@@ -902,7 +953,7 @@ class Scorer:
         """ Interfaces in :attr:`target` that are relevant for DockQ
 
         In principle a subset of :attr:`~contact_target_interfaces` that only
-        contains peptide sequences.
+        contains peptide sequences. Chain names are lexicographically sorted.
 
         :type: :class:`list` of :class:`tuple` with 2 elements each:
                (trg_ch1, trg_ch2)
@@ -920,6 +971,8 @@ class Scorer:
         """ Interfaces in :attr:`dockq_target_interfaces` that can be mapped
         to model
 
+        Target chain names are lexicographically sorted
+
         :type: :class:`list` of :class:`tuple` with 4 elements each:
                (trg_ch1, trg_ch2, mdl_ch1, mdl_ch2)
         """
@@ -1322,6 +1375,14 @@ class Scorer:
                 alns[-1].AttachView(1, mdl_seqs[mdl_ch].GetAttachedView())
         return alns
 
+    def _compute_pepnuc_aln(self):
+        # uses the inputs as passed at construction, i.e. sequences are
+        # extracted without Molck preprocessing
+        query = "peptide=true or nucleotide=true"
+        pep_nuc_target = self.target_orig.Select(query)
+        pep_nuc_model = self.model_orig.Select(query)
+        self._pepnuc_aln = self._aln_helper(pep_nuc_target, pep_nuc_model)
+
     def _compute_aln(self):
         self._aln = self._aln_helper(self.target, self.model)
 