diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures index 48d33839e43ff0dba2fb1f5e259ea95bce9d9d3d..595c18b2e320726f97e1c994f049cb69f7497916 100644 --- a/actions/ost-compare-structures +++ b/actions/ost-compare-structures @@ -303,7 +303,62 @@ def _ParseArgs(): default=False, action="store_true", help=("Compute QS-score, stored as key \"qs_global\", and the QS-best " - "variant, stored as key \"qs_best\".")) + "variant, stored as key \"qs_best\". Interfaces in the reference " + "with non-zero contribution to QS-score are available as key " + "\"qs_reference_interfaces\", the ones from the model as key " + "\"qs_model_interfaces\". \"qs_interfaces\" is a subset of " + "\"qs_reference_interfaces\" that contains interfaces that " + "can be mapped to the model. They are stored as lists in format " + "[ref_ch1, ref_ch2, mdl_ch1, mdl_ch2]. The respective " + "per-interface scores for \"qs_interfaces\" are available as " + "keys \"per_interface_qs_global\" and \"per_interface_qs_best\"")) + + parser.add_argument( + "--dockq", + dest="dockq", + default=False, + action="store_true", + help=("Compute DockQ scores and its components. Relevant interfaces " + "with at least one contact (any atom within 5A) of the reference " + "structure are available as key \"dockq_reference_interfaces\". " + "Only interfaces between peptide chains are considered here! " + "Key \"dockq_interfaces\" is a subset of " + "\"dockq_reference_interfaces\" that contains interfaces that " + "can be mapped to the model. They are stored as lists in format " + "[ref_ch1, ref_ch2, mdl_ch1, mdl_ch2]. The respective " + "DockQ scores for \"dockq_interfaces\" are available as key " + "\"dockq\". It's components are available as keys: " + "\"fnat\" (fraction of reference contacts which are also there " + "in model) \"irmsd\" (interface RMSD), \"lrmsd\" (ligand RMSD). " + "The DockQ score is strictly designed to score each interface " + "individually. We also provide two averaged versions to get one " + "full model score: \"dockq_ave\", \"dockq_wave\". The first is " + "simply the average of \"dockq_scores\", the latter is a " + "weighted average with weights derived from number of contacts " + "in the reference interfaces. These two scores only consider " + "interfaces that are present in both, the model and the " + "reference. \"dockq_ave_full\" and \"dockq_wave_full\" add zeros " + "in the average computation for each interface that is only " + "present in the reference but not in the model.")) + + parser.add_argument( + "--contact-scores", + dest="contact_scores", + default=False, + action="store_true", + help=("Computes interface contact based scores. A contact between two " + "residues of different chains is defined as having at least one " + "heavy atom within 5A. Contacts in reference structure are " + "available as key \"reference_contacts\". Each contact specifies " + "the interacting residues in format " + "\"<cname>.<rnum>.<ins_code>\". Model contacts are available as " + "key \"model_contacts\". The precision which is available as key " + "\"contact_precision\" reports the fraction of model contacts " + "that are also present in the reference. The recall which is " + "available as key \"contact_recall\" reports the fraction of " + "reference contacts that are correctly reproduced in the model. " + "The ICS score (Interface Contact Similarity) available as key " + "\"ics\" combines precision and recall using the F1-measure.")) parser.add_argument( "--rigid-scores", @@ -319,36 +374,6 @@ def _ParseArgs(): "these positions and transformation, \"transform\": the used 4x4 " "transformation matrix that superposes model onto reference.")) - parser.add_argument( - "--interface-scores", - dest="interface_scores", - default=False, - action="store_true", - help=("Per interface scores for each interface that has at least one " - "contact in the reference, i.e. at least one pair of heavy atoms " - "within 5A. The respective interfaces are available from key " - "\"interfaces\" which is a list of tuples in form (ref_ch1, " - "ref_ch2, mdl_ch1, mdl_ch2). Per-interface scores are available " - "as lists referring to these interfaces and have the following " - "keys: \"nnat\" (number of contacts in reference), \"nmdl\" " - "(number of contacts in model), \"fnat\" (fraction of reference " - "contacts which are also there in model), \"fnonnat\" (fraction " - "of model contacts which are not there in target), \"irmsd\" " - "(interface RMSD), \"lrmsd\" (ligand RMSD), \"dockq_scores\" " - "(per-interface score computed from \"fnat\", \"irmsd\" and " - "\"lrmsd\"), \"interface_qs_global\" and \"interface_qs_best\" " - "(per-interface versions of the two QS-score variants). The " - "DockQ score is strictly designed to score each interface " - "individually. We also provide two averaged versions to get one " - "full model score: \"dockq_ave\", \"dockq_wave\". The first is " - "simply the average of \"dockq_scores\", the latter is a " - "weighted average with weights derived from \"nnat\". These two " - "scores only consider interfaces that are present in both, the " - "model and the reference. \"dockq_ave_full\" and " - "\"dockq_wave_full\" add zeros in the average computation for " - "each interface that is only present in the reference but not in " - "the model.")) - parser.add_argument( "--patch-scores", dest="patch_scores", @@ -596,30 +621,42 @@ def _Process(model, reference, args): if args.qs_score: out["qs_global"] = scorer.qs_global out["qs_best"] = scorer.qs_best - - if args.rigid_scores: - out["oligo_gdtts"] = scorer.gdtts - out["oligo_gdtha"] = scorer.gdtha - out["rmsd"] = scorer.rmsd - data = scorer.transform.data - out["transform"] = [data[i:i + 4] for i in range(0, len(data), 4)] - - if args.interface_scores: - out["interfaces"] = scorer.interfaces - out["nnat"] = scorer.native_contacts - out["nmdl"] = scorer.model_contacts + out["qs_reference_interfaces"] = scorer.qs_target_interfaces + out["qs_model_interfaces"] = scorer.qs_model_interfaces + out["qs_interfaces"] = scorer.qs_interfaces + out["per_interface_qs_global"] = scorer.per_interface_qs_global + out["per_interface_qs_best"] = scorer.per_interface_qs_best + + if args.contact_scores: + out["reference_contacts"] = scorer.native_contacts + out["model_contacts"] = scorer.model_contacts + out["contact_precision"] = scorer.contact_precision + out["contact_recall"] = scorer.contact_recall + out["ics"] = scorer.ics + + if args.dockq: + out["dockq_reference_interfaces"] = scorer.dockq_target_interfaces + out["dockq_interfaces"] = scorer.dockq_interfaces + out["dockq"] = scorer.dockq_scores out["fnat"] = scorer.fnat - out["fnonnat"] = scorer.fnonnat out["irmsd"] = scorer.irmsd out["lrmsd"] = scorer.lrmsd - out["dockq_scores"] = scorer.dockq_scores - out["interface_qs_global"] = scorer.interface_qs_global - out["interface_qs_best"] = scorer.interface_qs_best out["dockq_ave"] = scorer.dockq_ave out["dockq_wave"] = scorer.dockq_wave out["dockq_ave_full"] = scorer.dockq_ave_full out["dockq_wave_full"] = scorer.dockq_wave_full + if args.contact_scores: + out["reference_contacts"] = scorer.native_contacts + out["model_contacts"] = scorer.model_contacts + + if args.rigid_scores: + out["oligo_gdtts"] = scorer.gdtts + out["oligo_gdtha"] = scorer.gdtha + out["rmsd"] = scorer.rmsd + data = scorer.transform.data + out["transform"] = [data[i:i + 4] for i in range(0, len(data), 4)] + if args.patch_scores: out["model_interface_residues"] = \ _InterfaceResiduesToJSONList(scorer.model_interface_residues) diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst index 5deeca8603518cb81d2dabf5d37573d52df497e7..01b02f0c8aa6b9eb4c3c80b5d919324b8947fcd9 100644 --- a/modules/doc/actions.rst +++ b/modules/doc/actions.rst @@ -41,16 +41,17 @@ Details on the usage (output of ``ost compare-structures --help``): [--cad-score] [--local-cad-score] [--cad-exec CAD_EXEC] [--usalign-exec USALIGN_EXEC] [--qs-score] - [--rigid-scores] [--interface-scores] + [--dockq] [--contact-scores] [--rigid-scores] [--patch-scores] [--tm-score] [--lddt-no-stereochecks] - + [--n-max-naive N_MAX_NAIVE] + Evaluate model against reference - + Example: ost compare-structures -m model.pdb -r reference.cif - + Loads the structures and performs basic cleanup: - + * Assign elements according to the PDB Chemical Component Dictionary * Map nonstandard residues to their parent residues as defined by the PDB Chemical Component Dictionary, e.g. phospho-serine => serine @@ -59,12 +60,12 @@ Details on the usage (output of ``ost compare-structures --help``): * Remove unknown atoms, i.e. atoms that are not expected according to the PDB Chemical Component Dictionary * Select for peptide/nucleotide residues - + The cleaned structures are optionally dumped using -d/--dump-structures - + Output is written in JSON format (default: out.json). In case of no additional options, this is a dictionary with 8 keys describing model/reference comparison: - + * "reference_chains": Chain names of reference * "model_chains": Chain names of model * "chem_groups": Groups of polypeptides/polynucleotides from reference that @@ -89,10 +90,10 @@ Details on the usage (output of ``ost compare-structures --help``): * "status": SUCCESS if everything ran through. In case of failure, the only content of the JSON output will be "status" set to FAILURE and an additional key: "traceback". - + The following additional keys store relevant input parameters to reproduce results: - + * "model" * "reference" * "fault_tolerant" @@ -103,22 +104,22 @@ Details on the usage (output of ``ost compare-structures --help``): * "cad_exec" * "usalign_exec" * "lddt_no_stereochecks" - + The pairwise sequence alignments are computed with Needleman-Wunsch using BLOSUM62 (NUC44 for nucleotides). Many benchmarking scenarios preprocess the structures to ensure matching residue numbers (CASP/CAMEO). In these cases, enabling -rna/--residue-number-alignment is recommended. - + Each score is opt-in and can be enabled with optional arguments. - + Example to compute global and per-residue lDDT values as well as QS-score: - + ost compare-structures -m model.pdb -r reference.cif --lddt --local-lddt --qs-score - + Example to inject custom chain mapping - + ost compare-structures -m model.pdb -r reference.cif -c A:B B:A - + optional arguments: -h, --help show this help message and exit -m MODEL, --model MODEL @@ -218,7 +219,58 @@ Details on the usage (output of ``ost compare-structures --help``): given, an OpenStructure internal copy of USalign code is used. --qs-score Compute QS-score, stored as key "qs_global", and the - QS-best variant, stored as key "qs_best". + QS-best variant, stored as key "qs_best". Interfaces + in the reference with non-zero contribution to QS- + score are available as key "qs_reference_interfaces", + the ones from the model as key "qs_model_interfaces". + "qs_interfaces" is a subset of + "qs_reference_interfaces" that contains interfaces + that can be mapped to the model. They are stored as + lists in format [ref_ch1, ref_ch2, mdl_ch1, mdl_ch2]. + The respective per-interface scores for + "qs_interfaces" are available as keys + "per_interface_qs_global" and "per_interface_qs_best" + --dockq Compute DockQ scores and its components. Relevant + interfaces with at least one contact (any atom within + 5A) of the reference structure are available as key + "dockq_reference_interfaces". Only interfaces between + peptide chains are considered here! Key + "dockq_interfaces" is a subset of + "dockq_reference_interfaces" that contains interfaces + that can be mapped to the model. They are stored as + lists in format [ref_ch1, ref_ch2, mdl_ch1, mdl_ch2]. + The respective DockQ scores for "dockq_interfaces" are + available as key "dockq". It's components are + available as keys: "fnat" (fraction of reference + contacts which are also there in model) "irmsd" + (interface RMSD), "lrmsd" (ligand RMSD). The DockQ + score is strictly designed to score each interface + individually. We also provide two averaged versions to + get one full model score: "dockq_ave", "dockq_wave". + The first is simply the average of "dockq_scores", the + latter is a weighted average with weights derived from + number of contacts in the reference interfaces. These + two scores only consider interfaces that are present + in both, the model and the reference. "dockq_ave_full" + and "dockq_wave_full" add zeros in the average + computation for each interface that is only present in + the reference but not in the model. + --contact-scores Computes interface contact based scores. A contact + between two residues of different chains is defined as + having at least one heavy atom within 5A. Contacts in + reference structure are available as key + "reference_contacts". Each contact specifies the + interacting residues in format + "<cname>.<rnum>.<ins_code>". Model contacts are + available as key "model_contacts". The precision which + is available as key "contact_precision" reports the + fraction of model contacts that are also present in + the reference. The recall which is available as key + "contact_recall" reports the fraction of reference + contacts that are correctly reproduced in the model. + The ICS score (Interface Contact Similarity) available + as key "ics" combines precision and recall using the + F1-measure. --rigid-scores Computes rigid superposition based scores. They're based on a Kabsch superposition of all mapped CA positions (C3' for nucleotides). Makes the following @@ -229,33 +281,6 @@ Details on the usage (output of ``ost compare-structures --help``): these positions and transformation, "transform": the used 4x4 transformation matrix that superposes model onto reference. - --interface-scores Per interface scores for each interface that has at - least one contact in the reference, i.e. at least one - pair of heavy atoms within 5A. The respective - interfaces are available from key "interfaces" which - is a list of tuples in form (ref_ch1, ref_ch2, - mdl_ch1, mdl_ch2). Per-interface scores are available - as lists referring to these interfaces and have the - following keys: "nnat" (number of contacts in - reference), "nmdl" (number of contacts in model), - "fnat" (fraction of reference contacts which are also - there in model), "fnonnat" (fraction of model contacts - which are not there in target), "irmsd" (interface - RMSD), "lrmsd" (ligand RMSD), "dockq_scores" (per- - interface score computed from "fnat", "irmsd" and - "lrmsd"), "interface_qs_global" and - "interface_qs_best" (per-interface versions of the two - QS-score variants). The DockQ score is strictly - designed to score each interface individually. We also - provide two averaged versions to get one full model - score: "dockq_ave", "dockq_wave". The first is simply - the average of "dockq_scores", the latter is a - weighted average with weights derived from "nnat". - These two scores only consider interfaces that are - present in both, the model and the reference. - "dockq_ave_full" and "dockq_wave_full" add zeros in - the average computation for each interface that is - only present in the reference but not in the model. --patch-scores Local interface quality score used in CASP15. Scores each model residue that is considered in the interface (CB pos within 8A of any CB pos from another chain (CA @@ -277,6 +302,12 @@ Details on the usage (output of ``ost compare-structures --help``): "usalign_mapping" --lddt-no-stereochecks Disable stereochecks for lDDT computation + --n-max-naive N_MAX_NAIVE + If number of chains in model and reference are below + or equal that number, the chain mapping will naively + enumerate all possible mappings. A heuristic is used + otherwise. + diff --git a/modules/mol/alg/pymod/contact_score.py b/modules/mol/alg/pymod/contact_score.py index e4708aeb181f52143b7681c4b8f25a5682d05d7b..2023c6dfcd530138e42446ef80f3459c190a4548 100644 --- a/modules/mol/alg/pymod/contact_score.py +++ b/modules/mol/alg/pymod/contact_score.py @@ -54,6 +54,7 @@ class ContactEntity: self._interacting_chains = None self._sequence = dict() self._contacts = None + self._hr_contacts = None @property def view(self): @@ -122,6 +123,18 @@ class ContactEntity: if self._contacts is None: self._SetupContacts() return self._contacts + + @property + def hr_contacts(self): + """ Human readable interchain contacts + + Human readable version of :attr:`~contacts`. Simple list with tuples + containing two strings specifying the residues in contact. Format: + <cname>.<rnum>.<ins_code> + """ + if self._hr_contacts is None: + self._SetupContacts() + return self._hr_contacts def GetChain(self, chain_name): """ Get chain by name @@ -152,6 +165,7 @@ class ContactEntity: # this function is incredibly inefficient... if performance is an issue, # go ahead and optimize self._contacts = dict() + self._hr_contacts = list() # set indices relative to full view for ch in self.view.chains: @@ -178,6 +192,12 @@ class ContactEntity: self._contacts[cname_key] = set() self._contacts[cname_key].add((r1.GetIntProp("contact_idx"), r2.GetIntProp("contact_idx"))) + rnum1 = r1.GetNumber() + hr1 = f"{cname}.{rnum1.num}.{rnum1.ins_code}" + rnum2 = r2.GetNumber() + hr2 = f"{cname2}.{rnum2.num}.{rnum2.ins_code}" + self._hr_contacts.append((hr1.strip("\u0000"), + hr2.strip("\u0000"))) class ContactScorerResult: """ diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py index 3ccecfd23860e862eb9818d327baca2d683ced26..80faed359b71cfc1815b86aa962844e61e61accd 100644 --- a/modules/mol/alg/pymod/scoring.py +++ b/modules/mol/alg/pymod/scoring.py @@ -12,6 +12,7 @@ from ost.mol.alg import stereochemistry from ost.mol.alg import dockq from ost.mol.alg.lddt import lDDTScorer from ost.mol.alg.qsscore import QSScorer +from ost.mol.alg.contact_score import ContactScorer from ost.mol.alg import Molck, MolckSettings from ost import bindings from ost.bindings import cadscore @@ -227,6 +228,7 @@ class Scorer: self._lddt_scorer = None self._bb_lddt_scorer = None self._qs_scorer = None + self._contact_scorer = None # lazily computed scores self._lddt = None @@ -236,18 +238,26 @@ class Scorer: self._qs_global = None self._qs_best = None - self._interface_qs_global = None - self._interface_qs_best = None - - self._interfaces = None + self._qs_target_interfaces = None + self._qs_model_interfaces = None + self._qs_interfaces = None + self._per_interface_qs_global = None + self._per_interface_qs_best = None + + self._contact_target_interfaces = None + self._contact_model_interfaces = None self._native_contacts = None self._model_contacts = None + self._contact_precision = None + self._contact_recall = None + self._ics = None + + self._dockq_target_interfaces = None + self._dockq_interfaces = None self._fnat = None self._fnonnat = None self._irmsd = None self._lrmsd = None - self._nonmapped_interfaces = None - self._nonmapped_interfaces_contacts = None self._dockq_scores = None self._dockq_ave = None self._dockq_wave = None @@ -496,6 +506,13 @@ class Scorer: self._qs_scorer = QSScorer.FromMappingResult(self.mapping) return self._qs_scorer + @property + def contact_scorer(self): + if self._contact_scorer is None: + self._contact_scorer = ContactScorer.FromMappingResult(self.mapping) + return self._contact_scorer + + @property def lddt(self): """ Global lDDT score in range [0.0, 1.0] @@ -584,101 +601,224 @@ class Scorer: return self._qs_best @property - def interfaces(self): - """ Interfaces with nonzero :attr:`native_contacts` + def qs_target_interfaces(self): + """ Interfaces in :attr:`~target` with non-zero contribution to + :attr:`~qs_global`/:attr:`~qs_best` + + :type: :class:`list` of :class:`tuple` with 2 elements each: + (trg_ch1, trg_ch2) + """ + if self._qs_target_interfaces is None: + self._qs_target_interfaces = self.qs_scorer.qsent1.interacting_chains + return self._qs_target_interfaces + + @property + def qs_model_interfaces(self): + """ Interfaces in :attr:`~model` with non-zero contribution to + :attr:`~qs_global`/:attr:`~qs_best` + + :type: :class:`list` of :class:`tuple` with 2 elements each: + (mdl_ch1, mdl_ch2) + """ + if self._qs_model_interfaces is None: + self._qs_model_interfaces = self.qs_scorer.qsent2.interacting_chains + return self._qs_model_interfaces + + @property + def qs_interfaces(self): + """ Interfaces in :attr:`~qs_target_interfaces` that can be mapped + to :attr:`~model`. :type: :class:`list` of :class:`tuple` with 4 elements each: (trg_ch1, trg_ch2, mdl_ch1, mdl_ch2) """ - if self._interfaces is None: - self._compute_per_interface_scores() - return self._interfaces - + if self._qs_interfaces is None: + self._qs_interfaces = list() + flat_mapping = self.mapping.GetFlatMapping() + for i in self.qs_target_interfaces: + if i[0] in flat_mapping and i[1] in flat_mapping: + self._qs_interfaces.append((i[0], i[1], + flat_mapping[i[0]], + flat_mapping[i[1]])) + return self._qs_interfaces + @property - def interface_qs_global(self): - """ QS-score for each interface in :attr:`interfaces` + def per_interface_qs_global(self): + """ QS-score for each interface in :attr:`qs_interfaces` :type: :class:`list` of :class:`float` """ - if self._interface_qs_global is None: - self._compute_per_interface_scores() - return self._interface_qs_global + if self._per_interface_qs_global is None: + self._compute_per_interface_qs_scores() + return self._per_interface_qs_global @property - def interface_qs_best(self): - """ QS-score for each interface in :attr:`interfaces` + def per_interface_qs_best(self): + """ QS-score for each interface in :attr:`qs_interfaces` Only computed on aligned residues :type: :class:`list` of :class:`float` """ - if self._interface_qs_best is None: - self._compute_per_interface_scores() - return self._interface_qs_best + if self._per_interface_qs_best is None: + self._compute_per_interface_qs_scores() + return self._per_interface_qs_best @property def native_contacts(self): - """ N native contacts for interfaces in :attr:`~interfaces` + """ Native contacts A contact is a pair or residues from distinct chains that have - a minimal heavy atom distance < 5A + a minimal heavy atom distance < 5A. Contacts are specified as + :class:`tuple` with two strings in format: + <cname>.<rnum>.<ins_code> - :type: :class:`list` of :class:`int` + :type: :class:`list` of :class:`tuple` """ if self._native_contacts is None: - self._compute_per_interface_scores() + self._native_contacts = self.contact_scorer.cent1.hr_contacts return self._native_contacts @property def model_contacts(self): - """ N model contacts for interfaces in :attr:`~interfaces` - - A contact is a pair or residues from distinct chains that have - a minimal heavy atom distance < 5A - - :type: :class:`list` of :class:`int` + """ Same for model """ if self._model_contacts is None: - self._compute_per_interface_scores() + self._model_contacts = self.contact_scorer.cent2.hr_contacts return self._model_contacts + @property + def contact_target_interfaces(self): + """ Interfaces in :class:`target` which have at least one contact + + Contact as defined in :attr:`~native_contacts` + + :type: :class:`list` of :class:`tuple` with 2 elements each + (trg_ch1, trg_ch2) + """ + if self._contact_target_interfaces is None: + self._contact_target_interfaces = self.contact_scorer.cent1.interacting_chains + return self._contact_target_interfaces + + @property + def contact_model_interfaces(self): + """ Interfaces in :class:`model` which have at least one contact + + Contact as defined in :attr:`~native_contacts` + + :type: :class:`list` of :class:`tuple` with 2 elements each + (trg_ch1, trg_ch2) + """ + if self._contact_model_interfaces is None: + self._contact_model_interfaces = self.contact_scorer.cent2.interacting_chains + return self._contact_model_interfaces + + @property + def contact_precision(self): + """ Fraction of model contacts that are also present in target + + :type: :class:`float` + """ + if self._contact_precision is None: + self._compute_contact_scores() + return self._contact_precision + + @property + def contact_recall(self): + """ Fraction of target contacts that are correctly reproduced in model + + :type: :class:`float` + """ + if self._contact_recall is None: + self._compute_contact_scores() + return self._contact_recall + + @property + def ics(self): + """ ICS (Interface Contact Similarity) score + + Combination of :attr:`~contact_precision` and :attr:`~contact_recall` + using the F1-measure + + :type: :class:`float` + """ + if self._ics is None: + self._compute_contact_scores() + return self._ics + + @property + def dockq_target_interfaces(self): + """ Interfaces in :attr:`target` that are relevant for DockQ + + In principle a subset of :attr:`~contact_target_interfaces` that only + contains peptide sequences. + + :type: :class:`list` of :class:`tuple` with 2 elements each: + (trg_ch1, trg_ch2) + """ + if self._dockq_target_interfaces is None: + self._dockq_target_interfaces = list() + pep_seqs = set([s.GetName() for s in self.chain_mapper.polypep_seqs]) + for interface in self.contact_target_interfaces: + if interface[0] in pep_seqs and interface[1] in pep_seqs: + self._dockq_target_interfaces.append(interface) + return self._dockq_target_interfaces + + @property + def dockq_interfaces(self): + """ Interfaces in :attr:`dockq_target_interfaces` that can be mapped + to model + + :type: :class:`list` of :class:`tuple` with 4 elements each: + (trg_ch1, trg_ch2, mdl_ch1, mdl_ch2) + """ + if self._dockq_interfaces is None: + self._dockq_interfaces = list() + flat_mapping = self.mapping.GetFlatMapping() + for i in self.dockq_target_interfaces: + if i[0] in flat_mapping and i[1] in flat_mapping: + self._dockq_interfaces.append((i[0], i[1], + flat_mapping[i[0]], + flat_mapping[i[1]])) + return self._dockq_interfaces + @property def dockq_scores(self): - """ DockQ scores for interfaces in :attr:`~interfaces` + """ DockQ scores for interfaces in :attr:`~dockq_interfaces` :class:`list` of :class:`float` """ if self._dockq_scores is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._dockq_scores @property def fnat(self): - """ fnat scores for interfaces in :attr:`~interfaces` + """ fnat scores for interfaces in :attr:`~dockq_interfaces` fnat: Fraction of native contacts that are also present in model :class:`list` of :class:`float` """ if self._fnat is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._fnat @property def fnonnat(self): - """ fnonnat scores for interfaces in :attr:`~interfaces` + """ fnonnat scores for interfaces in :attr:`~dockq_interfaces` fnat: Fraction of model contacts that are not present in target :class:`list` of :class:`float` """ if self._fnonnat is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._fnonnat @property def irmsd(self): - """ irmsd scores for interfaces in :attr:`~interfaces` + """ irmsd scores for interfaces in :attr:`~dockq_interfaces` irmsd: RMSD of interface (RMSD computed on N, CA, C, O atoms) which consists of each residue that has at least one heavy atom within 10A of @@ -687,12 +827,12 @@ class Scorer: :class:`list` of :class:`float` """ if self._irmsd is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._irmsd @property def lrmsd(self): - """ lrmsd scores for interfaces in :attr:`~interfaces` + """ lrmsd scores for interfaces in :attr:`~dockq_interfaces` lrmsd: The interfaces are superposed based on the receptor (rigid min RMSD superposition) and RMSD for the ligand is reported. @@ -703,31 +843,8 @@ class Scorer: :class:`list` of :class:`float` """ if self._lrmsd is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._lrmsd - - @property - def nonmapped_interfaces(self): - """ Interfaces present in target that are not mapped - - At least one of the chains is not present in target - - :type: :class:`list` of :class:`tuple` with two elements each: - (trg_ch1, trg_ch2) - """ - if self._nonmapped_interfaces is None: - self._compute_per_interface_scores() - return self._nonmapped_interfaces - - @property - def nonmapped_interfaces_contacts(self): - """ Number of native contacts in :attr:`~nonmapped_interfaces` - - :type: :class:`list` of :class:`int` - """ - if self._nonmapped_interfaces_contacts is None: - self._compute_per_interface_scores() - return self._nonmapped_interfaces_contacts @property def dockq_ave(self): @@ -740,42 +857,42 @@ class Scorer: :type: :class:`float` """ if self._dockq_ave is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._dockq_ave @property def dockq_wave(self): - """ Same as :attr:`dockq_ave`, weighted by :attr:`native_contacts` + """ Same as :attr:`dockq_ave`, weighted by native contacts :type: :class:`float` """ if self._dockq_wave is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._dockq_wave @property def dockq_ave_full(self): """ Same as :attr:`~dockq_ave` but penalizing for missing interfaces - Interfaces in :attr:`nonmapped_interfaces` are added as 0.0 + Interfaces that are not covered in model are added as 0.0 in average computation. :type: :class:`float` """ if self._dockq_ave_full is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._dockq_ave_full @property def dockq_wave_full(self): """ Same as :attr:`~dockq_ave_full`, but weighted - Interfaces in :attr:`nonmapped_interfaces` are added as 0.0 in + Interfaces that are not covered in model are added as 0.0 in average computations and the respective weights are derived from - :attr:`~nonmapped_interfaces_contacts` + number of contacts in respective target interface. """ if self._dockq_wave_full is None: - self._compute_per_interface_scores() + self._compute_dockq_scores() return self._dockq_wave_full @property @@ -1168,88 +1285,90 @@ class Scorer: self._qs_global = qs_score_result.QS_global self._qs_best = qs_score_result.QS_best - def _compute_per_interface_scores(self): - # list of [trg_ch1, trg_ch2, mdl_ch1, mdl_ch2] - self._interfaces = list() - # lists with respective values for these interfaces - self._native_contacts = list() - self._model_contacts = list() - self._interface_qs_global = list() - self._interface_qs_best = list() + def _compute_per_interface_qs_scores(self): + self._per_interface_qs_global = list() + self._per_interface_qs_best = list() + + for interface in self.qs_interfaces: + trg_ch1 = interface[0] + trg_ch2 = interface[1] + mdl_ch1 = interface[2] + mdl_ch2 = interface[3] + qs_res = self.qs_scorer.ScoreInterface(trg_ch1, trg_ch2, + mdl_ch1, mdl_ch2) + self._per_interface_qs_best.append(qs_res.QS_best) + self._per_interface_qs_global.append(qs_res.QS_global) + + def _compute_contact_scores(self): + contact_scorer_res = self.contact_scorer.Score(self.mapping.mapping) + self._contact_precision = contact_scorer_res.precision + self._contact_recall = contact_scorer_res.recall + self._ics = contact_scorer_res.ics + + def _compute_dockq_scores(self): + + # lists with values in contact_target_interfaces self._dockq_scores = list() self._fnat = list() self._fnonnat = list() self._irmsd = list() self._lrmsd = list() - # list of interfaces which are present in target but not mapped, i.e. - # not present in mdl - self._nonmapped_interfaces = list() - self._nonmapped_interfaces_contacts = list() - - nonmapped_interface_counts = list() - - flat_mapping = self.mapping.GetFlatMapping() - pep_seqs = set([s.GetName() for s in self.chain_mapper.polypep_seqs]) + # keep track of native counts for weights in dockq_wave/dockq_wave_full + native_counts = list() dockq_alns = dict() for aln in self.aln: - trg_ch = aln.GetSequence(0).name - if trg_ch in pep_seqs: - mdl_ch = aln.GetSequence(1).name - dockq_alns[(trg_ch, mdl_ch)] = aln - - for trg_int in self.qs_scorer.qsent1.interacting_chains: - trg_ch1 = trg_int[0] - trg_ch2 = trg_int[1] - if trg_ch1 in pep_seqs and trg_ch2 in pep_seqs: - if trg_ch1 in flat_mapping and trg_ch2 in flat_mapping: - mdl_ch1 = flat_mapping[trg_ch1] - mdl_ch2 = flat_mapping[trg_ch2] - aln1 = dockq_alns[(trg_ch1, mdl_ch1)] - aln2 = dockq_alns[(trg_ch2, mdl_ch2)] - res = dockq.DockQ(self.model, self.target, mdl_ch1, mdl_ch2, - trg_ch1, trg_ch2, ch1_aln=aln1, - ch2_aln=aln2) - if res["nnat"] > 0: - self._interfaces.append((trg_ch1, trg_ch2, - mdl_ch1, mdl_ch2)) - self._native_contacts.append(res["nnat"]) - self._model_contacts.append(res["nmdl"]) - self._fnat.append(res["fnat"]) - self._fnonnat.append(res["fnonnat"]) - self._irmsd.append(res["irmsd"]) - self._lrmsd.append(res["lrmsd"]) - self._dockq_scores.append(res["DockQ"]) - qs_res = self.qs_scorer.ScoreInterface(trg_ch1, trg_ch2, - mdl_ch1, mdl_ch2) - self._interface_qs_best.append(qs_res.QS_best) - self._interface_qs_global.append(qs_res.QS_global) - else: - # interface which is not covered by mdl... let's run DockQ - # with trg as trg/mdl in order to get the native contacts - # out - # no need to pass alns as the residue numbers match for sure - res = dockq.DockQ(self.target, self.target, - trg_ch1, trg_ch2, trg_ch1, trg_ch2) - nnat = res["nnat"] - if nnat > 0: - self._nonmapped_interfaces.append((trg_ch1, trg_ch2)) - self._nonmapped_interfaces_contacts.append(nnat) - + trg_s = aln.GetSequence(0) + mdl_s = aln.GetSequence(1) + dockq_alns[(trg_s.GetName(), mdl_s.GetName())] = aln + + for interface in self.dockq_interfaces: + trg_ch1 = interface[0] + trg_ch2 = interface[1] + mdl_ch1 = interface[2] + mdl_ch2 = interface[3] + aln1 = dockq_alns[(trg_ch1, mdl_ch1)] + aln2 = dockq_alns[(trg_ch2, mdl_ch2)] + res = dockq.DockQ(self.model, self.target, mdl_ch1, mdl_ch2, + trg_ch1, trg_ch2, ch1_aln=aln1, + ch2_aln=aln2) + self._fnat.append(res["fnat"]) + self._fnonnat.append(res["fnonnat"]) + self._irmsd.append(res["irmsd"]) + self._lrmsd.append(res["lrmsd"]) + self._dockq_scores.append(res["DockQ"]) + native_counts.append(res["nnat"]) + + # keep track of native counts in target interfaces which are + # not covered in model in order to compute + # dockq_ave_full/dockq_wave_full in the end + not_covered_counts = list() + proc_trg_interfaces = set([(x[0], x[1]) for x in self.dockq_interfaces]) + for interface in self.dockq_target_interfaces: + if interface not in proc_trg_interfaces: + # let's run DockQ with trg as trg/mdl in order to get the native + # contacts out - no need to pass alns as the residue numbers + # match for sure + trg_ch1 = interface[0] + trg_ch2 = interface[1] + res = dockq.DockQ(self.target, self.target, + trg_ch1, trg_ch2, trg_ch1, trg_ch2) + not_covered_counts.apend(res["nnat"]) + # there are 4 types of combined scores # - simple average # - average weighted by native_contacts - # - the two above including nonmapped_interfaces => set DockQ to 0.0 - scores = np.array(self._dockq_scores) - weights = np.array(self._native_contacts) + # - the two above including nonmapped_contact_interfaces => set DockQ to 0.0 + scores = np.array([self._dockq_scores]) + weights = np.array([native_counts]) if len(scores) > 0: self._dockq_ave = np.mean(scores) else: self._dockq_ave = 0.0 self._dockq_wave = np.sum(np.multiply(weights/np.sum(weights), scores)) - scores = np.append(scores, [0.0]*len(self._nonmapped_interfaces)) - weights = np.append(weights, self._nonmapped_interfaces_contacts) + scores = np.append(scores, [0.0]*len(not_covered_counts)) + weights = np.append(weights, not_covered_counts) if len(scores) > 0: self._dockq_ave_full = np.mean(scores) else: