diff --git a/actions/ost-compare-structures b/actions/ost-compare-structures index 7f35291ac4e2b14c2c5b855bfb2a05a889cfb704..82489c26e7439c3bbc71947846038bd1ab326583 100644 --- a/actions/ost-compare-structures +++ b/actions/ost-compare-structures @@ -345,7 +345,8 @@ def _ParseArgs(): help=("Compute DockQ scores and its components. Relevant interfaces " "with at least one contact (any atom within 5A) of the reference " "structure are available as key \"dockq_reference_interfaces\". " - "Only interfaces between peptide chains are considered here! " + "Protein-protein, protein-nucleotide and nucleotide-nucleotide " + "interfaces are considered. " "Key \"dockq_interfaces\" is a subset of " "\"dockq_reference_interfaces\" that contains interfaces that " "can be mapped to the model. They are stored as lists in format " @@ -383,7 +384,8 @@ def _ParseArgs(): "8A in combination with only considering CB atoms for " "protein-peptide interactions. " "Note that the resulting DockQ is not evaluated for these " - "slightly updated fnat and irmsd (lrmsd stays the same)." + "slightly updated fnat and irmsd (lrmsd stays the same). " + "Raises an error if reference contains nucleotide chains. " "This flag has no influence on patch_dockq scores.")) parser.add_argument( diff --git a/modules/doc/actions.rst b/modules/doc/actions.rst index ac3de2abbf0cb9c831c067ea5660cc078b110aec..fb5c7b9a0ac82242bc576f076bda82850a195d6f 100644 --- a/modules/doc/actions.rst +++ b/modules/doc/actions.rst @@ -252,28 +252,29 @@ Details on the usage (output of ``ost compare-structures --help``): --dockq Compute DockQ scores and its components. Relevant interfaces with at least one contact (any atom within 5A) of the reference structure are available as key - "dockq_reference_interfaces". Only interfaces between - peptide chains are considered here! Key - "dockq_interfaces" is a subset of - "dockq_reference_interfaces" that contains interfaces - that can be mapped to the model. 
They are stored as - lists in format [ref_ch1, ref_ch2, mdl_ch1, mdl_ch2]. - The respective DockQ scores for "dockq_interfaces" are - available as key "dockq". It's components are - available as keys: "fnat" (fraction of reference - contacts which are also there in model) "irmsd" - (interface RMSD), "lrmsd" (ligand RMSD). The DockQ - score is strictly designed to score each interface - individually. We also provide two averaged versions to - get one full model score: "dockq_ave", "dockq_wave". - The first is simply the average of "dockq_scores", the - latter is a weighted average with weights derived from - number of contacts in the reference interfaces. These - two scores only consider interfaces that are present - in both, the model and the reference. "dockq_ave_full" - and "dockq_wave_full" add zeros in the average - computation for each interface that is only present in - the reference but not in the model. + "dockq_reference_interfaces". Protein-protein, + protein-nucleotide and nucleotide-nucleotide + interfaces are considered. Key "dockq_interfaces" is a + subset of "dockq_reference_interfaces" that contains + interfaces that can be mapped to the model. They are + stored as lists in format [ref_ch1, ref_ch2, mdl_ch1, + mdl_ch2]. The respective DockQ scores for + "dockq_interfaces" are available as key "dockq". It's + components are available as keys: "fnat" (fraction of + reference contacts which are also there in model) + "irmsd" (interface RMSD), "lrmsd" (ligand RMSD). The + DockQ score is strictly designed to score each + interface individually. We also provide two averaged + versions to get one full model score: "dockq_ave", + "dockq_wave". The first is simply the average of + "dockq_scores", the latter is a weighted average with + weights derived from number of contacts in the + reference interfaces. These two scores only consider + interfaces that are present in both, the model and the + reference. 
"dockq_ave_full" and "dockq_wave_full" add + zeros in the average computation for each interface + that is only present in the reference but not in the + model. --dockq-capri-peptide Flag that changes two things in the way DockQ and its underlying scores are computed which is proposed by @@ -289,8 +290,9 @@ Details on the usage (output of ``ost compare-structures --help``): only considering CB atoms for protein-peptide interactions. Note that the resulting DockQ is not evaluated for these slightly updated fnat and irmsd - (lrmsd stays the same).This flag has no influence on - patch_dockq scores. + (lrmsd stays the same). Raises an error if reference + contains nucleotide chains. This flag has no influence + on patch_dockq scores. --ics Computes interface contact similarity (ICS) related scores. A contact between two residues of different chains is defined as having at least one heavy atom diff --git a/modules/mol/alg/pymod/dockq.py b/modules/mol/alg/pymod/dockq.py index 7be3ca5d2a235bc58f5f15a7172a32f3a1da1e88..b448e8b1e29262a097e8ba4f7556f2a6705bd1ad 100644 --- a/modules/mol/alg/pymod/dockq.py +++ b/modules/mol/alg/pymod/dockq.py @@ -158,7 +158,9 @@ def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0, cb_mode=False): # backbone atoms used for superposition - sup_atoms = ['CA','C','N','O'] + sup_atoms = ["CA","C","N","O", + "P", "OP1", "OP2", "O2'", "O3'", "O4'", "O5'", "C1'", "C2'", + "C3'", "C4'", "C5'"] # make mapped residues accessible by the dockq_idx property mapped_mdl = mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)},{mol.QueryQuoteName(mdl_ch2)} and grdockq_mapped=1") diff --git a/modules/mol/alg/pymod/scoring.py b/modules/mol/alg/pymod/scoring.py index 13cc7615eab7157f6be7c3d7b938e77ea641f704..37304aa94cae1a4daacefd499d1fc0bd5da897ba 100644 --- a/modules/mol/alg/pymod/scoring.py +++ b/modules/mol/alg/pymod/scoring.py @@ -1104,7 +1104,9 @@ class Scorer: """ Interfaces in :attr:`target` that are relevant for DockQ All interfaces 
in :attr:`~target` with non-zero contacts that are - relevant for DockQ. Chain names are lexicographically sorted. + relevant for DockQ. Includes protein-protein, protein-nucleotide and + nucleotide-nucleotide interfaces. Chain names for each interface are + lexicographically sorted. :type: :class:`list` of :class:`tuple` with 2 elements each: (trg_ch1, trg_ch2) @@ -1122,19 +1124,13 @@ class Scorer: interfaces = cent.interacting_chains interfaces = [(min(x[0],x[1]), max(x[0],x[1])) for x in interfaces] - nuc_seqs = set([s.GetName() for s in self.chain_mapper.polynuc_seqs]) - interface_chains = {c for i in interfaces for c in i} - nuc_interface_chains = interface_chains.intersection(nuc_seqs) - if nuc_interface_chains: - msg = "OST doesn't support nucleic acid chains for DockQ: " - msg += ", ".join(nuc_interface_chains) - raise NotImplementedError(msg) - - # select the ones with only peptides involved pep_seqs = set([s.GetName() for s in self.chain_mapper.polypep_seqs]) + nuc_seqs = set([s.GetName() for s in self.chain_mapper.polynuc_seqs]) + + seqs = pep_seqs.union(nuc_seqs) self._dockq_target_interfaces = list() for interface in interfaces: - if interface[0] in pep_seqs and interface[1] in pep_seqs: + if interface[0] in seqs and interface[1] in seqs: self._dockq_target_interfaces.append(interface) return self._dockq_target_interfaces @@ -1217,9 +1213,11 @@ class Scorer: def irmsd(self): """ irmsd scores for interfaces in :attr:`~dockq_interfaces` - irmsd: RMSD of interface (RMSD computed on N, CA, C, O atoms) which + irmsd: RMSD of interface (RMSD computed on backbone atoms) which consists of each residue that has at least one heavy atom within 10A of - other chain. + other chain. Backbone atoms for proteins: "CA","C","N","O", for + nucleotides: "P", "OP1", "OP2", "O2'", "O3'", "O4'", "O5'", "C1'", + "C2'", "C3'", "C4'", "C5'". 
:class:`list` of :class:`float` """ @@ -1231,11 +1229,10 @@ class Scorer: def lrmsd(self): """ lrmsd scores for interfaces in :attr:`~dockq_interfaces` - lrmsd: The interfaces are superposed based on the receptor (rigid - min RMSD superposition) and RMSD for the ligand is reported. - Superposition and RMSD are based on N, CA, C and O positions, - receptor is the chain contributing to the interface with more - residues in total. + lrmsd: The two chains involved in the interface are superposed based on + the receptor (rigid min RMSD superposition) and the ligand RMSD is + reported. Receptor is the chain with more residues. Superposition and + RMSD are computed on the same backbone atoms as :attr:`irmsd`. :class:`list` of :class:`float` """ @@ -2050,6 +2047,12 @@ class Scorer: def _compute_dockq_scores(self): LogScript("Computing DockQ") + + if self.dockq_capri_peptide and len(self.chain_mapper.polynuc_seqs) > 0: + raise RuntimeError("Cannot compute DockQ for reference structures " + "with nucleotide chains if dockq_capri_peptide " + "is enabled.") + # lists with values in contact_target_interfaces self._dockq_scores = list() self._fnat = list()