Added PAE QA metric

50f76d7e · Bienchen · 490cdfe0 · 50f76d7e
Commit 50f76d7e authored 1 year ago by Bienchen
--- a/convert_to_modelcif.py
+++ b/convert_to_modelcif.py
@@ -74,6 +74,20 @@ class _LocalPLDDT(modelcif.qa_metric.Local, modelcif.qa_metric.PLDDT):
    software = None


+class _PAE(modelcif.qa_metric.MetricType):
+    """Predicted aligned error (in Angstroms)"""
+
+    type = "PAE"
+    other_details = None
+
+
+class _LocalPairwisePAE(modelcif.qa_metric.LocalPairwise, _PAE):
+    """Predicted aligned error (in Angstroms)"""
+
+    name = "PAE"
+    software = None
+
+
 # pylint: enable=too-few-public-methods


@@ -117,9 +131,11 @@ class _Biopython2ModelCIF(modelcif.model.AbInitioModel):
        # iterate polypeptide chains
        # local PLDDT
        i = 0
+        lpae = []
        # aa_only=False includes non-canonical amino acids but seems to skip
        # non-peptide-linking residues like ions
-        for chn_i in PPBuilder().build_peptides(self.structure, aa_only=False):
+        polypeptides = PPBuilder().build_peptides(self.structure, aa_only=False)
+        for chn_i in polypeptides:
            for res_i in chn_i:
                # local pLDDT
                # Assertion assumes that pLDDT values are also stored in the
@@ -137,8 +153,23 @@ class _Biopython2ModelCIF(modelcif.model.AbInitioModel):

                # pairwise predicted aligned error (PAE)
                j = 0
+                # We do a second iteration over the structure instead of doing
+                # index magic because it keeps the code cleaner and should not
+                # be noticeably slower than iterating the array directly.
+                # The majority of the time goes into writing files anyway.
+                for chn_j in polypeptides:
+                    for res_j in chn_j:
+                        lpae.append(
+                            _LocalPairwisePAE(
+                                self.asym[res_i.parent.id].residue(res_i.id[1]),
+                                self.asym[res_j.parent.id].residue(res_j.id[1]),
+                                scores_json["pae"][i][j],
+                            )
+                        )
+                        j += 1

                i += 1
+        self.qa_metrics.extend(lpae)


 def _get_modelcif_entities(target_ents, asym_units, system):
@@ -343,12 +374,13 @@ def _get_scores(cif_json: dict, scr_file: str) -> None:
    """Add scores to JSON data."""
    with open(scr_file, "rb") as sfh:
        scr_dict = pickle.load(sfh)
-    # dict_keys(['distogram', 'experimentally_resolved', 'masked_msa', 'num_recycles', 'predicted_aligned_error', 'structure_module', 'plddt', 'aligned_confidence_probs', 'max_predicted_aligned_error', 'ranking_confidence'])
+    # dict_keys(['distogram', 'experimentally_resolved', 'masked_msa', 'num_recycles', 'structure_module', 'aligned_confidence_probs', 'max_predicted_aligned_error', 'ranking_confidence'])
    # Get pLDDT as a list, the global pLDDT is the average, calculated on the
    # spot.
    cif_json["plddt"] = scr_dict["plddt"]
    cif_json["ptm"] = float(scr_dict["ptm"])
    cif_json["iptm"] = float(scr_dict["iptm"])
+    cif_json["pae"] = scr_dict["predicted_aligned_error"]


 def alphapulldown_model_to_modelcif(