diff --git a/convert_to_modelcif.py b/convert_to_modelcif.py index 18820683004f04d21f1cfbbbb548dfa667a92c37..0c7bb5876272e417d0253734cb182b12173d493b 100755 --- a/convert_to_modelcif.py +++ b/convert_to_modelcif.py @@ -74,6 +74,20 @@ class _LocalPLDDT(modelcif.qa_metric.Local, modelcif.qa_metric.PLDDT): software = None +class _PAE(modelcif.qa_metric.MetricType): + """Predicted aligned error (in Angstroms)""" + + type = "PAE" + other_details = None + + +class _LocalPairwisePAE(modelcif.qa_metric.LocalPairwise, _PAE): + """Predicted aligned error (in Angstroms)""" + + name = "PAE" + software = None + + # pylint: enable=too-few-public-methods @@ -117,9 +131,11 @@ class _Biopython2ModelCIF(modelcif.model.AbInitioModel): # iterate polypetide chains # local PLDDT i = 0 + lpae = [] # aa_only=False includes non-canonical amino acids but seems to skip # non-peptide-linking residues like ions - for chn_i in PPBuilder().build_peptides(self.structure, aa_only=False): + polypeptides = PPBuilder().build_peptides(self.structure, aa_only=False) + for chn_i in polypeptides: for res_i in chn_i: # local pLDDT # Assertion assumes that pLDDT values are also stored in the @@ -137,8 +153,23 @@ class _Biopython2ModelCIF(modelcif.model.AbInitioModel): # pairwise alignment error j = 0 + # We do a 2nd iteration over the structure instead of doing + # index magic because it keeps the code cleaner and should not + # be noticeably slower than iterating the array directly. + # Majority of time goes into writing files, anyway. + for chn_j in polypeptides: + for res_j in chn_j: + lpae.append( + _LocalPairwisePAE( + self.asym[res_i.parent.id].residue(res_i.id[1]), + self.asym[res_j.parent.id].residue(res_j.id[1]), + scores_json["pae"][i][j], + ) + ) + j += 1 i += 1 + self.qa_metrics.extend(lpae) def _get_modelcif_entities(target_ents, asym_units, system): @@ -343,12 +374,13 @@ def _get_scores(cif_json: dict, scr_file: str) -> None: """Add scores to JSON data.""" with open(scr_file, "rb") as sfh: scr_dict = pickle.load(sfh) - # dict_keys(['distogram', 'experimentally_resolved', 'masked_msa', 'num_recycles', 'predicted_aligned_error', 'structure_module', 'plddt', 'aligned_confidence_probs', 'max_predicted_aligned_error', 'ranking_confidence']) + # dict_keys(['distogram', 'experimentally_resolved', 'masked_msa', 'num_recycles', 'structure_module', 'aligned_confidence_probs', 'max_predicted_aligned_error', 'ranking_confidence']) # Get pLDDT as a list, the global pLDDT is the average, calculated on the # spot. cif_json["plddt"] = scr_dict["plddt"] cif_json["ptm"] = float(scr_dict["ptm"]) cif_json["iptm"] = float(scr_dict["iptm"]) + cif_json["pae"] = scr_dict["predicted_aligned_error"] def alphapulldown_model_to_modelcif(