Skip to content
Snippets Groups Projects
Commit d45d73c3 authored by Bienchen's avatar Bienchen
Browse files

Add software to AF2 scores

parent c1006128
Branches
No related tags found
No related merge requests found
# Don't have Emacs' backup files # Don't have Emacs' backup files
*~ *~
# ignore test scripts
biop-test.py
junk.py
# ignore some files used for testing # ignore some files used for testing
1ake.1.pdb 1ake.1.pdb
3lre.3.A.pdb 3lre.3.A.pdb
6xne.pdb 6xne.pdb
cmp.cif
...@@ -20,6 +20,7 @@ from Bio.PDB.Structure import Structure as BioStructure ...@@ -20,6 +20,7 @@ from Bio.PDB.Structure import Structure as BioStructure
from absl import app, flags, logging from absl import app, flags, logging
import numpy as np import numpy as np
import ihm.citations
import modelcif import modelcif
import modelcif.associated import modelcif.associated
import modelcif.dumper import modelcif.dumper
...@@ -120,8 +121,13 @@ class _Biopython2ModelCIF(modelcif.model.AbInitioModel): ...@@ -120,8 +121,13 @@ class _Biopython2ModelCIF(modelcif.model.AbInitioModel):
occupancy=atm.occupancy, occupancy=atm.occupancy,
) )
def add_scores(self, scores_json, entry_id, file_prefix): def add_scores(self, scores_json, entry_id, file_prefix, sw_dct):
"""Add QA metrics""" """Add QA metrics"""
_GlobalPLDDT.software = sw_dct["alphafold"]
_GlobalPTM.software = sw_dct["alphafold"]
_GlobalIPTM.software = sw_dct["alphafold"]
_LocalPLDDT.software = sw_dct["alphafold"]
_LocalPairwisePAE.software = sw_dct["alphafold"]
# global scores # global scores
self.qa_metrics.extend( self.qa_metrics.extend(
( (
...@@ -254,9 +260,14 @@ def _store_as_modelcif( ...@@ -254,9 +260,14 @@ def _store_as_modelcif(
name="ToDo: Model <N> (ranked #<M>)", name="ToDo: Model <N> (ranked #<M>)",
) )
# create software list from feature metadata
sw_dct = _get_software_data(data_json["__meta__"])
# process scores # process scores
mdl_file = os.path.splitext(os.path.basename(mdl_file))[0] mdl_file = os.path.splitext(os.path.basename(mdl_file))[0]
system.repositories.append(model.add_scores(data_json, system.id, mdl_file)) system.repositories.append(
model.add_scores(data_json, system.id, mdl_file, sw_dct)
)
system.model_groups.append(modelcif.model.ModelGroup([model])) system.model_groups.append(modelcif.model.ModelGroup([model]))
...@@ -316,9 +327,16 @@ def _compress_cif_file(cif_file): ...@@ -316,9 +327,16 @@ def _compress_cif_file(cif_file):
def _get_model_details(cmplx_name: str, data_json: dict) -> str: def _get_model_details(cmplx_name: str, data_json: dict) -> str:
"""Get the model description.""" """Get the model description."""
ap_versions = [] ap_versions = []
for mnmr in data_json["__meta__"]: for mnmr in data_json["__meta__"]: # mnmr = monomer
if data_json["__meta__"][mnmr]["ap_version"] not in ap_versions: if (
ap_versions.append(data_json["__meta__"][mnmr]["ap_version"]) data_json["__meta__"][mnmr]["software"]["alphapulldown"]["version"]
not in ap_versions
):
ap_versions.append(
data_json["__meta__"][mnmr]["software"]["alphapulldown"][
"version"
]
)
# ToDo: fetch AF2 version/ have it in metadata JSON # ToDo: fetch AF2 version/ have it in metadata JSON
return ( return (
...@@ -348,7 +366,13 @@ def _get_feature_metadata( ...@@ -348,7 +366,13 @@ def _get_feature_metadata(
# ToDo: make sure that its always ASCII # ToDo: make sure that its always ASCII
with open(feature_json, "r", encoding="ascii") as jfh: with open(feature_json, "r", encoding="ascii") as jfh:
jdata = json.load(jfh) jdata = json.load(jfh)
modelcif_json["__meta__"][mnmr]["ap_version"] = jdata["version"] modelcif_json["__meta__"][mnmr]["software"] = jdata["binaries"]
modelcif_json["__meta__"][mnmr]["software"]["alphapulldown"] = {
"version": jdata["version"]
}
modelcif_json["__meta__"][mnmr]["software"]["alphafold"] = {
"version": jdata["AlphaFold version"]
}
return cmplx_name return cmplx_name
...@@ -418,7 +442,6 @@ def _get_scores(cif_json: dict, scr_file: str) -> None: ...@@ -418,7 +442,6 @@ def _get_scores(cif_json: dict, scr_file: str) -> None:
"""Add scores to JSON data.""" """Add scores to JSON data."""
with open(scr_file, "rb") as sfh: with open(scr_file, "rb") as sfh:
scr_dict = pickle.load(sfh) scr_dict = pickle.load(sfh)
# dict_keys(['distogram', 'experimentally_resolved', 'masked_msa', 'num_recycles', 'structure_module', 'aligned_confidence_probs', 'max_predicted_aligned_error', 'ranking_confidence'])
# Get pLDDT as a list, the global pLDDT is the average, calculated on the # Get pLDDT as a list, the global pLDDT is the average, calculated on the
# spot. # spot.
cif_json["plddt"] = scr_dict["plddt"] cif_json["plddt"] = scr_dict["plddt"]
...@@ -427,6 +450,79 @@ def _get_scores(cif_json: dict, scr_file: str) -> None: ...@@ -427,6 +450,79 @@ def _get_scores(cif_json: dict, scr_file: str) -> None:
cif_json["pae"] = scr_dict["predicted_aligned_error"] cif_json["pae"] = scr_dict["predicted_aligned_error"]
def _get_software_data(meta_json: dict) -> dict:
    """Turn meta data about software into modelcif.Software objects.

    Every value of `meta_json` is expected to carry a "software" dict that
    maps a tool name to a dict with a "version" item.

    Returns a dict mapping each known tool name to its modelcif.Software
    object, or to None for tools that have no Software entry defined here
    (at the moment only "alphafold" has one).

    Raises a RuntimeError if the meta data names a tool that is not listed
    in this function, or if two meta data entries report different versions
    for a tool that has a Software object.
    """
    # {key from JSON: modelcif.Software object, None if there is none (yet)}
    sw_data = {
        "jackhmmer": None,
        "hhblits": None,
        "hhsearch": None,
        "hmmsearch": None,
        "hmmbuild": None,
        "kalign": None,
        "alphapulldown": None,
        "alphafold": modelcif.Software(
            "AlphaFold-Multimer",
            "model building",
            "Structure prediction",
            "https://github.com/deepmind/alphafold",
            "package",
            None,  # version, filled in from the meta data below
            ihm.Citation(
                pmid=None,
                title="Protein complex prediction with AlphaFold-Multimer.",
                journal="bioRxiv",
                volume=None,
                page_range=None,
                year=2021,
                authors=[
                    "Evans, R.",
                    "O'Neill, M.",
                    "Pritzel, A.",
                    "Antropova, N.",
                    "Senior, A.",
                    "Green, T.",
                    "Zidek, A.",
                    "Bates, R.",
                    "Blackwell, S.",
                    "Yim, J.",
                    "Ronneberger, O.",
                    "Bodenstein, S.",
                    "Zielinski, M.",
                    "Bridgland, A.",
                    "Potapenko, A.",
                    "Cowie, A.",
                    "Tunyasuvunakool, K.",
                    "Jain, R.",
                    "Clancy, E.",
                    "Kohli, P.",
                    "Jumper, J.",
                    "Hassabis, D.",
                ],
                doi="10.1101/2021.10.04.463034",
            ),
        ),
    }
    for data in meta_json.values():
        for sftwr, sw_meta in data["software"].items():
            if sftwr not in sw_data:
                raise RuntimeError(
                    "Unknown software found in meta data: " + f"'{sftwr}'"
                )
            # Read the version before looking at the Software object so a
            # malformed meta data entry fails for every known tool, not only
            # for those that have an object.
            version = sw_meta["version"]
            sw_obj = sw_data[sftwr]
            if sw_obj is None:
                # No Software object to annotate for this tool.
                continue
            # All meta data entries must agree on the version of a tool.
            if sw_obj.version is not None and sw_obj.version != version:
                raise RuntimeError(
                    "Software versions differ for "
                    + f"'{sftwr}': '{sw_obj.version}' vs. "
                    + f"'{version}'"
                )
            sw_obj.version = version

    return sw_data
def alphapulldown_model_to_modelcif( def alphapulldown_model_to_modelcif(
cmplx_name: str, cmplx_name: str,
mdl_file: str, mdl_file: str,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment