Skip to content
Snippets Groups Projects
Commit 5f2676e3 authored by Bienchen's avatar Bienchen
Browse files

Get draft of ModelCIF 'header'

parent e4f5df09
Branches
No related tags found
No related merge requests found
......@@ -3,11 +3,15 @@
"""Take the output of the AlphaPulldown pipeline and turn it into a ModelCIF
file with a lot of metadata in place."""
from typing import Tuple
import json
import os
import sys
from absl import app, flags, logging
import modelcif
import modelcif.dumper
# ToDo: Get options properly, best get the same names as used in existing
# scripts, e.g. could '--monomer_objects_dir' be used as feature
......@@ -30,44 +34,144 @@ FLAGS = flags.FLAGS
# exist as expected.
def alphapulldown_model_to_modelcif() -> None:
def _store_as_modelcif(
    data_json: dict,
    mdl_file: str,
    out_dir: str,
    # ost_ent, file_prfx, compress, add_files
) -> None:
    """Write the collected metadata into a ModelCIF file.

    A `modelcif.System` is built from the '_struct.title', 'data_' (turned to
    upper case) and '_struct.pdbx_model_details' entries of `data_json`. It is
    dumped into `out_dir`, into a file named after `mdl_file` with the
    directory part removed and the extension replaced by '.cif'.

    The current working directory is switched to `out_dir` while writing and
    restored afterwards, even if writing fails.
    """
    cif_system = modelcif.System(
        title=data_json["_struct.title"],
        id=data_json["data_"].upper(),
        model_details=data_json["_struct.pdbx_model_details"],
    )
    mdl_stem, _ = os.path.splitext(os.path.basename(mdl_file))
    cif_name = f"{mdl_stem}.cif"

    # NOTE: the dumper will write PAE files to the path provided in add_scores
    # -> hence we cheat by working inside 'out_dir' and going back afterwards,
    #    with try/finally keeping that exception-safe.
    start_dir = os.getcwd()
    os.chdir(out_dir)
    try:
        with open(cif_name, "w", encoding="ascii") as cif_fh:
            modelcif.dumper.write(cif_fh, [cif_system])
        # ToDo: not enabled yet
        # _package_associated_files(system.repositories[0])
        # if compress:
        #     _compress_cif_file(mdl_fle)
    finally:
        os.chdir(start_dir)
def _get_model_details(cmplx_name: list, data_json: dict) -> str:
    """Get the model description.

    Args:
        cmplx_name: Names of the monomers forming the complex. For
            convenience, a complex name given as a single string with the
            monomers separated by '_and_' is accepted, too.
        data_json: Metadata collected so far; for every monomer listed in
            data_json["__meta__"], the 'ap_version' entry is read.

    Returns:
        Free text for the model details, naming the monomers and every
        distinct AlphaPulldown version found (in order of first appearance).
    """
    # A plain string would be joined character by character below, so turn it
    # into the list of monomer names first.
    if isinstance(cmplx_name, str):
        monomers = cmplx_name.split("_and_")
    else:
        monomers = list(cmplx_name)

    # dict.fromkeys() drops duplicates while keeping first-seen order
    ap_versions = list(
        dict.fromkeys(
            mnmr_meta["ap_version"]
            for mnmr_meta in data_json["__meta__"].values()
        )
    )

    # ToDo: fetch AF2 version/ have it in metadata JSON
    return (
        f"Model generated for {' and '.join(monomers)}, produced "
        + "using AlphaFold-Multimer (<AF2 VERSION>) as implemented by "
        + f"AlphaPulldown ({', '.join(ap_versions)})."
    )
def _get_feature_metadata(
    modelcif_json: dict, cmplx_name: str, prj_dir: str
) -> list:
    """Read metadata from the feature JSON files of all monomers.

    Args:
        modelcif_json: Metadata store, updated in place. For every monomer,
            modelcif_json["__meta__"][<monomer>] is (re)set to a dictionary
            holding the 'ap_version' read from its feature metadata file.
        cmplx_name: Name of the complex, monomer names separated by '_and_'.
        prj_dir: Project directory, expected to have a 'features_monomers'
            subdirectory with one '<monomer>_feature_metadata.json' file per
            monomer.

    Returns:
        The list of monomer names `cmplx_name` was split into.

    Raises:
        SystemExit: With exit status 1 if the feature directory or the
            metadata file of a monomer does not exist.
    """
    monomers = cmplx_name.split("_and_")
    feature_dir = os.path.join(prj_dir, "features_monomers")
    if not os.path.isdir(feature_dir):
        # This is fatal: report it as an error and do not exit with status 0.
        logging.error("No feature directory '%s' found.", feature_dir)
        sys.exit(1)

    meta = modelcif_json.setdefault("__meta__", {})
    for mnmr in monomers:
        feature_json = os.path.join(
            feature_dir, f"{mnmr}_feature_metadata.json"
        )
        if not os.path.isfile(feature_json):
            logging.error(
                "No feature metadata file '%s' found.", feature_json
            )
            sys.exit(1)
        # JSON is UTF-8 by specification; pure ASCII files are still read
        # correctly since ASCII is a subset of UTF-8.
        with open(feature_json, "r", encoding="utf-8") as jfh:
            jdata = json.load(jfh)
        meta[mnmr] = {"ap_version": jdata["version"]}

    return monomers
def _get_data_block_id_and_struct_and_entry_categories(
    cif_json: dict, cmplx_name: list
) -> None:
    """Get the 'data_' block ID and data for the '_struct' category.

    Args:
        cif_json: Metadata store, updated in place with the keys 'data_',
            '_struct.title' and '_struct.pdbx_model_details'. Needs to carry
            the '__meta__' entry read by `_get_model_details()`.
        cmplx_name: List of the monomer names forming the complex (not the
            '_and_'-joined complex name, which would be joined character by
            character).
    """
    # NOTE(review): nothing is stored for the '_entry' category here, yet -
    # presumably it is derived from the data block ID later on; confirm.
    cif_json["data_"] = "_".join(cmplx_name)
    cif_json["_struct.title"] = f"Prediction for {' and '.join(cmplx_name)}"
    cif_json["_struct.pdbx_model_details"] = _get_model_details(
        cmplx_name, cif_json
    )
def alphapulldown_model_to_modelcif(
    cmplx_name: str,
    mdl_file: str,
    out_dir: str,
    prj_dir: str,
) -> None:
    """Convert an AlphaPulldown model into a ModelCIF formatted mmCIF file.

    Metadata for the ModelCIF categories will be fetched from AlphaPulldown
    output as far as possible. This expects modelling projects to exist in
    AlphaPulldown's output directory structure.

    Args:
        cmplx_name: Name of the complex, monomer names separated by '_and_'.
        mdl_file: Path to the model file to be converted; its base name is
            used to name the ModelCIF file.
        out_dir: Directory the ModelCIF file is written to.
        prj_dir: AlphaPulldown project directory holding the feature
            metadata of the monomers.
    """
    # ToDo: ENABLE logging.info(f"Processing '{mdl_file}'...")
    modelcif_json = {}
    # fetch metadata; this also splits the complex name into monomer names
    monomers = _get_feature_metadata(modelcif_json, cmplx_name, prj_dir)
    # fetch/ assemble more data about the modelling experiment
    _get_data_block_id_and_struct_and_entry_categories(
        modelcif_json, monomers
    )
    _store_as_modelcif(modelcif_json, mdl_file, out_dir)
    # ToDo: ENABLE logging.info(f"... done with '{mdl_file}'")
def _get_model_list(ap_dir: str, model_selected: str) -> Tuple[str, str, list]:
    """Get the list of models to be converted.

    If `model_selected` is None, all 'ranked_*.pdb' models will be marked for
    conversion.

    Args:
        ap_dir: AlphaPulldown output directory; its 'models' subdirectory
            must contain exactly one complex directory.
        model_selected: Rank of the single model to convert, used to build
            the file name 'ranked_<model_selected>.pdb', or None for all.

    Returns:
        Tuple of the complex name (name of the complex directory), the path
        to the complex directory and the list of model file paths.

    Raises:
        SystemExit: With exit status 1 if there is not exactly one complex
            in the 'models' subdirectory or if a model file does not exist.
    """
    # ToDo: Question - use 'ranked_*.pdb' or
    #       'unrelaxed_model_*_multimer_v3_pred_0.pdb' models?
    mdl_path = os.path.join(ap_dir, "models")
    cmplx_dirs = os.listdir(mdl_path)
    # For now, exactly 1 complex is expected in the 'models' subdirectory. If
    # there are more, the 'model_selected' mechanism needs to be further tuned
    # to get to the right model.
    # Checked explicitly, an 'assert' would vanish when running with '-O'.
    if len(cmplx_dirs) != 1:
        logging.error(
            "Expected exactly 1 complex in '%s', found %d.",
            mdl_path,
            len(cmplx_dirs),
        )
        sys.exit(1)
    cmplx = cmplx_dirs[0]
    mdl_path = os.path.join(mdl_path, cmplx)

    if model_selected is not None:
        models = [os.path.join(mdl_path, f"ranked_{model_selected}.pdb")]
    else:
        # Only take PDB files: the ModelCIF files are written next to the
        # models as 'ranked_*.cif' and must not be picked up on a re-run.
        # Sorted, since os.listdir() comes without a guaranteed order.
        models = [
            os.path.join(mdl_path, mdl)
            for mdl in sorted(os.listdir(mdl_path))
            if mdl.startswith("ranked_") and mdl.endswith(".pdb")
        ]

    # check that files actually exist
    for mdl in models:
        if not os.path.isfile(mdl):
            logging.error(
                "Model file '%s' does not exist or is not a regular file.",
                mdl,
            )
            sys.exit(1)

    return cmplx, mdl_path, models
def main(argv):
......@@ -92,10 +196,14 @@ def main(argv):
# pylint: enable=pointless-string-statement
del argv # Unused.
# make list of selected models
model_conversions = _get_model_list(FLAGS.ap_output, FLAGS.model_selected)
# assemble selected models into ModelCIF files + associated data archives
alphapulldown_model_to_modelcif()
# get list of selected models and assemble ModelCIF files + associated data
complex_name, model_dir, model_list = _get_model_list(
FLAGS.ap_output, FLAGS.model_selected
)
for mdl in model_list:
alphapulldown_model_to_modelcif(
complex_name, mdl, model_dir, FLAGS.ap_output
)
if __name__ == "__main__":
......@@ -106,5 +214,9 @@ if __name__ == "__main__":
# but we did that already in the past. Idea is to have all models
# available for... reproducibility and whatnot, but show the selected
# (representative) of the modelling experiment/ study more prominently.
# ToDo: Things to look at: '_struct.title', '_struct.pdbx_model_details',
# 'data_', '_entry', maybe have a user-defined JSON document with things
# like that, including author names?
# ToDo: where to store which model was chosen? Should be in Tara's models.
# LocalWords: ToDo AlphaPulldown PAEs dir
# LocalWords: ToDo AlphaPulldown PAEs dir struct
......@@ -9,8 +9,16 @@ dependencies = [
[tool.black]
line-length = 80
[tool.pylint.MAIN]
load-plugins = "pylint.extensions.bad_builtin"
[tool.pylint.REPORTS]
reports = "no"
[tool.pylint.FORMAT]
max-line-length = 81
[tool.pylint.deprecated_builtins]
# We want to use proper logging, so we can control *ALL* output by the Abseil
# logger, hence: deprecate 'print'
bad-functions = ["map", "filter", "print"]
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment