Skip to content
Snippets Groups Projects
Commit d4835b37 authored by Bienchen's avatar Bienchen
Browse files

Add early protocol

parent ed96fc9c
No related branches found
No related tags found
No related merge requests found
......@@ -80,6 +80,13 @@ def _parse_args():
metavar="<OUTPUT DIR>",
help="Path to directory to store results.",
)
parser.add_argument(
"--af2-models",
default=None,
type=str,
metavar="<LIST FILE>",
help="Path to a txt file with models build with AF2, 1 ID per line.",
)
parser.add_argument(
"--compress",
default=False,
......@@ -415,12 +422,10 @@ def _get_modelcif_protocol_software(js_step):
return None
def _get_modelcif_protocol_data(data_label, target_entities, aln_data, model):
def _get_modelcif_protocol_data(data_label, target_entities, model):
"""Assemble data for a ModelCIF protocol step."""
if data_label == "target_sequences":
data = modelcif.data.DataGroup(target_entities)
elif data_label == "MSA":
data = aln_data
elif data_label == "target_sequences_and_MSA":
data = modelcif.data.DataGroup(target_entities)
data.append(aln_data)
......@@ -431,16 +436,16 @@ def _get_modelcif_protocol_data(data_label, target_entities, aln_data, model):
return data
def _get_modelcif_protocol(protocol_steps, target_entities, aln_data, model):
def _get_modelcif_protocol(protocol_steps, target_entities, model):
"""Create the protocol for the ModelCIF file."""
protocol = modelcif.protocol.Protocol()
for js_step in protocol_steps:
sftwre = _get_modelcif_protocol_software(js_step)
input_data = _get_modelcif_protocol_data(
js_step["input"], target_entities, aln_data, model
js_step["input"], target_entities, model
)
output_data = _get_modelcif_protocol_data(
js_step["output"], target_entities, aln_data, model
js_step["output"], target_entities, model
)
protocol.steps.append(
......@@ -518,6 +523,11 @@ def _store_as_modelcif(
model_group = modelcif.model.ModelGroup([model])
system.model_groups.append(model_group)
protocol = _get_modelcif_protocol(
data_json["protocol"], system.target_entities, model
)
system.protocols.append(protocol)
# write modelcif System to file (NOTE: no PAE here!)
# NOTE: we change path and back while being exception-safe to handle zipfile
oldpwd = os.getcwd()
......@@ -533,10 +543,75 @@ def _store_as_modelcif(
os.chdir(oldpwd)
def _get_protocol_steps_and_software_colabfold(config_data):
    """Assemble the list of protocol steps for a ColabFold model.

    The protocol has exactly one step of method type "modeling". Its input
    and output are given as keyword labels ("target_sequences" and "model")
    which must refer to data already present in the JSON-like structure.
    ColabFold and AlphaFold are listed as software for the step, in that
    order, both without version and without software parameters.

    :param config_data: Mapping providing the text for the step's "details"
                        under the key "description".
    :return: List containing the single step dictionary.
    """
    modelling_step = {
        "method_type": "modeling",
        "name": None,
        "details": config_data["description"],
        # keyword labels, resolved to actual data objects later on
        "input": "target_sequences",
        "output": "model",
        "software": [
            {
                "name": "ColabFold",
                "classification": "model building",
                "description": "Structure prediction",
                "citation": ihm.citations.colabfold,
                "location": "https://github.com/sokrypton/ColabFold",
                "type": "package",
                "version": None,
            },
            {
                "name": "AlphaFold",
                "classification": "model building",
                "description": "Structure prediction",
                "citation": ihm.citations.alphafold2,
                "location": "https://github.com/deepmind/alphafold",
                "type": "package",
                "version": None,
            },
        ],
        "software_parameters": None,
    }

    return [modelling_step]
def _get_config_colabfold():
    """Return the configuration dictionary used for ColabFold models.

    :return: Dictionary with the single key "description", holding the text
             describing the modelling step.
    """
    return {"description": "Model generation using ColabFold."}
def _get_protocol_steps_and_software(mdl_id, af2_lst):
    """Pick the protocol steps matching the way a model was built.

    Models whose ID is found in the AF2 list get the AlphaFold protocol, all
    other models get the ColabFold protocol.

    :param mdl_id: ID of the model to get the protocol for.
    :param af2_lst: Collection of IDs of models built with AF2.
    :return: Protocol steps as produced by the selected helper function.
    """
    if mdl_id not in af2_lst:
        # not registered as AF2 model, so treat it as ColabFold model
        return _get_protocol_steps_and_software_colabfold(
            _get_config_colabfold()
        )

    return _get_protocol_steps_and_software_alphafold()
def _translate2modelcif_single(
f_name,
opts,
mdl_details,
af2_lst,
):
"""Convert a single model with its accompanying data to ModelCIF."""
# ToDo: re-enable Pylint
......@@ -546,13 +621,16 @@ def _translate2modelcif_single(
# gather data into JSON-like structure
mdlcf_json = {}
mdlcf_json["mdl_id"] = fam_name # used for entry ID
mdlcf_json["protocol"] = _get_protocol_steps_and_software(
fam_name, af2_lst
)
# process coordinates
target_entities, ost_ent = _get_entities(f_name, fam_name)
mdlcf_json["target_entities"] = target_entities
# fill annotations
mdlcf_json["title"] = _get_title(f_name)
mdlcf_json["title"] = _get_title(fam_name)
mdlcf_json["model_details"] = mdl_details
# save ModelCIF
......@@ -565,7 +643,7 @@ def _translate2modelcif_single(
)
def _translate2modelcif(f_name, opts):
def _translate2modelcif(f_name, af2_lst, opts):
"""Convert a family of models with their accompanying data to ModelCIF."""
# ToDo: re-enable Pylint
# pylint: disable=too-many-locals
......@@ -590,9 +668,28 @@ def _translate2modelcif(f_name, opts):
f_name,
opts,
mdl_details,
af2_lst,
)
def _read_af2_model_list(path):
    """Read the list of models built with AF2, one ID per line.

    Leading and trailing whitespace is stripped from every line. Lines that
    are empty after stripping are skipped, so blank lines in the file neither
    end up as empty-string IDs nor inflate the reported number of models.

    :param path: Path to the ASCII encoded text file with the model IDs, or
                 None if no list was provided.
    :return: List of model IDs in the order they appear in the file. An empty
             list if path is None (nothing is printed in that case).
    """
    if path is None:
        return []

    with open(path, encoding="ascii") as lfh:
        stripped_lines = (line.strip() for line in lfh)
        # drop blank lines, an empty string is never a valid model ID
        af2_lst = [mdl_id for mdl_id in stripped_lines if mdl_id]

    print(f"Got a list of {len(af2_lst)} models built with AF2.")
    return af2_lst
def _main():
"""Run as script."""
s_tmstmp = timer()
......@@ -602,6 +699,9 @@ def _main():
pdb_files = _get_pdb_files(opts.model_dir)
n_mdls = len(pdb_files)
# read list of AF2 models
af2_mdls = _read_af2_model_list(opts.af2_models)
# iterate over models
print(f"Processing {n_mdls} models.")
tmstmp = s_tmstmp
......@@ -610,6 +710,7 @@ def _main():
try:
_translate2modelcif(
f_name,
af2_mdls,
opts,
)
except (_InvalidCoordinateError, _NoEntitiesError):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment