Skip to content
Snippets Groups Projects
Commit 38a40f13 authored by B13nch3n's avatar B13nch3n
Browse files

Mockup protocol

parent a41aadae
No related branches found
No related tags found
No related merge requests found
A0A1B0GTU1-O75152*
.*~
\ No newline at end of file
.*~
.docker-bash-history
.DS_Store
......@@ -16,6 +16,7 @@ import modelcif
import modelcif.associated
import modelcif.dumper
import modelcif.model
import modelcif.protocol
import modelcif.reference
from ost import io
......@@ -218,6 +219,69 @@ def _get_audit_authors():
return ("Foo B", "Bar F")
def _get_protocol_steps_and_software(trg_ents):
"""Create the list of protocol steps with software and parameters used."""
protocol = []
# modelling step
step = {
"method_type": "modeling",
"name": "ma_protocol_step.step_name",
"details": "ma_protocol_step.details",
}
# get input data
# Must refer to data already in the JSON, so we try keywords
step["input"] = "target_sequences"
# get output data
# Must refer to existing data, so we try keywords
step["output"] = "model"
# get software
step["software"] = {
"name": "ColabFold",
"classification": "model building",
"description": "software.description",
"citation": {
"pmid": None,
"title": "ColabFold - Making protein folding accessible to all",
"journal": "bioRxiv",
"volume": None,
"page_range": None,
"year": 2022,
"authors": [
"Mirdita M",
"Schütze K",
"Moriwaki Y",
"Heo L",
"Ovchinnikov S",
"Steinegger M",
],
"doi": "10.1101/2021.08.15.456425",
},
"location": "https://github.com/sokrypton/ColabFold",
"type": "package",
"version": "software.version",
}
# get parameters
step["software_parameters"] = {}
protocol.append(step)
# model selection step
# ToDo [input/ internal]: model selection step on a single model is a bit
# silly, how do we get a list of models?
step = {
"method_type": "model selection",
"name": "ma_protocol_step.step_name",
"details": "ma_protocol_step.details",
}
step["input"] = "model"
step["output"] = "model"
step["software"] = {}
step["software_parameters"] = {}
protocol.append(step)
return protocol
def _get_title():
"""Get a title for this modelling experiment."""
# ToDo [input]: Add title
......@@ -379,28 +443,9 @@ def _get_scores(data, prfx):
data.update(scrs_json)
def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
"""Mix all the data into a ModelCIF file."""
print(" generating ModelCIF objects...", end="")
pstart = timer()
# ToDo [internal]: Get protocol/ software
# ToDo [internal]: Get QA metrics
# create system to gather all the data
system = modelcif.System(
title=data_json["title"],
id=interaction_name.upper(),
model_details=data_json["model_details"],
)
# create target entities, references, source, asymmetric units & assembly
# for source we assume all chains come from the same taxon
source = ihm.source.Natural(
ncbi_taxonomy_id=data_json["target_entities"][0]["up_ncbi_taxid"],
scientific_name=data_json["target_entities"][0]["up_organism"],
)
# create an asymmetric unit and an entity per target sequence
asym_units = {}
for cif_ent in data_json["target_entities"]:
def _get_modelcif_entities(target_ents, source, asym_units, system):
"""Create ModelCIF entities and asymmetric units."""
for cif_ent in target_ents:
# ToDo [input]: Get entity description
mdlcif_ent = modelcif.Entity(
cif_ent["pdb_sequence"],
......@@ -430,6 +475,31 @@ def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
)
system.target_entities.append(mdlcif_ent)
def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
"""Mix all the data into a ModelCIF file."""
print(" generating ModelCIF objects...", end="")
pstart = timer()
# ToDo [internal]: Get protocol/ software
# create system to gather all the data
system = modelcif.System(
title=data_json["title"],
id=interaction_name.upper(),
model_details=data_json["model_details"],
)
# create target entities, references, source, asymmetric units & assembly
# for source we assume all chains come from the same taxon
source = ihm.source.Natural(
ncbi_taxonomy_id=data_json["target_entities"][0]["up_ncbi_taxid"],
scientific_name=data_json["target_entities"][0]["up_organism"],
)
# create an asymmetric unit and an entity per target sequence
asym_units = {}
_get_modelcif_entities(
data_json["target_entities"], source, asym_units, system
)
# ToDo [input]: Get Assembly name
assembly = modelcif.Assembly(
asym_units.values(), name="ma_struct_assembly_details.assembly_name"
......@@ -460,6 +530,56 @@ def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
)
system.model_groups.append(model_group)
# Add protocol
protocol = modelcif.protocol.Protocol()
for js_step in data_json["protocol"]:
sftwre = None
# ToDo [input]: Turn into software group if parameters are available
# ToDo [input]: Get software.description
# ToDo [input]: Get software.version
if js_step["software"]:
sftwre = modelcif.Software(
js_step["software"]["name"],
js_step["software"]["classification"],
js_step["software"]["description"],
js_step["software"]["location"],
js_step["software"]["type"],
js_step["software"]["version"],
citation=ihm.Citation(
pmid=js_step["software"]["citation"]["pmid"],
title=js_step["software"]["citation"]["title"],
journal=js_step["software"]["citation"]["journal"],
volume=js_step["software"]["citation"]["volume"],
page_range=js_step["software"]["citation"]["page_range"],
year=js_step["software"]["citation"]["year"],
authors=js_step["software"]["citation"]["authors"],
doi=js_step["software"]["citation"]["doi"],
),
)
if js_step["input"] == "target_sequences":
input_data = modelcif.data.DataGroup(system.target_entities)
elif js_step["input"] == "model":
input_data = model
else:
raise RuntimeError(f"Unknown protocol input: '{js_step['input']}'")
if js_step["output"] == "model":
output_data = model
else:
raise RuntimeError(
f"Unknown protocol output: '{js_step['output']}'"
)
protocol.steps.append(
modelcif.protocol.Step(
input_data=input_data,
output_data=output_data,
name=js_step["name"],
details=js_step["details"],
software=sftwre,
)
)
protocol.steps[-1].method_type = js_step["method_type"]
system.protocols.append(protocol)
# write modelcif System to file
print(" write to disk...", end="", flush=True)
pstart = timer()
......@@ -484,6 +604,7 @@ def _create_model_json(data, pdb_file, up_acs):
"""Create a dictionary (mimicking JSON) that contains all the data."""
data["target_entities"], ost_ent = _get_entities(pdb_file, up_acs)
data["protocol"] = _get_protocol_steps_and_software(data["target_entities"])
return ost_ent
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment