Skip to content
Snippets Groups Projects
Commit 1e7793ce authored by Bienchen's avatar Bienchen
Browse files

First draft of protocol

parent a28ca7de
Branches
No related tags found
No related merge requests found
...@@ -40,6 +40,9 @@ import modelcif.protocol ...@@ -40,6 +40,9 @@ import modelcif.protocol
# script) # script)
# ToDo: sort non-ModelCIF items in the main JSON object into '__meta__' # ToDo: sort non-ModelCIF items in the main JSON object into '__meta__'
# ToDo: protocol step software parameters # ToDo: protocol step software parameters
# ToDo: Example 1 from the GitHub repo mentions MMseqs2
# ToDo: Discuss input of protocol steps, feature creation has baits, sequences
# does modelling depend on mode?
flags.DEFINE_string( flags.DEFINE_string(
"ap_output", None, "AlphaPulldown pipeline output directory." "ap_output", None, "AlphaPulldown pipeline output directory."
) )
...@@ -232,17 +235,37 @@ def _get_modelcif_entities(target_ents, asym_units, system): ...@@ -232,17 +235,37 @@ def _get_modelcif_entities(target_ents, asym_units, system):
system.target_entities.append(mdlcif_ent) system.target_entities.append(mdlcif_ent)
def _get_step_output_method_type(method_type, protocol_steps):
    """Get the output of a protocol step of a certain type.

    Walks ``protocol_steps`` in order and returns the output data of the
    first step whose ``method_type`` attribute equals ``method_type``.

    :param method_type: Value to compare against each step's ``method_type``.
    :param protocol_steps: Iterable of step objects exposing ``method_type``
                           and ``output_data`` attributes.
    :return: The step's ``output_data`` itself if it is a list, otherwise a
             new ``modelcif.data.DataGroup`` holding that single object.
    :raises RuntimeError: If no step with the requested type exists.
    """
    for step in protocol_steps:
        if step.method_type != method_type:
            continue
        output = step.output_data
        # modelcif.data.DataGroup is some kind of list, so grouped output can
        # be handed back unchanged.
        if isinstance(output, list):
            return output
        # A lone data object must be wrapped in a list first: passing it
        # directly to the list-like constructor would try to iterate over the
        # object instead of creating a group with one member.
        return modelcif.data.DataGroup([output])

    raise RuntimeError(f"Step with 'method_type' '{method_type}' not found.")
def _get_modelcif_protocol_input( def _get_modelcif_protocol_input(
input_data_group, target_entities, ref_dbs, model input_data_group, target_entities, ref_dbs, protocol_steps
): ):
"""Assemble input data for a ModelCIF protocol step.""" """Assemble input data for a ModelCIF protocol step."""
if input_data_group == "target_sequences": input_data = modelcif.data.DataGroup()
input_data = modelcif.data.DataGroup(target_entities) for inpt in input_data_group:
input_data.extend(ref_dbs) if inpt == "target_sequences":
elif input_data_group == "model": input_data.extend(target_entities)
input_data = model elif inpt == "reference_dbs":
else: input_data.extend(ref_dbs)
raise RuntimeError(f"Unknown protocol input: '{input_data_group}'") elif inpt.startswith("STEPTYPE$"):
input_data.extend(
_get_step_output_method_type(
inpt[len("STEPTYPE$") :], protocol_steps
)
)
else:
raise RuntimeError(f"Unknown protocol input: '{inpt}'")
return input_data return input_data
...@@ -271,7 +294,10 @@ def _get_modelcif_protocol( ...@@ -271,7 +294,10 @@ def _get_modelcif_protocol(
for js_step in protocol_steps: for js_step in protocol_steps:
# assemble input & output data # assemble input & output data
input_data = _get_modelcif_protocol_input( input_data = _get_modelcif_protocol_input(
js_step["input_data_group"], target_entities, ref_dbs, model js_step["input_data_group"],
target_entities,
ref_dbs,
protocol.steps,
) )
output_data = _get_modelcif_protocol_output( output_data = _get_modelcif_protocol_output(
js_step["output_data_group"], model js_step["output_data_group"], model
...@@ -302,13 +328,7 @@ def _get_modelcif_protocol( ...@@ -302,13 +328,7 @@ def _get_modelcif_protocol(
software=sw_grp, software=sw_grp,
) )
) )
print("modelcif.protocol.Step(") protocol.steps[-1].method_type = js_step["method_type"]
print(f" input_data={input_data},")
print(f" output_data={output_data},")
print(f" name={js_step['step_name']},")
print(f" details=\"{js_step['details']}\",")
print(f" software={sw_grp},")
print(")")
return protocol return protocol
...@@ -757,14 +777,14 @@ def _get_protocol_steps(modelcif_json): ...@@ -757,14 +777,14 @@ def _get_protocol_steps(modelcif_json):
"""Create the list of protocol steps with software and parameters used.""" """Create the list of protocol steps with software and parameters used."""
protocol = [] protocol = []
# MSA/ monomer feature generation step # MSA/ monomer feature generation step
# ToDo: Discuss input, manual has baits & sequences
step = { step = {
"method_type": "coevolution MSA", "method_type": "coevolution MSA",
"step_name": "MSA generation", "step_name": "MSA generation",
"details": "Create sequence features for corresponding monomers.", "details": "Create sequence features for corresponding monomers.",
"input_data_group": "target_sequences", "input_data_group": ["target_sequences", "reference_dbs"],
"output_data_group": "monomer_pickle_files", "output_data_group": "monomer_pickle_files",
"software_group": [] "software_group": [],
# _ma_protocol_step.protocol_id
} }
for sftwr in modelcif_json["__meta__"].values(): for sftwr in modelcif_json["__meta__"].values():
sftwr = sftwr["software"] sftwr = sftwr["software"]
...@@ -774,8 +794,25 @@ def _get_protocol_steps(modelcif_json): ...@@ -774,8 +794,25 @@ def _get_protocol_steps(modelcif_json):
protocol.append(step) protocol.append(step)
# modelling step # modelling step
# ToDo: Discuss input, seems to depend on mode
# ToDo: what about step details? Would it be nice to add the AlphaPulldown
# mode here?
# ToDo: get software_group from external input
step = {
"method_type": "modeling",
"step_name": None,
"details": None,
"input_data_group": ["target_sequences", "STEPTYPE$coevolution MSA"],
"output_data_group": "model",
"software_group": ["AlphaPulldown", "AlphaFold"],
}
protocol.append(step)
# model selection step <- ask if there is automated selection, if only
# manual, skip this step here?
# model selection step # ToDo: Example 1 in the GitHub repo has a 3rd step: "Evaluation and
# visualisation"
return protocol return protocol
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment