diff --git a/convert_to_modelcif.py b/convert_to_modelcif.py index e90516d7411e59e226e7ea242520e9bef0abf4fe..0c7a11a4297673371c44fae3a3d76ca2e717e181 100755 --- a/convert_to_modelcif.py +++ b/convert_to_modelcif.py @@ -40,6 +40,9 @@ import modelcif.protocol # script) # ToDo: sort non-ModelCIF items in the main JSON object into '__meta__' # ToDo: protocol step software parameters +# ToDo: Example 1 from the GitHub repo mentions MMseqs2 +# ToDo: Discuss input of protocol steps, feature creation has baits, sequences +# does modelling depend on mode? flags.DEFINE_string( "ap_output", None, "AlphaPulldown pipeline output directory." ) @@ -232,17 +235,37 @@ def _get_modelcif_entities(target_ents, asym_units, system): system.target_entities.append(mdlcif_ent) +def _get_step_output_method_type(method_type, protocol_steps): + """Get the output of a protocol step of a certain type.""" + for step in protocol_steps: + if step.method_type == method_type: + # modelcif.data.DataGroup is some kind of list + if isinstance(step.output_data, list): + return step.output_data + return modelcif.data.DataGroup(step.output_data) + + raise RuntimeError(f"Step with 'method_type' '{method_type}' not found.") + + def _get_modelcif_protocol_input( - input_data_group, target_entities, ref_dbs, model + input_data_group, target_entities, ref_dbs, protocol_steps ): """Assemble input data for a ModelCIF protocol step.""" - if input_data_group == "target_sequences": - input_data = modelcif.data.DataGroup(target_entities) - input_data.extend(ref_dbs) - elif input_data_group == "model": - input_data = model - else: - raise RuntimeError(f"Unknown protocol input: '{input_data_group}'") + input_data = modelcif.data.DataGroup() + for inpt in input_data_group: + if inpt == "target_sequences": + input_data.extend(target_entities) + elif inpt == "reference_dbs": + input_data.extend(ref_dbs) + elif inpt.startswith("STEPTYPE$"): + input_data.extend( + _get_step_output_method_type( + inpt[len("STEPTYPE$") :], 
protocol_steps + ) + ) + else: + raise RuntimeError(f"Unknown protocol input: '{inpt}'") + return input_data @@ -271,7 +294,10 @@ def _get_modelcif_protocol( for js_step in protocol_steps: # assemble input & output data input_data = _get_modelcif_protocol_input( - js_step["input_data_group"], target_entities, ref_dbs, model + js_step["input_data_group"], + target_entities, + ref_dbs, + protocol.steps, ) output_data = _get_modelcif_protocol_output( js_step["output_data_group"], model @@ -302,13 +328,7 @@ def _get_modelcif_protocol( software=sw_grp, ) ) - print("modelcif.protocol.Step(") - print(f" input_data={input_data},") - print(f" output_data={output_data},") - print(f" name={js_step['step_name']},") - print(f" details=\"{js_step['details']}\",") - print(f" software={sw_grp},") - print(")") + protocol.steps[-1].method_type = js_step["method_type"] return protocol @@ -757,14 +777,14 @@ def _get_protocol_steps(modelcif_json): """Create the list of protocol steps with software and parameters used.""" protocol = [] # MSA/ monomer feature generation step + # ToDo: Discuss input, manual has baits & sequences step = { "method_type": "coevolution MSA", "step_name": "MSA generation", "details": "Create sequence features for corresponding monomers.", - "input_data_group": "target_sequences", + "input_data_group": ["target_sequences", "reference_dbs"], "output_data_group": "monomer_pickle_files", - "software_group": [] - # _ma_protocol_step.protocol_id + "software_group": [], } for sftwr in modelcif_json["__meta__"].values(): sftwr = sftwr["software"] @@ -774,8 +794,25 @@ def _get_protocol_steps(modelcif_json): protocol.append(step) # modelling step + # ToDo: Discuss input, seem to depend on mode + # ToDo: what about step details? Would it be nice to add the AlphaPulldown + # mode here? 
+ # ToDo: get software_group from external input + step = { + "method_type": "modeling", + "step_name": None, + "details": None, + "input_data_group": ["target_sequences", "STEPTYPE$coevolution MSA"], + "output_data_group": "model", + "software_group": ["AlphaPulldown", "AlphaFold"], + } + protocol.append(step) + + # model selection step <- ask if there is automated selection, if only + # manual, skip this step here? - # model selection step + # ToDo: Example 1 in the GitHub repo has a 3rd step: "Evaluation and + # visualisation" return protocol