diff --git a/translate2modelcif.py b/translate2modelcif.py index 77f93e71f17cf605017b762c1e86d9659f739a6b..3d87f9034a09bdab8fd0581cbc3652fe91183526 100644 --- a/translate2modelcif.py +++ b/translate2modelcif.py @@ -755,43 +755,64 @@ def _get_sequence_dbs(seq_dbs): return [db_dict[seq_db] for seq_db in seq_dbs] +def _get_modelcif_protocol_software(js_step): + """Assemble software entries for a ModelCIF protocol step.""" + if js_step["software"]: + if len(js_step["software"]) == 1: + sftwre = _assemble_modelcif_software(js_step["software"][0]) + else: + sftwre = [] + for sft in js_step["software"]: + sftwre.append(_assemble_modelcif_software(sft)) + sftwre = modelcif.SoftwareGroup(elements=sftwre) + if js_step["software_parameters"]: + params = [] + for key, val in js_step["software_parameters"].items(): + params.append(modelcif.SoftwareParameter(key, val)) + if isinstance(sftwre, modelcif.SoftwareGroup): + sftwre.parameters = params + else: + sftwre = modelcif.SoftwareGroup( + elements=(sftwre,), parameters=params + ) + return sftwre + + return None + + +def _get_modelcif_protocol_input(js_step, target_entities, ref_dbs, model): + """Assemble input data for a ModelCIF protocol step.""" + if js_step["input"] == "target_sequences": + input_data = modelcif.data.DataGroup(target_entities) + input_data.extend(ref_dbs) + elif js_step["input"] == "model": + input_data = model + else: + raise RuntimeError(f"Unknown protocol input: '{js_step['input']}'") + + return input_data + + +def _get_modelcif_protocol_output(js_step, model): + """Assemble output data for a ModelCIF protocol step.""" + if js_step["output"] == "model": + output_data = model + else: + raise RuntimeError(f"Unknown protocol output: '{js_step['output']}'") + + return output_data + + def _get_modelcif_protocol(protocol_steps, target_entities, model, ref_dbs): """Create the protocol for the ModelCIF file.""" protocol = modelcif.protocol.Protocol() for js_step in protocol_steps: - sftwre = None - if js_step["software"]: - if len(js_step["software"]) == 1: - sftwre = _assemble_modelcif_software(js_step["software"][0]) - else: - sftwre = [] - for sft in js_step["software"]: - sftwre.append(_assemble_modelcif_software(sft)) - sftwre = modelcif.SoftwareGroup(elements=sftwre) - if js_step["software_parameters"]: - params = [] - for k, v in js_step["software_parameters"].items(): - params.append(modelcif.SoftwareParameter(k, v)) - if isinstance(sftwre, modelcif.SoftwareGroup): - sftwre.parameters = params - else: - sftwre = modelcif.SoftwareGroup( - elements=(sftwre,), parameters=params - ) + sftwre = _get_modelcif_protocol_software(js_step) + input_data = _get_modelcif_protocol_input( + js_step, target_entities, ref_dbs, model + ) + output_data = _get_modelcif_protocol_output(js_step, model) - if js_step["input"] == "target_sequences": - input_data = modelcif.data.DataGroup(target_entities) - input_data.extend(ref_dbs) - elif js_step["input"] == "model": - input_data = model - else: - raise RuntimeError(f"Unknown protocol input: '{js_step['input']}'") - if js_step["output"] == "model": - output_data = model - else: - raise RuntimeError( - f"Unknown protocol output: '{js_step['output']}'" - ) protocol.steps.append( modelcif.protocol.Step( input_data=input_data, @@ -826,33 +847,31 @@ def _package_associated_files(mdl_name): os.remove(file) -def _store_as_modelcif( - interaction_name, data_json, ost_ent, out_dir, file_prfx, compress -): +def _store_as_modelcif(data_json, ost_ent, out_dir, file_prfx, compress): """Mix all the data into a ModelCIF file.""" print(" generating ModelCIF objects...", end="") pstart = timer() # create system to gather all the data system = modelcif.System( title=data_json["title"], - id=interaction_name.upper(), + id=data_json["data_block_id"].upper(), model_details=data_json["model_details"], ) + # create target entities, references, source, asymmetric units & assembly # for source we assume all chains come from the same taxon - source = ihm.source.Natural( - ncbi_taxonomy_id=data_json["target_entities"][0]["up_ncbi_taxid"], - scientific_name=data_json["target_entities"][0]["up_organism"], - ) - # create an asymmetric unit and an entity per target sequence asym_units = {} _get_modelcif_entities( - data_json["target_entities"], source, asym_units, system + data_json["target_entities"], + ihm.source.Natural( + ncbi_taxonomy_id=data_json["target_entities"][0]["up_ncbi_taxid"], + scientific_name=data_json["target_entities"][0]["up_organism"], + ), + asym_units, + system, ) - assembly = modelcif.Assembly(asym_units.values()) - # audit_authors system.authors.extend(data_json["audit_authors"]) @@ -865,7 +884,7 @@ def _store_as_modelcif( f"(#{data_json['rank_num']} ranked model)" ) model = _OST2ModelCIF( - assembly=assembly, + assembly=modelcif.Assembly(asym_units.values()), asym=asym_units, ost_entity=ost_ent, name=mdl_list_name, @@ -877,10 +896,9 @@ def _store_as_modelcif( system.repositories.append(model.add_scores(data_json, system.id, mdl_name)) print(f" ({timer()-pstart:.2f}s)") - model_group = modelcif.model.ModelGroup( - [model], name=data_json["model_group_name"] + system.model_groups.append( + modelcif.model.ModelGroup([model], name=data_json["model_group_name"]) ) - system.model_groups.append(model_group) ref_dbs = _get_sequence_dbs(data_json["config_data"]["seq_dbs"]) protocol = _get_modelcif_protocol( @@ -919,7 +937,7 @@ def _create_interaction_json(config_data): return data -def _create_model_json(data, pdb_file, up_acs): +def _create_model_json(data, pdb_file, up_acs, block_id): """Create a dictionary (mimicking JSON) that contains all the data.""" data["target_entities"], ost_ent = _get_entities(pdb_file, up_acs) @@ -927,6 +945,7 @@ def _create_model_json(data, pdb_file, up_acs): for i in data["target_entities"]: gns.append(i["up_gn"]) data["title"] = _get_title(gns) + data["data_block_id"] = block_id data["model_details"] = _get_model_details(gns) data["model_group_name"] = _get_model_group_name() @@ -964,8 +983,6 @@ def _main(): print(" preparing data...", end="") pstart = timer() - # NOTE: could also be prepared globally if all carefully overwritten - # but not worth the trouble... mdlcf_json = _create_interaction_json(config_data) # uid = ..._rank_X_model_Y.pdb @@ -975,14 +992,14 @@ def _main(): mdlcf_json["rank_num"] = int(mdl_name_parts[-3]) mdlcf_json["mdl_num"] = int(mdl_name_parts[-1]) - ost_ent = _create_model_json(mdlcf_json, fle, up_acs) + ost_ent = _create_model_json(mdlcf_json, fle, up_acs, uid) # read quality scores from JSON file _get_scores(mdlcf_json, file_prfx) print(f" ({timer()-pstart:.2f}s)") _store_as_modelcif( - uid, mdlcf_json, ost_ent, opts.out_dir, file_prfx, opts.compress + mdlcf_json, ost_ent, opts.out_dir, file_prfx, opts.compress ) print(f" ... done with {fle} ({timer()-pdb_start:.2f}s).")