diff --git a/translate2modelcif.py b/translate2modelcif.py index de052c613d19355822c4e3ef9011fedf6743a9ad..6222ac4c3f3b38c99b6fc6842048347f8ad391ac 100644 --- a/translate2modelcif.py +++ b/translate2modelcif.py @@ -1,6 +1,8 @@ #! /usr/local/bin/ost """Translate models from Tara/ Xabi from PDB + extra data into ModelCIF.""" # ToDo [internal]: Add software group for ColabFold and AF2-Multimer +# ToDo [internal]: get DB versions in - https://colabfold.mmseqs.com, scroll +# down to "Database Information" import argparse import datetime @@ -202,6 +204,8 @@ def _check_interaction_extra_files_present(model_dir): cnfg = os.path.join(model_dir, "config.json") _check_file(cnfg) + return cnfg + def _check_model_extra_files_present(model_dir, pdb_file): """Check that all files needed to process this model are present.""" @@ -228,7 +232,7 @@ def _get_audit_authors(): ) -def _get_protocol_steps_and_software(): +def _get_protocol_steps_and_software(cnfg_file): """Create the list of protocol steps with software and parameters used.""" protocol = [] @@ -271,7 +275,8 @@ def _get_protocol_steps_and_software(): "version": "1.2.0", } # get parameters - step["software_parameters"] = {} + with open(cnfg_file, encoding="utf8") as jfh: + step["software_parameters"] = json.load(jfh) protocol.append(step) # model selection step @@ -301,7 +306,7 @@ def _get_model_details(gene_names): return ( f"Dimer model generated for {' and '.join(gene_names)}, produced " + "using AlphaFold-Multimer (AlphaFold v2.2.0) as implemented by " - + "ColabFold (v1.2.0) which uses MMseqs2 for MSA generation (UniRef90 " + + "ColabFold (v1.2.0) which uses MMseqs2 for MSA generation (UniRef30 " + "+ Environmental)." 
) @@ -309,7 +314,7 @@ def _get_model_details(gene_names): def _get_model_group_name(): """Get a name for a model group.""" - return "Crosslinked Heterodimer Multimer v2 Models" + return "Crosslinked Heterodimer AlphaFold-Multimer v2 Models" def _get_sequence(chn): @@ -525,6 +530,18 @@ def _get_modelcif_protocol(protocol_steps, target_entities, model): doi=js_step["software"]["citation"]["doi"], ), ) + # if js_step['software_parameters']: + # params = [] + # # ToDo [internal]: handle lists! + # for k, v in js_step['software_parameters'].items(): + # print(k, v.__class__) + # params.append( + # modelcif.SoftwareParameter(k, v) + # ) + # sftwre = modelcif.SoftwareGroup( + # elements=(sftwre,), parameters=params + # ) + if js_step["input"] == "target_sequences": input_data = modelcif.data.DataGroup(target_entities) elif js_step["input"] == "model": @@ -616,12 +633,13 @@ def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx): print(f" ({timer()-pstart:.2f}s)") -def _create_interaction_json(): +def _create_interaction_json(cnfg_file): """Create a dictionary (mimicking JSON) that contains data which is the same for all models.""" data = {} data["audit_authors"] = _get_audit_authors() + data["protocol"] = _get_protocol_steps_and_software(cnfg_file) return data @@ -630,7 +648,6 @@ def _create_model_json(data, pdb_file, up_acs): """Create a dictionary (mimicking JSON) that contains all the data.""" data["target_entities"], ost_ent = _get_entities(pdb_file, up_acs) - data["protocol"] = _get_protocol_steps_and_software() gns = [] for i in data["target_entities"]: gns.append(i["up_gn"]) @@ -651,9 +668,9 @@ def _main(): # get UniProtKB ACs from directory name up_acs = interaction.split("-") - _check_interaction_extra_files_present(opts.model_dir) + cnfg = _check_interaction_extra_files_present(opts.model_dir) - mdlcf_json = _create_interaction_json() + mdlcf_json = _create_interaction_json(cnfg) # iterate model directory for fle in os.listdir(opts.model_dir):