translate2modelcif.py: describe the modelling protocol with a list of software.

Summary of the change carried by this patch:
  * step["software"] becomes a list of dicts (ColabFold, MMseqs2,
    AlphaFold-Multimer) instead of a single dict; the model-selection step
    gets an empty list.
  * The placeholder "details" strings of both protocol steps are replaced by
    real descriptions.
  * New helper _assemble_modelcif_software() builds one modelcif.Software
    from such a dict; _get_modelcif_protocol() uses it directly for a single
    entry and wraps several entries in a modelcif.SoftwareGroup.

Review notes:
  * Line breaks and indentation were reconstructed from the hunk line counts
    and Python syntax; whitespace inside context lines is a best guess, so
    confirm against the target file (or apply with whitespace tolerance).
  * "unparied" in the first "details" string is corrected to "unpaired"; the
    post-image blob id on the index line predates that correction.
  * NOTE(review): the details text names AlphaFold v2.2.0 while the
    AlphaFold-Multimer entry records version 2.1.1 - confirm which is meant.

diff --git a/translate2modelcif.py b/translate2modelcif.py
index 6222ac4c3f3b38c99b6fc6842048347f8ad391ac..bcfbed10a5cb5e257777e6b9c8767c11e1120f74 100644
--- a/translate2modelcif.py
+++ b/translate2modelcif.py
@@ -240,7 +240,10 @@ def _get_protocol_steps_and_software(cnfg_file):
     step = {
         "method_type": "modeling",
         "name": "ma_protocol_step.step_name",
-        "details": "ma_protocol_step.details",
+        "details": "Model using AlphaFold-Multimer (AlphaFold v2.2.0), "
+        + "without amber relaxation and producing 5 models with up to 3 "
+        + "recycles each, starting from paired and unpaired MSAs for the "
+        + "dimers using MMseqs2.",
     }
     # get input data
     # Must refer to data already in the JSON, so we try keywords
@@ -249,31 +252,99 @@ def _get_protocol_steps_and_software(cnfg_file):
     # Must refer to existing data, so we try keywords
     step["output"] = "model"
     # get software
-    step["software"] = {
-        "name": "ColabFold",
-        "classification": "model building",
-        "description": "software.description",
-        "citation": {
-            "pmid": None,
-            "title": "ColabFold - Making protein folding accessible to all",
-            "journal": "bioRxiv",
-            "volume": None,
-            "page_range": None,
-            "year": 2022,
-            "authors": [
-                "Mirdita M",
-                "Schütze K",
-                "Moriwaki Y",
-                "Heo L",
-                "Ovchinnikov S",
-                "Steinegger M",
-            ],
-            "doi": "10.1101/2021.08.15.456425",
+    step["software"] = [
+        {
+            "name": "ColabFold",
+            "classification": "model building",
+            # ToDo: Get description for ColabFold
+            "description": "software.description",
+            "citation": {
+                "pmid": None,
+                "title": "ColabFold - Making protein folding accessible to all",
+                "journal": "bioRxiv",
+                "volume": None,
+                "page_range": None,
+                "year": 2022,
+                "authors": [
+                    "Mirdita M",
+                    "Schütze K",
+                    "Moriwaki Y",
+                    "Heo L",
+                    "Ovchinnikov S",
+                    "Steinegger M",
+                ],
+                "doi": "10.1101/2021.08.15.456425",
+            },
+            "location": "https://github.com/sokrypton/ColabFold",
+            "type": "package",
+            "version": "1.2.0",
         },
-        "location": "https://github.com/sokrypton/ColabFold",
-        "type": "package",
-        "version": "1.2.0",
-    }
+        {
+            "name": "MMseqs2",
+            "classification": "data collection",
+            "description": "Many-against-Many sequence searching",
+            "citation": {
+                "pmid": "30615063",
+                "title": "MMseqs2 desktop and local web server app for fast, "
+                + "interactive sequence searches",
+                "journal": "Bioinformatics",
+                "volume": 35,
+                "page_range": (2856, 2858),
+                "year": 2019,
+                "authors": [
+                    "Mirdita M",
+                    "Steinegger M",
+                    "Söding J",
+                ],
+                "doi": "10.1093/bioinformatics/bty1057",
+            },
+            "location": "https://github.com/soedinglab/mmseqs2",
+            "type": "package",
+            "version": None,
+        },
+        {
+            "name": "AlphaFold-Multimer",
+            "classification": "model building",
+            "description": "Structure prediction",
+            "citation": {
+                "pmid": None,
+                "title": "Protein complex prediction with "
+                + "AlphaFold-Multimer.",
+                "journal": "bioRxiv",
+                "volume": None,
+                "page_range": None,
+                "year": 2021,
+                "authors": [
+                    "Evans R",
+                    "O'Neill M",
+                    "Pritzel A",
+                    "Antropova N",
+                    "Senior A",
+                    "Green T",
+                    "Žídek A",
+                    "Bates R",
+                    "Blackwell S",
+                    "Yim J",
+                    "Ronneberger O",
+                    "Bodenstein S",
+                    "Zielinski M",
+                    "Bridgland A",
+                    "Potapenko A",
+                    "Cowie A",
+                    "Tunyasuvunakool K",
+                    "Jain R",
+                    "Clancy E",
+                    "Kohli P",
+                    "Jumper J",
+                    "Hassabis D",
+                ],
+                "doi": "10.1101/2021.10.04.463034",
+            },
+            "location": "https://github.com/deepmind/alphafold",
+            "type": "package",
+            "version": "2.1.1",
+        },
+    ]
     # get parameters
     with open(cnfg_file, encoding="utf8") as jfh:
         step["software_parameters"] = json.load(jfh)
@@ -285,11 +356,14 @@ def _get_protocol_steps_and_software(cnfg_file):
     step = {
         "method_type": "model selection",
         "name": "ma_protocol_step.step_name",
-        "details": "ma_protocol_step.details",
+        "details": "Select best model, which is either the top-ranked model "
+        + "as determined by the ColabFold pipeline "
+        + "(iptmscore*0.8+ptmscore*0.2), or else the model with best "
+        + "congruence with crosslinks reported in the related study.",
     }
     step["input"] = "model"
     step["output"] = "model"
-    step["software"] = {}
+    step["software"] = []
     step["software_parameters"] = {}
     protocol.append(step)
 
@@ -504,37 +578,46 @@ def _get_modelcif_entities(target_ents, source, asym_units, system):
         system.target_entities.append(mdlcif_ent)
 
 
+def _assemble_modelcif_software(soft_dict):
+    """Create a modelcif.Software instance from dictionary."""
+    return modelcif.Software(
+        soft_dict["name"],
+        soft_dict["classification"],
+        soft_dict["description"],
+        soft_dict["location"],
+        soft_dict["type"],
+        soft_dict["version"],
+        citation=ihm.Citation(
+            pmid=soft_dict["citation"]["pmid"],
+            title=soft_dict["citation"]["title"],
+            journal=soft_dict["citation"]["journal"],
+            volume=soft_dict["citation"]["volume"],
+            page_range=soft_dict["citation"]["page_range"],
+            year=soft_dict["citation"]["year"],
+            authors=soft_dict["citation"]["authors"],
+            doi=soft_dict["citation"]["doi"],
+        ),
+    )
+
+
 def _get_modelcif_protocol(protocol_steps, target_entities, model):
     """Create the protocol for the ModelCIF file."""
     protocol = modelcif.protocol.Protocol()
     for js_step in protocol_steps:
         sftwre = None
         # ToDo [input]: Turn into software group if parameters are available
-        # ToDo [input]: Get software.description
         if js_step["software"]:
-            sftwre = modelcif.Software(
-                js_step["software"]["name"],
-                js_step["software"]["classification"],
-                js_step["software"]["description"],
-                js_step["software"]["location"],
-                js_step["software"]["type"],
-                js_step["software"]["version"],
-                citation=ihm.Citation(
-                    pmid=js_step["software"]["citation"]["pmid"],
-                    title=js_step["software"]["citation"]["title"],
-                    journal=js_step["software"]["citation"]["journal"],
-                    volume=js_step["software"]["citation"]["volume"],
-                    page_range=js_step["software"]["citation"]["page_range"],
-                    year=js_step["software"]["citation"]["year"],
-                    authors=js_step["software"]["citation"]["authors"],
-                    doi=js_step["software"]["citation"]["doi"],
-                ),
-            )
+            if len(js_step["software"]) == 1:
+                sftwre = _assemble_modelcif_software(js_step["software"][0])
+            else:
+                sftwre = []
+                for sft in js_step["software"]:
+                    sftwre.append(_assemble_modelcif_software(sft))
+                sftwre = modelcif.SoftwareGroup(elements=sftwre)
         # if js_step['software_parameters']:
         #     params = []
         #     # ToDo [internal]: handle lists!
         #     for k, v in js_step['software_parameters'].items():
-        #         print(k, v.__class__)
         #         params.append(
         #             modelcif.SoftwareParameter(k, v)
         #         )
@@ -711,4 +794,4 @@ if __name__ == "__main__":
 # LocalWords: mdlcf mdlcif asym AsymUnit init kwargs atm pos het hetatom pTM
 # LocalWords: biso ujson GlobalPTM pLDDT ptm jfh numpy np GlobalPLDDT lDDT
 # LocalWords: plddt LocalPLDDT timeit PAE MetricType LocalPairwisePAE lpae
-# LocalWords: nd pae qa pstart
+# LocalWords: nd pae qa pstart ColabFold