diff --git a/projects/novelfams/translate2modelcif.py b/projects/novelfams/translate2modelcif.py
index 49e1e728252177a9acad183a9faaf177644b1df2..45849872d5346d2ae834021ef0ba53e3ae98e8e1 100644
--- a/projects/novelfams/translate2modelcif.py
+++ b/projects/novelfams/translate2modelcif.py
@@ -426,9 +426,6 @@ def _get_modelcif_protocol_data(data_label, target_entities, model):
     """Assemble data for a ModelCIF protocol step."""
     if data_label == "target_sequences":
         data = modelcif.data.DataGroup(target_entities)
-    elif data_label == "target_sequences_and_MSA":
-        data = modelcif.data.DataGroup(target_entities)
-        data.append(aln_data)
     elif data_label == "model":
         data = model
     else:
@@ -492,6 +489,29 @@ def _get_assoc_zip_file(fle_path, data_json):
     return zfile
 
 
+def _get_audit_authors():
+    """Return the list of authors that produced this model."""
+    return (
+        "Rodriguez del Rio, Alvaro",
+        "Giner-Lamia, Joaquin",
+        "Cantalapiedra, Carlo P.",
+        "Botas, Jorge",
+        "Deng, Ziqi",
+        "Hernandez-Plaza, Ana",
+        "Munar-Palmer, Marti",
+        "Santamaria-Hernando, Saray",
+        "Rodriguez-Herva, Jose J.",
+        "Ruscheweyh, Hans-Joachim",
+        "Paoli, Lucas",
+        "Schmidt, Thomas S.B.",
+        "Sunagawa, Shinichi",
+        "Bork, Peer",
+        "Lopez-Solanilla, Emilia",
+        "Pedro Coelho, Luis",
+        "Huerta-Cepas, Jaime",
+    )
+
+
 def _store_as_modelcif(
     data_json,
     ost_ent,
@@ -511,6 +531,9 @@ def _store_as_modelcif(
     asym_units = {}
     _get_modelcif_entities(data_json["target_entities"], asym_units, system)
 
+    # audit_authors
+    system.authors.extend(data_json["audit_authors"])
+
     # set up the model to produce coordinates
     model = _OST2ModelCIF(
         assembly=modelcif.Assembly(asym_units.values()),
@@ -543,6 +566,19 @@ def _store_as_modelcif(
     os.chdir(oldpwd)
 
 
+def _get_af2_software():
+    """Get AF2 as dictionary, suitable to create a modelcif software object."""
+    return {
+        "name": "AlphaFold",
+        "classification": "model building",
+        "description": "Structure prediction",
+        "citation": ihm.citations.alphafold2,
+        "location": "https://github.com/deepmind/alphafold",
+        "type": "package",
+        "version": None,
+    }
+
+
 def _get_protocol_steps_and_software_colabfold(config_data):
     """Get protocol steps for ColabFold models."""
     protocol = []
@@ -571,17 +607,7 @@ def _get_protocol_steps_and_software_colabfold(config_data):
             "version": None,
         }
     ]
-    step["software"].append(
-        {
-            "name": "AlphaFold",
-            "classification": "model building",
-            "description": "Structure prediction",
-            "citation": ihm.citations.alphafold2,
-            "location": "https://github.com/deepmind/alphafold",
-            "type": "package",
-            "version": None,
-        }
-    )
+    step["software"].append(_get_af2_software())
     step["software_parameters"] = None
     protocol.append(step)
 
@@ -595,11 +621,43 @@ def _get_config_colabfold():
     return {"description": description}
 
 
+def _get_config_alphafold():
+    """Get config variables for AlphaFold"""
+    description = "Model generation using AlphaFold."
+
+    return {"description": description}
+
+
+def _get_protocol_steps_and_software_alphafold(config_data):
+    """Get protocol steps for AF2 based models."""
+    protocol = []
+
+    # modelling step
+    step = {
+        "method_type": "modeling",
+        "name": None,
+        "details": config_data["description"],
+    }
+    # get input data
+    # Must refer to data already in the JSON, so we try keywords
+    step["input"] = "target_sequences"
+    # get output data
+    # Must refer to existing data, so we try keywords
+    step["output"] = "model"
+    # get software
+    step["software"] = [_get_af2_software()]
+    step["software_parameters"] = None
+    protocol.append(step)
+
+    return protocol
+
+
 def _get_protocol_steps_and_software(mdl_id, af2_lst):
     """Get protocol steps for this model, make a difference between AF2 and
     ColabFold models."""
     if mdl_id in af2_lst:
-        protocol = _get_protocol_steps_and_software_alphafold()
+        config_data = _get_config_alphafold()
+        protocol = _get_protocol_steps_and_software_alphafold(config_data)
     else:
         config_data = _get_config_colabfold()
         protocol = _get_protocol_steps_and_software_colabfold(config_data)
@@ -620,6 +678,7 @@ def _translate2modelcif_single(
 
     # gather data into JSON-like structure
     mdlcf_json = {}
+    mdlcf_json["audit_authors"] = _get_audit_authors()
     mdlcf_json["mdl_id"] = fam_name  # used for entry ID
     mdlcf_json["protocol"] = _get_protocol_steps_and_software(
         fam_name, af2_lst