From 1fc029a09bf1b6608b7f7609f27358c3b8c1943e Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Wed, 10 Jan 2024 16:41:20 +0100
Subject: [PATCH] Add software parameters

---
 convert_to_modelcif.py | 152 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 138 insertions(+), 14 deletions(-)

diff --git a/convert_to_modelcif.py b/convert_to_modelcif.py
index bb4557e..974d594 100755
--- a/convert_to_modelcif.py
+++ b/convert_to_modelcif.py
@@ -326,21 +326,23 @@ def _get_modelcif_protocol(
         )
         # loop over software group and assemble software group from that
         sw_grp = modelcif.SoftwareGroup()
-        for pss in js_step["software_group"]:  # protocol step software
-            if sw_dict[pss] is not None:
+        for (
+            pss,
+            psp,
+        ) in zip(  # protocol step software & protocol step parameters
+            js_step["software_group"], js_step["parameter_group"]
+        ):
+            plst = []
+            for arg, val in psp.items():
+                plst.append(modelcif.SoftwareParameter(arg, val))
+            if len(plst) > 0:
+                # add software with individual parameters
+                sw_grp.append(
+                    modelcif.SoftwareWithParameters(sw_dict[pss], plst)
+                )
+            else:
                 # add software w/o individual parameters
                 sw_grp.append(sw_dict[pss])
-                # add software with individual parameters
-                # Commented out code does not need spelling check, so disable
-                # it in Pylint
-                # pylint: disable=wrong-spelling-in-comment
-                # sw_grp.append(
-                #     modelcif.SoftwareWithParameters(
-                #         sw_dict[pss],
-                #         [modelcif.SoftwareParameter(<name>, <value>)],
-                #     )
-                # )
-                # pylint: enable=wrong-spelling-in-comment
         # ToDo: make sure AlphaPulldown is first in the SoftwareGroup() list,
         #       AlphaFold second; that influences citation order in the ModelCIF
         #       file.
@@ -622,6 +624,117 @@ def _file_exists_or_exit(path, msg):
         sys.exit()
 
 
+def _cast_param(val):
+    """Cast a string to int, float or bool if possible, else return as is."""
+    if not isinstance(val, str):
+        return val  # already typed (e.g. bool or None), nothing to cast
+    for cast in (int, float):  # try the stricter type first
+        try:
+            return cast(val)
+        except ValueError:
+            continue
+    # only the exact spellings "True"/"False" are turned into Booleans
+    if val == "True":
+        return True
+    if val == "False":
+        return False
+
+    return val
+
+
+def _get_software_with_parameters(sw_dict, other_dict):
+    """Add parameters from `other_dict` to the tools in `sw_dict` (in place).
+
+    Known keys go, as `--<key>` with the value run through `_cast_param()`,
+    into the "params" dict of each tool listed for them. Ignored keys are
+    skipped, any other key is logged and exits. Returns `sw_dict` itself.
+    """
+    # ToDo: deal with `use_mmseqs=True`
+    # ToDo: should all args go to AlphaPulldown?
+    known_args = {
+        "db_preset": ["AlphaFold"],
+        "max_template_date": ["AlphaFold"],
+        "model_preset": ["AlphaFold"],
+        "num_multimer_predictions_per_model": ["AlphaFold"],
+        "plddt_threshold": ["AlphaPulldown"],
+        "hb_allowance": ["AlphaPulldown"],
+        "threshold_clashes": ["AlphaPulldown"],
+        "job_index": ["AlphaPulldown"],
+        "use_mmseqs2": ["AlphaPulldown"],
+        "skip_existing": ["AlphaPulldown"],
+        "save_msa_files": ["AlphaPulldown"],
+        "num_predictions_per_model": ["AlphaPulldown"],
+        "benchmark": ["AlphaFold", "AlphaPulldown"],
+        "use_precomputed_msas": ["AlphaFold", "AlphaPulldown"],
+        "models_to_relax": ["AlphaFold", "AlphaPulldown"],
+    }
+    trans_args = {
+        "num_multimer_predictions_per_model": "num_predictions_per_model"
+    }
+    ignored_args = [
+        "?",
+        "alsologtostderr",
+        "bfd_database_path",
+        "data_dir",
+        "delta_threshold",
+        "description_file",
+        "hbm_oom_exit",
+        "hhblits_binary_path",
+        "hhsearch_binary_path",
+        "hmmbuild_binary_path",
+        "hmmsearch_binary_path",
+        "jackhmmer_binary_path",
+        "kalign_binary_path",
+        "log_dir",
+        "logger_levels",
+        "logtostderr",
+        "mgnify_database_path",
+        "obsolete_pdbs_path",
+        "only_check_args",
+        "op_conversion_fallback_to_while_loop",
+        "output_dir",
+        "path_to_fasta",
+        "path_to_mmt",
+        "pdb",
+        "pdb70_database_path",
+        "pdb_post_mortem",
+        "pdb_seqres_database_path",
+        "run_with_pdb",
+        "run_with_profiling",
+        "runtime_oom_exit",
+        "showprefixforinfo",
+        "small_bfd_database_path",
+        "stderrthreshold",
+        "template_mmcif_dir",
+        "tt_check_filter",
+        "tt_single_core_summaries",
+        "uniprot_database_path",
+        "uniref30_database_path",
+        "uniref90_database_path",
+        "use_small_bfd",
+        "v",
+        "verbosity",
+        "xml_output_file",
+    ]
+    # keys starting with a numbered arg, e.g. `protein_1`, are ignored, too
+    re_args = re.compile(
+        r"(?:fasta_path|multimeric_chains|multimeric_templates|protein)_\d+"
+    )
+    for key, val in other_dict.items():
+        if key in known_args:
+            # store under the original and, if any, the translated name
+            names = [key, trans_args[key]] if key in trans_args else [key]
+            for name in names:
+                for tool in known_args[name]:
+                    params = sw_dict[tool].setdefault("params", {})
+                    params[f"--{name}"] = _cast_param(val)
+        elif key not in ignored_args and re_args.match(key) is None:
+            logging.info(f"Found unknown key in 'other': {key}")
+            sys.exit()
+
+    return sw_dict
+
+
 def _get_feature_metadata(
     modelcif_json: dict,
     cmplx_name: str,
@@ -642,8 +755,10 @@ def _get_feature_metadata(
         # ToDo: make sure that its always ASCII
         with open(feature_json, "r", encoding="ascii") as jfh:
             jdata = json.load(jfh)
-        modelcif_json["__meta__"][mnmr]["software"] = jdata["software"]
         modelcif_json["__meta__"][mnmr]["databases"] = jdata["databases"]
+        modelcif_json["__meta__"][mnmr][
+            "software"
+        ] = _get_software_with_parameters(jdata["software"], jdata["other"])
 
     return cmplx_name
 
@@ -944,12 +1059,17 @@ def _get_protocol_steps(modelcif_json):
         "input_data_group": ["target_sequences", "reference_dbs"],
         "output_data_group": "monomer_pickle_files",
         "software_group": [],
+        "parameter_group": [],
     }
     for sftwr in modelcif_json["__meta__"].values():
         sftwr = sftwr["software"]
         for tool in sftwr:
             if tool not in step["software_group"]:
                 step["software_group"].append(tool)
+                if "params" in sftwr[tool]:
+                    step["parameter_group"].append(sftwr[tool]["params"])
+                else:
+                    step["parameter_group"].append({})
     protocol.append(step)
 
     # modelling step
@@ -963,6 +1083,10 @@ def _get_protocol_steps(modelcif_json):
         "input_data_group": ["target_sequences", "STEPTYPE$coevolution MSA"],
         "output_data_group": "model",
         "software_group": ["AlphaPulldown", "AlphaFold"],
+        "parameter_group": [
+            sftwr["AlphaPulldown"]["params"],
+            sftwr["AlphaFold"]["params"],
+        ],
     }
     protocol.append(step)
 
-- 
GitLab