diff --git a/convert_to_modelcif.py b/convert_to_modelcif.py index dbfae0fb71a9f9dfaaf8f180b73d0043b9fa775a..00fbad7abec5775d0c9d0f62743f0e4672abcdad 100755 --- a/convert_to_modelcif.py +++ b/convert_to_modelcif.py @@ -27,6 +27,8 @@ import modelcif.dumper import modelcif.model import modelcif.protocol +from alphapulldown.utils import make_dir_monomer_dictionary + # ToDo: Get options properly, best get the same names as used in existing # scripts, e.g. could '--monomer_objects_dir' be used as feature # directory/ directory with the feature JSON files? @@ -43,17 +45,25 @@ import modelcif.protocol # ToDo: Example 1 from the GitHub repo mentions MMseqs2 # ToDo: Discuss input of protocol steps, feature creation has baits, sequences # does modelling depend on mode? +# ToDo: check that PAE files are written to an associated file +# ToDo: deal with `--max_template_date`, beta-barrel project has it as software +# parameter flags.DEFINE_string( - "ap_output", None, "AlphaPulldown pipeline output directory." + "ap_output", None, "AlphaPulldown pipeline output directory" +) +flags.DEFINE_list( + "monomer_objects_dir", + None, + "a list of directories where monomer objects are stored", ) flags.DEFINE_integer( "model_selected", None, - "Model to be converted into ModelCIF, use '--select_all' to convert all " + "model to be converted into ModelCIF, use '--select_all' to convert all " + "models found in '--af2_output'", ) -flags.DEFINE_bool("compress", False, "Compress the ModelCIF file using Gzip") -flags.mark_flags_as_required(["ap_output"]) +flags.DEFINE_bool("compress", False, "compress the ModelCIF file using Gzip") +flags.mark_flags_as_required(["ap_output", "monomer_objects_dir"]) FLAGS = flags.FLAGS @@ -535,19 +545,19 @@ def _get_model_details(cmplx_name: str, data_json: dict) -> str: def _get_feature_metadata( - modelcif_json: dict, cmplx_name: str, prj_dir: str + modelcif_json: dict, + cmplx_name: str, + monomer_objects_dir: list, ) -> list: """Read metadata from a feature JSON file.""" cmplx_name = cmplx_name.split("_and_") - prj_dir = os.path.join(prj_dir, "features_monomers") - if not os.path.isdir(prj_dir): - logging.info(f"No feature directory '{prj_dir}' found.") - sys.exit() + mnmr_obj_fls = make_dir_monomer_dictionary(monomer_objects_dir) if "__meta__" not in modelcif_json: modelcif_json["__meta__"] = {} for mnmr in cmplx_name: modelcif_json["__meta__"][mnmr] = {} - feature_json = os.path.join(prj_dir, f"{mnmr}_feature_metadata.json") + feature_json = f"{mnmr}_feature_metadata.json" + feature_json = os.path.join(mnmr_obj_fls[feature_json], feature_json) if not os.path.isfile(feature_json): logging.info(f"No feature metadata file '{feature_json}' found.") sys.exit() @@ -826,6 +836,7 @@ def alphapulldown_model_to_modelcif( mdl: tuple, out_dir: str, prj_dir: str, + monomer_objects_dir: list, compress: bool = False, ) -> None: """Convert an AlphaPulldown model into a ModelCIF formatted mmCIF file. @@ -836,7 +847,9 @@ def alphapulldown_model_to_modelcif( # ToDo: ENABLE logging.info(f"Processing '{mdl[0]}'...") modelcif_json = {} # fetch metadata - cmplx_name = _get_feature_metadata(modelcif_json, cmplx_name, prj_dir) + cmplx_name = _get_feature_metadata( + modelcif_json, cmplx_name, monomer_objects_dir + ) # fetch/ assemble more data about the modelling experiment _get_model_info( modelcif_json, @@ -963,6 +976,7 @@ def main(argv): mdl, model_dir, FLAGS.ap_output, + FLAGS.monomer_objects_dir, FLAGS.compress, )