"""Take the output of the AlphaPulldown pipeline and turn it into a ModelCIF
file with a lot of metadata in place."""

import os
import sys

from absl import app, flags, logging


# Command line interface. Flags are registered at import time, parsed values
# are available via `FLAGS` once `app.run()` has started `main()`.
# ToDo: Get options properly, best get the same names as used in existing
#       scripts, e.g. could '--monomer_objects_dir' be used as feature
#       directory/ directory with the feature JSON files?
flags.DEFINE_string(
    "ap_output", None, "AlphaPulldown pipeline output directory."
)
# Left unset (None) means "convert every model found"; there is no separate
# '--select_all' switch.
flags.DEFINE_integer(
    "model_selected",
    None,
    "Model to be converted into ModelCIF, omit to convert all models found "
    + "in '--ap_output'",
)
flags.mark_flags_as_required(["ap_output"])

FLAGS = flags.FLAGS

# ToDo: implement a flags.register_validator() checking that files/ directories
#       exist as expected.
def alphapulldown_model_to_modelcif() -> None:
    """Convert an AlphaPulldown model into a ModelCIF formatted mmCIF file.

    Metadata for the ModelCIF categories will be fetched from AlphaPulldown
    output as far as possible. This expects modelling projects to exist in
    AlphaPulldown's output directory structure.

    Placeholder: the conversion is not implemented yet, so calling this
    function currently has no effect."""


def _get_model_list(ap_dir, model_selected) -> list:
    """Get the list of models to be converted.

    Model files are looked up as
    '<ap_dir>/models/<complex>/ranked_<N>.pdb'.

    Args:
        ap_dir: AlphaPulldown output directory, must contain a 'models'
            subdirectory holding exactly one complex.
        model_selected: Rank of the model to convert. If `None`, all
            'ranked_<N>.pdb' files of the complex are marked for conversion.

    Returns:
        List of paths to existing model files; a single entry if
        `model_selected` is given, otherwise all models ordered by rank.

    Raises:
        SystemExit: With exit status 1, after logging the reason, if the
            'models' directory does not hold exactly one complex, no model
            is found or a model path is not a regular file.
    """
    # ToDo: Question - use 'ranked_*.pdb' or
    #       'unrelaxed_model_*_multimer_v3_pred_0.pdb' models?
    prefix, suffix = "ranked_", ".pdb"

    models_dir = os.path.join(ap_dir, "models")
    cmplx = os.listdir(models_dir)
    # For now, exactly 1 complex is expected in the 'models' subdirectory.
    # If there are more, the 'model_selected' mechanism needs to be further
    # tuned to get to the right model.
    # Explicit check instead of `assert`, which is stripped under `-O`.
    if len(cmplx) != 1:
        logging.error(
            "Expected exactly 1 complex in '%s', found %d.",
            models_dir,
            len(cmplx),
        )
        sys.exit(1)
    cmplx_dir = os.path.join(models_dir, cmplx[0])

    if model_selected is not None:
        mdl_paths = [
            os.path.join(cmplx_dir, f"{prefix}{model_selected}{suffix}")
        ]
    else:
        # Collect every 'ranked_<N>.pdb'; sort by the numeric rank so that
        # 'ranked_10.pdb' comes after 'ranked_2.pdb'.
        ranked = []
        for file_name in os.listdir(cmplx_dir):
            if file_name.startswith(prefix) and file_name.endswith(suffix):
                rank = file_name[len(prefix) : -len(suffix)]
                if rank.isdigit():
                    ranked.append((int(rank), file_name))
        mdl_paths = [
            os.path.join(cmplx_dir, file_name)
            for _, file_name in sorted(ranked)
        ]
        if not mdl_paths:
            logging.error(
                "No '%s*%s' model files found in '%s'.",
                prefix,
                suffix,
                cmplx_dir,
            )
            sys.exit(1)

    # check that files actually exist
    for mdl in mdl_paths:
        if not os.path.isfile(mdl):
            logging.error(
                "Model file '%s' does not exist or is not a regular file.",
                mdl,
            )
            sys.exit(1)

    return mdl_paths


def main(argv):
    """Run as script."""
    # NOTE(review): only the first and the last lines of the note below were
    # visible when this block was reviewed; the part in between is marked
    # with '[...]' rather than guessed.
    # pylint: disable=pointless-string-statement
    """
    Here, the metadata json files for each feature are in features_monomers/
    directory. The models are in models/ directory, and usually there are many
    [...]
    AlphaPulldown side and may be added now or later on. Let me know if it is
    critical for you now.
    """
    # pylint: enable=pointless-string-statement
    del argv  # Unused.

    # make list of selected models
    # (not consumed yet, the conversion below is still a placeholder)
    model_conversions = _get_model_list(FLAGS.ap_output, FLAGS.model_selected)
    # assemble selected models into ModelCIF files + associated data archives
    alphapulldown_model_to_modelcif()


if __name__ == "__main__":
    app.run(main)

# ToDo: Question - option to include all the non-selected models in associated
#       data archive? This blows up storage size (especially if PAEs included),
#       but we did that already in the past. Idea is to have all models
#       available for... reproducibility and whatnot, but show the selected
#       (representative) of the modelling experiment/ study more prominently.

#  LocalWords:  ToDo AlphaPulldown PAEs dir