diff --git a/translate2modelcif.py b/translate2modelcif.py
index 50ffdbb80fe082c921293c72ebc71971a9b3e2f0..214477e542af8e2ebbd0de03c701a809f4c55833 100644
--- a/translate2modelcif.py
+++ b/translate2modelcif.py
@@ -1,12 +1,17 @@
 #! /usr/local/bin/ost
 """Translate models from Tara/ Xabi from PDB + extra data into ModelCIF."""
-# ToDo [internal]: get DB versions in - https://colabfold.mmseqs.com, scroll
-# down to "Database Information"
+
+# EXAMPLES for running:
+"""
+ost scripts/translate2modelcif.py "A0A1B0GTU1-O75152" \
+    --top_ranked_only --out_dir="./modelcif"
+"""
 
 import argparse
 import datetime
 import os
 import sys
+import gzip, shutil, zipfile
 from timeit import default_timer as timer
 
 import numpy as np
@@ -14,6 +19,7 @@ import requests
 import ujson as json
 
 import ihm
+import ihm.citations
 import modelcif
 import modelcif.associated
 import modelcif.dumper
@@ -38,6 +44,27 @@ def _parse_args():
         help="Directory with model(s) to be translated. Must be of form "
         + "'<UniProtKB AC>-<UniProtKB AC>'",
     )
+    parser.add_argument(
+        "--top_ranked_only",
+        default=False,
+        action="store_true",
+        help="Only process the top-ranked model."
+    )
+    parser.add_argument(
+        "--out_dir",
+        type=str,
+        metavar="<OUTPUT DIR>",
+        default="",
+        help="Path to a separate directory to store the results " \
+             "(model_dir is used if none is given).",
+    )
+    parser.add_argument(
+        "--compress",
+        default=False,
+        action="store_true",
+        help="Compress the ModelCIF file with gzip " \
+             "(note that the QA file is zipped either way).",
+    )
 
     opts = parser.parse_args()
 
@@ -48,42 +75,49 @@
         _abort_msg(f"Model directory '{opts.model_dir}' does not exist.")
     if not os.path.isdir(opts.model_dir):
         _abort_msg(f"Path '{opts.model_dir}' does not point to a directory.")
+    # check out_dir
+    if not opts.out_dir:
+        opts.out_dir = opts.model_dir
+    else:
+        if not os.path.exists(opts.out_dir):
+            _abort_msg(f"Output directory '{opts.out_dir}' does not exist.")
+        if not os.path.isdir(opts.out_dir):
+            _abort_msg(f"Path '{opts.out_dir}' does not point to a directory.")
 
     return opts
 
 
 # pylint: disable=too-few-public-methods
 class _GlobalPTM(modelcif.qa_metric.Global, modelcif.qa_metric.PTM):
-    """Predicted accuracy according to the TM-score score in [0,1]."""
+    """Predicted accuracy according to the TM-score in [0,1]"""
 
     name = "pTM"
     software = None
 
 
 class _GlobalPLDDT(modelcif.qa_metric.Global, modelcif.qa_metric.PLDDT):
-    """Predicted accuracy according to the CA-only lDDT in [0,100]."""
+    """Predicted accuracy according to the CA-only lDDT in [0,100]"""
 
     name = "pLDDT"
     software = None
 
 
 class _LocalPLDDT(modelcif.qa_metric.Local, modelcif.qa_metric.PLDDT):
-    """Predicted accuracy according to the CA-only lDDT in [0,100]."""
+    """Predicted accuracy according to the CA-only lDDT in [0,100]"""
 
     name = "pLDDT"
     software = None
 
 
 class _PAE(modelcif.qa_metric.MetricType):
-    """Predicted aligned error (in Angstroms).
-    See :class:`MetricType` for more information."""
+    """Predicted aligned error (in Angstroms)"""
 
     type = "PAE"
     other_details = None
 
 
 class _LocalPairwisePAE(modelcif.qa_metric.LocalPairwise, _PAE):
-    """predicted aligned error (in Angstroms)."""
+    """Predicted aligned error (in Angstroms)"""
 
     name = "PAE"
     software = None
 
@@ -118,7 +152,7 @@ class _OST2ModelCIF(modelcif.model.AbInitioModel):
                 occupancy=atm.occupancy,
             )
 
-    def add_scores(self, scores_json, entry_id, ac_file_prfx):
+    def add_scores(self, scores_json, entry_id, mdl_name):
         """Add QA metrics from AF2 scores."""
         # global scores
         self.qa_metrics.extend(
@@ -162,25 +196,28 @@ class _OST2ModelCIF(modelcif.model.AbInitioModel):
 
         self.qa_metrics.extend(lpae)
 
-        ac_file = f"{os.path.basename(ac_file_prfx)}_local_pairwise_qa.cif"
+        ac_file = f"{mdl_name}_local_pairwise_qa.cif"
+        qa_file = modelcif.associated.LocalPairwiseQAScoresFile(
+            ac_file,
+            categories=["_ma_qa_metric_local_pairwise"],
+            copy_categories=["_ma_qa_metric"],
+            entry_id=entry_id,
+            entry_details="This file is an associated file consisting "
+            + "of local pairwise QA metrics. This is a partial mmCIF "
+            + "file and can be validated by merging with the main "
+            + "mmCIF file containing the model coordinates and other "
+            + "associated data.",
+            details="Predicted aligned error",
+        )
 
         return modelcif.associated.Repository(
             "",
             [
-                modelcif.associated.LocalPairwiseQAScoresFile(
-                    ac_file,
-                    categories=["_ma_qa_metric_local_pairwise"],
-                    copy_categories=["_ma_qa_metric"],
-                    entry_id=entry_id,
-                    entry_details="This file is an associated file consisting "
-                    + "of local pairwise QA metrics. This is a partial mmCIF "
-                    + "file and can be validated by merging with the main "
-                    + "mmCIF file containing the model coordinates and other "
-                    + "associated data.",
-                    details="Predicted aligned error.",
-                )
+                modelcif.associated.ZipFile(f"{mdl_name}.zip",
+                                            files=[qa_file])
             ],
         )
+        # NOTE: by convention MA expects zip file with same name as model-cif
 
 
 def _abort_msg(msg, exit_code=1):
@@ -220,29 +257,131 @@ def _get_audit_authors():
     """Return the list of authors that produced this model."""
     # ToDo: tell Xabi that his name can't have a á in mmCIF
     return (
-        "Bartolec T",
-        "Vazquez-Campos X",
-        "Johnson M",
-        "Norman A",
-        "Payne R",
-        "Wilkins M",
-        "Mackay J",
-        "Low J",
+        "Bartolec, T.",
+        "Vazquez-Campos, X.",
+        "Johnson, M.",
+        "Norman, A.",
+        "Payne, R.",
+        "Wilkins, M.",
+        "Mackay, J.",
+        "Low, J.",
     )
 
 
-def _get_protocol_steps_and_software(cnfg_file):
+def _parse_colabfold_config(cnfg_file):
+    """Read config.json and fetch relevant data from it."""
+    # NOTE: following code from https://github.com/sokrypton/ColabFold/blob/main/colabfold/batch.py to understand config
+
+    # fetch and drop fields which are not relevant for model building
+    with open(cnfg_file, encoding="utf8") as jfh:
+        cf_config = json.load(jfh)
+    if "num_queries" in cf_config:
+        del cf_config["num_queries"]
+    # fetch relevant data
+    # -> MSA mode
+    if cf_config["msa_mode"] == "MMseqs2 (UniRef+Environmental)":
+        seq_dbs = ["UniRef", "Environmental"]
+        use_mmseqs = True
+        use_msa = True
+    elif cf_config["msa_mode"] == "MMseqs2 (UniRef only)":
+        seq_dbs = ["UniRef"]
+        use_mmseqs = True
+        use_msa = True
+    elif cf_config["msa_mode"] == "single_sequence":
+        seq_dbs = []
+        use_mmseqs = False
+        use_msa = False
+    elif cf_config["msa_mode"] == "custom":
+        print("WARNING: Custom MSA mode used. " \
+              "Not clear from config what to do here!")
+        seq_dbs = []
+        use_mmseqs = False
+        use_msa = True
+    else:
+        raise ValueError(f"Unknown msa_mode {cf_config['msa_mode']}")
+    # -> model type
+    if cf_config["model_type"] == "AlphaFold2-multimer-v1":
+        # AF-Multimer as introduced in AlphaFold v2.1.0
+        use_multimer = True
+        multimer_version = 1
+    elif cf_config["model_type"] == "AlphaFold2-multimer-v2":
+        # AF-Multimer as introduced in AlphaFold v2.2.0
+        use_multimer = True
+        multimer_version = 2
+    elif cf_config["model_type"] == "AlphaFold2-ptm":
+        use_multimer = False
+        multimer_version = None
+    else:
+        raise ValueError(f"Unknown model_type {cf_config['model_type']}")
+
+    # write description
+    description = f"Model generated using ColabFold v{cf_config['version']}"
+    if use_multimer:
+        description += f" with AlphaFold-Multimer (v{multimer_version})"
+    else:
+        description += " with AlphaFold"
+    description += f" producing {cf_config['num_models']} models" \
+                   f" with {cf_config['num_recycles']} recycles each"
+    if cf_config["use_amber"]:
+        description += ", with AMBER relaxation"
+    else:
+        description += ", without model relaxation"
+    if cf_config["use_templates"]:
+        print("WARNING: ColabFold may use PDB70 or custom templates. " \
+              "Not clear from config!")
+        description += ", using templates"
+    else:
+        description += ", without templates"
+    if cf_config["rank_by"] == "plddt":
+        description += ", ranked by pLDDT"
+    elif cf_config["rank_by"] == "ptmscore":
+        description += ", ranked by pTM"
+    elif cf_config["rank_by"] == "multimer":
+        description += ", ranked by ipTM*0.8+pTM*0.2"
+    else:
+        raise ValueError(f"Unknown rank_by {cf_config['rank_by']}")
+    if use_msa:
+        description += ", starting from"
+        if use_mmseqs:
+            msa_type = "MSA"
+        else:
+            msa_type = "custom MSA"
+        if use_multimer:
+            if cf_config["pair_mode"] == "unpaired+paired":
+                description += f" paired and unpaired {msa_type}s"
+            elif cf_config["pair_mode"] == "paired":
+                description += f" paired {msa_type}s"
+            elif cf_config["pair_mode"] == "unpaired":
+                description += f" unpaired {msa_type}s"
+            else:
+                raise ValueError(f"Unknown pair_mode {cf_config['pair_mode']}")
+        else:
+            description += f" an {msa_type}"
+        if use_mmseqs:
+            description += f" from MMseqs2 ({'+'.join(seq_dbs)})"
+    else:
+        description += " without an MSA"
+    description += "."
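+    # Example of the assembled description for a hypothetical config
+    # (ColabFold v1.2.0, multimer-v2, 5 models, 3 recycles, no AMBER,
+    # no templates, rank_by=multimer, pair_mode=unpaired+paired):
+    #   "Model generated using ColabFold v1.2.0 with AlphaFold-Multimer (v2)
+    #   producing 5 models with 3 recycles each, without model relaxation,
+    #   without templates, ranked by ipTM*0.8+pTM*0.2, starting from paired
+    #   and unpaired MSAs from MMseqs2 (UniRef+Environmental)."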
+
+    return {
+        "config": cf_config,
+        "seq_dbs": seq_dbs,
+        "use_mmseqs": use_mmseqs,
+        "use_msa": use_msa,
+        "use_multimer": use_multimer,
+        "multimer_version": multimer_version,
+        "description": description
+    }
+
+
+def _get_protocol_steps_and_software(config_data):
     """Create the list of protocol steps with software and parameters used."""
     protocol = []
 
     # modelling step
     step = {
         "method_type": "modeling",
-        "name": "ma_protocol_step.step_name",
-        "details": "Model using AlphaFold-Multimer (AlphaFold v2.2.0), "
-        + "without amber relaxation and producing 5 models with up to 3 "
-        + "recycles each, starting from paired and unparied MSAs for the "
-        + "dimers using MMseqs2.",
+        "name": None,
+        "details": config_data["description"],
     }
 
     # get input data
     # Must refer to data already in the JSON, so we try keywords
@@ -255,116 +394,116 @@
         {
             "name": "ColabFold",
             "classification": "model building",
-            # ToDo: Get description for ColabFold
-            "description": "software.description",
-            "citation": {
-                "pmid": None,
-                "title": "ColabFold - Making protein folding accessible to all",
-                "journal": "bioRxiv",
-                "volume": None,
-                "page_range": None,
-                "year": 2022,
-                "authors": [
-                    "Mirdita M",
-                    "Schuetze K",
-                    "Moriwaki Y",
-                    "Heo L",
-                    "Ovchinnikov S",
-                    "Steinegger M",
-                ],
-                "doi": "10.1101/2021.08.15.456425",
-            },
+            "description": "Structure prediction",
+            "citation": ihm.citations.colabfold,
             "location": "https://github.com/sokrypton/ColabFold",
             "type": "package",
             "version": "1.2.0",
-        },
-        {
+        }]
+    if config_data["use_mmseqs"]:
+        step["software"].append({
             "name": "MMseqs2",
             "classification": "data collection",
             "description": "Many-against-Many sequence searching",
-            "citation": {
-                "pmid": "30615063",
-                "title": "MMseqs2 desktop and local web server app for fast, "
-                + "interactive sequence searches",
-                "journal": "Bioinformatics",
-                "volume": 35,
-                "page_range": (2856, 2858),
-                "year": 2019,
-                "authors": [
-                    "Mirdita M",
-                    "Steinegger M",
-                    "Soeding J",
+            "citation": ihm.Citation(
+                pmid="30615063",
+                title="MMseqs2 desktop and local web server app for fast, "
+                + "interactive sequence searches.",
+                journal="Bioinformatics",
+                volume=35,
+                page_range=(2856, 2858),
+                year=2019,
+                authors=[
+                    "Mirdita, M.",
+                    "Steinegger, M.",
+                    "Soeding, J.",
                 ],
-                "doi": "10.1093/bioinformatics/bty1057",
-            },
+                doi="10.1093/bioinformatics/bty1057",
+            ),
             "location": "https://github.com/soedinglab/mmseqs2",
             "type": "package",
             "version": None,
-        },
-        {
+        })
+    if config_data["use_multimer"]:
+        step["software"].append({
             "name": "AlphaFold-Multimer",
             "classification": "model building",
             "description": "Structure prediction",
-            "citation": {
-                "pmid": None,
-                "title": "Protein complex prediction with "
+            "citation": ihm.Citation(
+                pmid=None,
+                title="Protein complex prediction with "
                 + "AlphaFold-Multimer.",
-                "journal": "bioRxiv",
-                "volume": None,
-                "page_range": None,
-                "year": 2021,
-                "authors": [
-                    "Evans R",
-                    "O'Neill M",
-                    "Pritzel A",
-                    "Antropova N",
-                    "Senior A",
-                    "Green T",
-                    "Zidek A",
-                    "Bates R",
-                    "Blackwell S",
-                    "Yim J",
-                    "Ronneberger O",
-                    "Bodenstein S",
-                    "Zielinski M",
-                    "Bridgland A",
-                    "Potapenko A",
-                    "Cowie A",
-                    "Tunyasuvunakool K",
-                    "Jain R",
-                    "Clancy E",
-                    "Kohli P",
-                    "Jumper J",
-                    "Hassabis D",
+                journal="bioRxiv",
+                volume=None,
+                page_range=None,
+                year=2021,
+                authors=[
+                    "Evans, R.",
+                    "O'Neill, M.",
+                    "Pritzel, A.",
+                    "Antropova, N.",
+                    "Senior, A.",
+                    "Green, T.",
+                    "Zidek, A.",
+                    "Bates, R.",
+                    "Blackwell, S.",
+                    "Yim, J.",
"Ronneberger, O.", + "Bodenstein, S.", + "Zielinski, M.", + "Bridgland, A.", + "Potapenko, A.", + "Cowie, A.", + "Tunyasuvunakool, K.", + "Jain, R.", + "Clancy, E.", + "Kohli, P.", + "Jumper, J.", + "Hassabis, D.", ], - "doi": "10.1101/2021.10.04.463034", - }, + doi="10.1101/2021.10.04.463034", + ), "location": "https://github.com/deepmind/alphafold", "type": "package", - "version": "2.1.1", - }, - ] - # get parameters - with open(cnfg_file, encoding="utf8") as jfh: - step["software_parameters"] = json.load(jfh) + "version": None, + }) + else: + step["software"].append({ + "name": "AlphaFold", + "classification": "model building", + "description": "Structure prediction", + "citation": ihm.citations.alphafold2, + "location": "https://github.com/deepmind/alphafold", + "type": "package", + "version": None, + }) + step["software_parameters"] = config_data["config"] protocol.append(step) # model selection step # ToDo [input/ internal]: model selection step on a single model is a bit # silly, how do we get a list of models? - step = { - "method_type": "model selection", - "name": "ma_protocol_step.step_name", - "details": "Select best model, which is either the top-ranked model " - + "as determined by the ColabFold pipeline " - + "(iptmscore*0.8+ptmscore*0.2), or else the model with best " - + "congruence with crosslinks reported in the related study.", - } - step["input"] = "model" - step["output"] = "model" - step["software"] = [] - step["software_parameters"] = {} - protocol.append(step) + # GT-NOTES: + # - input/output should be ok without list of models + # - rank of model is already stored in _ma_model_list.model_name and + # _ma_data.name (in _store_as_modelcif) + # - ColabFold ranking details is already in details of step above. + # - Suggestion: add extra step only if AF-ranking was overruled and + # include it in step above. + + # step = { + # "method_type": "model selection", + # "name": "ma_protocol_step.step_name", + # "details": "Select best model, which is either the top-ranked model " + # + "as determined by the ColabFold pipeline " + # + "(iptmscore*0.8+ptmscore*0.2), or else the model with best " + # + "congruence with crosslinks reported in the related study.", + # } + # step["input"] = "model" + # step["output"] = "model" + # step["software"] = [] + # step["software_parameters"] = {} + # protocol.append(step) return protocol @@ -387,7 +526,7 @@ def _get_model_details(gene_names): def _get_model_group_name(): """Get a name for a model group.""" - return "Crosslinked Heterodimer ALphaFold-Multimer v2 Models" + return "Crosslinked Heterodimer AlphaFold-Multimer v2 Models" def _get_sequence(chn): @@ -529,7 +668,8 @@ def _get_entities(pdb_file, up_acs): upkb = _get_upkb_for_sequence(sqe, up_acs[i]) cif_ent["pdb_sequence"] = sqe cif_ent["pdb_chain_id"] = chn.name - cif_ent["description"] = f"Model of {upkb['up_gn']} ({upkb['up_ac']})" + cif_ent["description"] = f"{upkb['up_organism']} {upkb['up_gn']} " \ + f"({upkb['up_ac']})" cif_ent.update(upkb) entities.append(cif_ent) @@ -542,8 +682,8 @@ def _get_scores(data, prfx): with open(scrs_fle, encoding="utf8") as jfh: scrs_json = json.load(jfh) - # ToDo: is dict.update still the way to go when iterating multiple model - # directories? Aka, does dict.update overwrite old scores? + # NOTE for reuse of data when iterating multiple models: this will overwrite + # scores in data but will not delete any scores if prev. models had more... 
     data.update(scrs_json)
 
 
@@ -570,10 +710,8 @@ def _get_modelcif_entities(target_ents, source, asym_units, system):
                 )
             ],
         )
-        # ToDo [input]: Add details
         asym_units[cif_ent["pdb_chain_id"]] = modelcif.AsymUnit(
-            mdlcif_ent,
-            details="struct_asym.details",
+            mdlcif_ent
        )
         system.target_entities.append(mdlcif_ent)
 
@@ -587,25 +725,34 @@ def _assemble_modelcif_software(soft_dict):
         soft_dict["location"],
         soft_dict["type"],
         soft_dict["version"],
-        citation=ihm.Citation(
-            pmid=soft_dict["citation"]["pmid"],
-            title=soft_dict["citation"]["title"],
-            journal=soft_dict["citation"]["journal"],
-            volume=soft_dict["citation"]["volume"],
-            page_range=soft_dict["citation"]["page_range"],
-            year=soft_dict["citation"]["year"],
-            authors=soft_dict["citation"]["authors"],
-            doi=soft_dict["citation"]["doi"],
-        ),
+        citation=soft_dict["citation"]
     )
 
 
-def _get_modelcif_protocol(protocol_steps, target_entities, model):
+def _get_sequence_dbs(seq_dbs):
+    """Get ColabFold seq. DBs."""
+    # NOTE: hard coded for ColabFold versions before 2022/07/13
+    # -> afterwards UniRef30 updated to 2022_02 (and maybe more changes)
+    db_dict = {
+        "UniRef": modelcif.ReferenceDatabase(
+            "UniRef30",
+            "http://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2103.tar.gz",
+            version="2021_03"
+        ),
+        "Environmental": modelcif.ReferenceDatabase(
+            "ColabFold DB",
+            "http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz",
+            version="2021_08"
+        )
+    }
+    return [db_dict[seq_db] for seq_db in seq_dbs]
+
+
+def _get_modelcif_protocol(protocol_steps, target_entities, model, ref_dbs):
     """Create the protocol for the ModelCIF file."""
     protocol = modelcif.protocol.Protocol()
     for js_step in protocol_steps:
         sftwre = None
-        # ToDo [input]: Turn into software group if parameters are available
         if js_step["software"]:
             if len(js_step["software"]) == 1:
                 sftwre = _assemble_modelcif_software(js_step["software"][0])
@@ -616,7 +763,6 @@ def _get_modelcif_protocol(protocol_steps, target_entities, model):
                 sftwre = modelcif.SoftwareGroup(elements=sftwre)
             if js_step["software_parameters"]:
                 params = []
-                # ToDo [internal]: handle lists!
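+                # e.g. a config.json entry {"num_recycles": 3} should end up
+                # as modelcif.SoftwareParameter("num_recycles", 3); values are
+                # passed through as-is ("num_recycles" is illustrative)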
                 for k, v in js_step["software_parameters"].items():
                     params.append(
                         modelcif.SoftwareParameter(k, v)
@@ -630,7 +776,7 @@ def _get_modelcif_protocol(protocol_steps, target_entities, model):
 
         if js_step["input"] == "target_sequences":
             input_data = modelcif.data.DataGroup(target_entities)
-            # ToDo: Add databases + versions
+            input_data.extend(ref_dbs)
         elif js_step["input"] == "model":
             input_data = model
         else:
@@ -655,7 +801,28 @@ def _get_modelcif_protocol(protocol_steps, target_entities, model):
     return protocol
 
 
-def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
+def _compress_cif_file(cif_file):
+    """Compress cif file and delete the original."""
+    with open(cif_file, 'rb') as f_in:
+        with gzip.open(cif_file + '.gz', 'wb') as f_out:
+            shutil.copyfileobj(f_in, f_out)
+    os.remove(cif_file)
+
+
+def _package_associated_files(mdl_name):
+    """Compress associated files into a single zip file and delete originals."""
+    # file names must match ones from add_scores
+    zip_path = f"{mdl_name}.zip"
+    files = [f"{mdl_name}_local_pairwise_qa.cif"]
+    # zip settings tested for good speed vs compression
+    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_BZIP2) as myzip:
+        for file in files:
+            myzip.write(file)
+            os.remove(file)
+
+
+def _store_as_modelcif(interaction_name, data_json, ost_ent, out_dir, file_prfx,
+                       compress):
     """Mix all the data into a ModelCIF file."""
     print("  generating ModelCIF objects...", end="")
     pstart = timer()
@@ -678,27 +845,31 @@ def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
         data_json["target_entities"], source, asym_units, system
     )
 
-    # ToDo [input]: Get Assembly name
     assembly = modelcif.Assembly(
-        asym_units.values(), name="ma_struct_assembly_details.assembly_name"
+        asym_units.values()
    )
 
     # audit_authors
     system.authors.extend(data_json["audit_authors"])
 
     # set up the model to produce coordinates
-    # ToDo [input]: Get ma_model_list.model_name
+    if data_json["rank_num"] == 1:
+        mdl_list_name = f"Model {data_json['mdl_num']} (top ranked model)"
+    else:
+        mdl_list_name = f"Model {data_json['mdl_num']} " \
+                        f"(#{data_json['rank_num']} ranked model)"
     model = _OST2ModelCIF(
         assembly=assembly,
        asym=asym_units,
         ost_entity=ost_ent,
-        name="ma_model_list.model_name",
+        name=mdl_list_name,
     )
     print(f" ({timer()-pstart:.2f}s)")
 
     print("  processing QA scores...", end="", flush=True)
     pstart = timer()
+    mdl_name = os.path.basename(file_prfx)
     system.repositories.append(
-        model.add_scores(data_json, system.id, file_prfx)
+        model.add_scores(data_json, system.id, mdl_name)
     )
     print(f" ({timer()-pstart:.2f}s)")
 
@@ -707,26 +878,39 @@ def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
     )
     system.model_groups.append(model_group)
 
+    ref_dbs = _get_sequence_dbs(data_json["config_data"]["seq_dbs"])
     protocol = _get_modelcif_protocol(
-        data_json["protocol"], system.target_entities, model
+        data_json["protocol"], system.target_entities, model, ref_dbs
     )
     system.protocols.append(protocol)
 
     # write modelcif System to file
     print("  write to disk...", end="", flush=True)
     pstart = timer()
-    with open(f"{file_prfx}.cif", "w", encoding="ascii") as mmcif_fh:
-        modelcif.dumper.write(mmcif_fh, [system])
+    # NOTE: this dumps the PAE file to the path given in add_scores
+    # -> hence we cheat by changing the working directory and back,
+    #    exception-safe via try/finally...
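+    # (both modelcif.dumper.write() and _package_associated_files() create
+    # their output relative to the current working directory, so all files
+    # land in out_dir)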
+    oldpwd = os.getcwd()
+    os.chdir(out_dir)
+    try:
+        with open(f"{mdl_name}.cif", "w", encoding="ascii") as mmcif_fh:
+            modelcif.dumper.write(mmcif_fh, [system])
+        _package_associated_files(mdl_name)
+        if compress:
+            _compress_cif_file(f"{mdl_name}.cif")
+    finally:
+        os.chdir(oldpwd)
+
     print(f" ({timer()-pstart:.2f}s)")
 
 
-def _create_interaction_json(cnfg_file):
+def _create_interaction_json(config_data):
     """Create a dictionary (mimicking JSON) that contains data which is the
     same for all models."""
     data = {}
 
     data["audit_authors"] = _get_audit_authors()
-    data["protocol"] = _get_protocol_steps_and_software(cnfg_file)
+    data["protocol"] = _get_protocol_steps_and_software(config_data)
+    data["config_data"] = config_data
 
     return data
 
@@ -756,14 +940,15 @@ def _main():
     up_acs = interaction.split("-")
 
     cnfg = _check_interaction_extra_files_present(opts.model_dir)
-
-    mdlcf_json = _create_interaction_json(cnfg)
+    config_data = _parse_colabfold_config(cnfg)
 
     # iterate model directory
-    for fle in os.listdir(opts.model_dir):
+    for fle in sorted(os.listdir(opts.model_dir)):
         # iterate PDB files
         if not fle.endswith(".pdb"):
             continue
+        if opts.top_ranked_only and "rank_1" not in fle:
+            continue
         print(f"  translating {fle}...")
         pdb_start = timer()
         file_prfx, uid = _check_model_extra_files_present(opts.model_dir, fle)
@@ -772,14 +957,26 @@ def _main():
         # gather data into JSON-like structure
         print("  preparing data...", end="")
         pstart = timer()
+
+        # NOTE: this could also be prepared once, globally, if every field
+        # were carefully overwritten per model, but it is not worth the
+        # trouble...
+        mdlcf_json = _create_interaction_json(config_data)
+
+        # uid = ..._rank_X_model_Y.pdb
+        mdl_name_parts = uid.split('_')
+        assert mdl_name_parts[-4] == "rank"
+        assert mdl_name_parts[-2] == "model"
+        mdlcf_json["rank_num"] = int(mdl_name_parts[-3])
+        mdlcf_json["mdl_num"] = int(mdl_name_parts[-1])
+
         ost_ent = _create_model_json(mdlcf_json, fle, up_acs)
 
         # read quality scores from JSON file
         _get_scores(mdlcf_json, file_prfx)
         print(f" ({timer()-pstart:.2f}s)")
 
-        _store_as_modelcif(uid, mdlcf_json, ost_ent, file_prfx)
-        # ToDo [internal]: wipe data or is it overwritten in mdlcf_json?
+        _store_as_modelcif(uid, mdlcf_json, ost_ent, opts.out_dir, file_prfx,
+                           opts.compress)
         print(f"  ... done with {fle} ({timer()-pdb_start:.2f}s).")
 
     print(f"... done with {opts.model_dir}.")