diff --git a/projects/PP2A-B55-design/translate2modelcif.py b/projects/PP2A-B55-design/translate2modelcif.py index 045c8dfb561ae234d4493dc12657b125b1cd5e1e..725d7976725b1804c136490daaef5ce1ca5d1cc0 100644 --- a/projects/PP2A-B55-design/translate2modelcif.py +++ b/projects/PP2A-B55-design/translate2modelcif.py @@ -1,7 +1,7 @@ #! /usr/local/bin/ost # -*- coding: utf-8 -*- -"""Translate PRC models for Juntao from PDB + extra data into ModelCIF.""" +"""Translate PP2A-B55 models for Julia from PDB + extra data into ModelCIF.""" # EXAMPLES for running: # ost translate2modelcif.py ./modelarchive_submission ./modelcif @@ -32,9 +32,6 @@ import pandas as pd from ost import io, seq -# In[2]: - - ################################################################################ # GENERAL HELPER FUNCTIONS ################################################################################ @@ -104,9 +101,6 @@ def _get_sequence(chn, use_auth=False): ################################################################################ -# In[3]: - - ################################################################################ # DATA HANDLING ################################################################################ @@ -310,8 +304,6 @@ def _get_n_parse_up_entry(up_ac, up_txt_path): + f"UniProtKB entry '{up_ac}': {data['up_seqlen']} != " + f"{len(data['up_sequence'])}" ) - data["up_ns_aa"] = _check_sequence(data["up_ac"], data["up_sequence"]) - if "up_id" not in data: raise RuntimeError(f"No ID found for UniProtKB entry '{up_ac}'.") if "up_ncbi_taxid" not in data: @@ -340,26 +332,6 @@ def _fetch_unisave_entry(up_ac, version): ) -def _check_sequence(up_ac, sequence): - """Verify sequence to only contain standard olc.""" - ns_aa_pos = [] # positions of non-standard amino acids - for i, res in enumerate(sequence): - if res not in "ACDEFGHIKLMNPQRSTVWY": - if res == "U": - _warn_msg( - f"Selenocysteine found at position {i+1} of entry " - + f"'{up_ac}', this residue may be missing in the " - + "model." - ) - ns_aa_pos.append(i) - continue - raise RuntimeError( - "Non-standard aa found in UniProtKB sequence " - + f"for entry '{up_ac}': {res}, position {i+1}" - ) - return ns_aa_pos - - # for cache below upkb_entry_cache = {} # key = (up_ac, up_version, mdl_sequence) def _fetch_upkb_cached(sqe, up_ac, up_version=None): @@ -1045,9 +1017,6 @@ def _get_model_details(metadata): ################################################################################ -# In[4]: - - ################################################################################ # ModelCIF HANDLING ################################################################################ @@ -1325,31 +1294,6 @@ def _get_assoc_png_file(fle_path, png_type): return afile -def _get_assoc_mdl_file(fle_path, data_json): - """Generate a modelcif.associated.File object that looks like a CIF file. - The dedicated CIFFile functionality in modelcif would also try to write it. - """ - cfile = modelcif.associated.File( - fle_path, - details=f"models-details", - ) - cfile.file_format = "cif" - return cfile - - -def _get_assoc_zip_file(fle_path, data_json): - """Create a modelcif.associated.File object that looks like a ZIP file. - This is NOT the archive ZIP file for the PAEs but to store that in the - ZIP archive of the selected model.""" - zfile = modelcif.associated.File( - fle_path, - details="archive with multiple files for " - + f"#{data_json['mdl_name']}", - ) - zfile.file_format = "other" - return zfile - - def _get_associated_files(mdl_name, arc_files): """Create entry for associated files.""" # package all into zip file @@ -1597,9 +1541,6 @@ def _store_as_modelcif(data_json, ost_ent, out_dir, mdl_name, compress, add_pae, ################################################################################ -# In[5]: - - ################################################################################ # HANDLE FULL DATA SET ################################################################################ @@ -1769,9 +1710,6 @@ def _translate2modelcif(metadata, opts, add_files=[]): return issues -# In[6]: - - def _get_metadata(input_data_path, single_model=None): metadata_csv = pd.read_csv(os.path.join( @@ -1803,8 +1741,8 @@ def _get_metadata(input_data_path, single_model=None): chain = { "chain": chain_id, "up_ac": None, - "up_range": None, - "is_synthetic_construct": True + "up_range": None, + "is_synthetic_construct": True } else: if ':' in chain_data: @@ -1854,9 +1792,6 @@ def _get_metadata(input_data_path, single_model=None): return metadata_full -# In[7]: - - ################################################################################ # HANDLE ONE MODEL IN A NOTEBOOK ################################################################################