Skip to content
Snippets Groups Projects
Commit 3fd151a3 authored by B13nch3n's avatar B13nch3n
Browse files

Update translationscript

parent de569684
Branches
No related tags found
No related merge requests found
......@@ -228,6 +228,11 @@ def _abort_msg(msg, exit_code=1):
sys.exit(exit_code)
def _warn_msg(msg):
"""Write a warning message to stdout."""
print(f"WARNING: {msg}")
def _check_file(file_path):
"""Make sure a file exists and is actually a file."""
if not os.path.exists(file_path):
......@@ -292,9 +297,8 @@ def _parse_colabfold_config(cnfg_file):
use_mmseqs = False
use_msa = False
elif cf_config["msa_mode"] == "custom":
print(
"WARNING: Custom MSA mode used. Not clear from config what to do "
+ "here!"
_warn_msg(
"Custom MSA mode used. Not clear from config what to do here!"
)
seq_dbs = []
use_mmseqs = False
......@@ -336,9 +340,9 @@ def _parse_colabfold_config(cnfg_file):
else:
mdl_description += ", without model relaxation"
if cf_config["use_templates"]:
print(
"WARNING: ColabFold may use PDB70 or custom templates. "
"Not clear from config!"
_warn_msg(
"ColabFold may use PDB70 or custom templates. Not clear "
+ "from config!"
)
mdl_description += ", using templates"
else:
......@@ -553,12 +557,22 @@ def _get_sequence(chn):
def _check_sequence(up_ac, sequence):
"""Verify sequence to only contain standard olc."""
for res in sequence:
ns_aa_pos = [] # positions of non-standard amino acids
for i, res in enumerate(sequence):
if res not in "ACDEFGHIKLMNPQRSTVWY":
if res == "U":
_warn_msg(
f"Selenocysteine found at position {i+1} of entry "
+ f"'{up_ac}', this residue may be missing in the "
+ "model."
)
ns_aa_pos.append(i)
continue
raise RuntimeError(
"Non-standard aa found in UniProtKB sequence "
+ f"for entry '{up_ac}': {res}"
+ f"for entry '{up_ac}': {res}, position {i+1}"
)
return ns_aa_pos
def _get_n_parse_up_entry(up_ac, up_url):
......@@ -626,7 +640,11 @@ def _get_n_parse_up_entry(up_ac, up_url):
data["up_isoform"] = None
if "up_gn" not in data:
_abort_msg(f"No gene name found for UniProtKB entry '{up_ac}'.")
_warn_msg(
f"No gene name found for UniProtKB entry '{up_ac}', using "
+ "UniProtKB AC instead."
)
data["up_gn"] = up_ac
if "up_last_mod" not in data:
_abort_msg(f"No sequence version found for UniProtKB entry '{up_ac}'.")
if "up_crc64" not in data:
......@@ -640,7 +658,7 @@ def _get_n_parse_up_entry(up_ac, up_url):
+ f"UniProtKB entry '{up_ac}': {data['up_seqlen']} != "
+ f"{len(data['up_sequence'])}"
)
_check_sequence(data["up_ac"], data["up_sequence"])
data["up_ns_aa"] = _check_sequence(data["up_ac"], data["up_sequence"])
if "up_id" not in data:
_abort_msg(f"No ID found for UniProtKB entry '{up_ac}'.")
......@@ -668,10 +686,24 @@ def _fetch_unisave_entry(up_ac, version):
)
def _cmp_sequences(mdl, upkb, ns_aa_pos):
"""Compare sequence while paying attention on non-standard amino acids."""
# We add a U to the sequence when necessary. AF2 does not model it. The PDB
# has selenocysteine as canonical aa, see PDB entry 7Z0T.
for pos in ns_aa_pos:
if mdl[pos] != "-":
_abort_msg(
f"Position {pos+1} of non-canonical amino acid should be "
"a gap!"
)
mdl = mdl[0:pos] + "U" + mdl[pos + 1 :]
return mdl == upkb
def _get_upkb_for_sequence(sqe, up_ac):
"""Get UniProtKB entry data for given sequence."""
up_data = _fetch_upkb_entry(up_ac)
while sqe != up_data["up_sequence"]:
while not _cmp_sequences(sqe, up_data["up_sequence"], up_data["up_ns_aa"]):
if up_data["up_entry_version"] > 1:
up_data = _fetch_unisave_entry(
up_ac, up_data["up_entry_version"] - 1
......@@ -732,7 +764,10 @@ def _get_modelcif_entities(target_ents, source, asym_units, system):
"""Create ModelCIF entities and asymmetric units."""
for cif_ent in target_ents:
mdlcif_ent = modelcif.Entity(
cif_ent["pdb_sequence"],
# Use up_sequence above pdb_sequence as it contains no gaps but
# otherwise is equal to the pdb_sequence. Gaps in pdb_sequence
# originate from missing residues in the model.
cif_ent["up_sequence"],
description=cif_ent["description"],
source=source,
references=[
......@@ -1111,3 +1146,5 @@ def _main():
if __name__ == "__main__":
_main()
# LocalWords: aa pdb
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment