Skip to content
Snippets Groups Projects
Commit 056b6d8d authored by B13nch3n's avatar B13nch3n
Browse files

If a UniProtKB entry has no gene name, use its accession (AC) instead.

parent 0b4c172c
No related branches found
No related tags found
No related merge requests found
......@@ -228,6 +228,11 @@ def _abort_msg(msg, exit_code=1):
sys.exit(exit_code)
def _warn_msg(msg):
    """Write a warning message to stdout.

    The message is emitted on a single line, prefixed with "WARNING: ".
    Unlike an abort, this only reports the problem; the caller continues.

    Args:
        msg: Text of the warning (without the "WARNING: " prefix).
    """
    # Keep the prefix format stable, callers rely on a uniform look of
    # warnings in the output.
    print(f"WARNING: {msg}")
def _check_file(file_path):
"""Make sure a file exists and is actually a file."""
if not os.path.exists(file_path):
......@@ -292,9 +297,8 @@ def _parse_colabfold_config(cnfg_file):
use_mmseqs = False
use_msa = False
elif cf_config["msa_mode"] == "custom":
print(
"WARNING: Custom MSA mode used. Not clear from config what to do "
+ "here!"
_warn_msg(
"Custom MSA mode used. Not clear from config what to do here!"
)
seq_dbs = []
use_mmseqs = False
......@@ -336,9 +340,9 @@ def _parse_colabfold_config(cnfg_file):
else:
mdl_description += ", without model relaxation"
if cf_config["use_templates"]:
print(
"WARNING: ColabFold may use PDB70 or custom templates. "
"Not clear from config!"
_warn_msg(
"ColabFold may use PDB70 or custom templates. Not clear "
+ "from config!"
)
mdl_description += ", using templates"
else:
......@@ -557,10 +561,10 @@ def _check_sequence(up_ac, sequence):
for i, res in enumerate(sequence):
if res not in "ACDEFGHIKLMNPQRSTVWY":
if res == "U":
print(
f"WARNING: Selenocysteine found at position {i+1} of "
f"entry '{up_ac}', this residue may be missing in the "
"model."
_warn_msg(
f"Selenocysteine found at position {i+1} of entry "
+ f"'{up_ac}', this residue may be missing in the "
+ "model."
)
ns_aa_pos.append(i)
continue
......@@ -636,7 +640,11 @@ def _get_n_parse_up_entry(up_ac, up_url):
data["up_isoform"] = None
if "up_gn" not in data:
_abort_msg(f"No gene name found for UniProtKB entry '{up_ac}'.")
_warn_msg(
f"No gene name found for UniProtKB entry '{up_ac}', using "
+ "UniProtKB AC instead."
)
data["up_gn"] = up_ac
if "up_last_mod" not in data:
_abort_msg(f"No sequence version found for UniProtKB entry '{up_ac}'.")
if "up_crc64" not in data:
......@@ -688,7 +696,7 @@ def _cmp_sequences(mdl, upkb, ns_aa_pos):
f"Position {pos+1} of non-canonical amino acid should be "
"a gap!"
)
mdl = mdl[0:pos] + "U" + mdl[pos+1:]
mdl = mdl[0:pos] + "U" + mdl[pos + 1 :]
return mdl == upkb
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment