diff --git a/translate2modelcif.py b/translate2modelcif.py index e76380b47f69cd8c3f9d662cda40d75b2a9ffeef..4f5dbb70c4e25a5d7c4264c2e27a8a97b25833ab 100644 --- a/translate2modelcif.py +++ b/translate2modelcif.py @@ -228,6 +228,11 @@ def _abort_msg(msg, exit_code=1): sys.exit(exit_code) +def _warn_msg(msg): + """Write a warning message to stdout.""" + print(f"WARNING: {msg}") + + def _check_file(file_path): """Make sure a file exists and is actually a file.""" if not os.path.exists(file_path): @@ -292,9 +297,8 @@ def _parse_colabfold_config(cnfg_file): use_mmseqs = False use_msa = False elif cf_config["msa_mode"] == "custom": - print( - "WARNING: Custom MSA mode used. Not clear from config what to do " - + "here!" + _warn_msg( + "Custom MSA mode used. Not clear from config what to do here!" ) seq_dbs = [] use_mmseqs = False @@ -336,9 +340,9 @@ def _parse_colabfold_config(cnfg_file): else: mdl_description += ", without model relaxation" if cf_config["use_templates"]: - print( - "WARNING: ColabFold may use PDB70 or custom templates. " - "Not clear from config!" + _warn_msg( + "ColabFold may use PDB70 or custom templates. Not clear " + + "from config!" ) mdl_description += ", using templates" else: @@ -557,10 +561,10 @@ def _check_sequence(up_ac, sequence): for i, res in enumerate(sequence): if res not in "ACDEFGHIKLMNPQRSTVWY": if res == "U": - print( - f"WARNING: Selenocysteine found at position {i+1} of " - f"entry '{up_ac}', this residue may be missing in the " - "model." + _warn_msg( + f"Selenocysteine found at position {i+1} of entry " + + f"'{up_ac}', this residue may be missing in the " + + "model." ) ns_aa_pos.append(i) continue @@ -636,7 +640,11 @@ def _get_n_parse_up_entry(up_ac, up_url): data["up_isoform"] = None if "up_gn" not in data: - _abort_msg(f"No gene name found for UniProtKB entry '{up_ac}'.") + _warn_msg( + f"No gene name found for UniProtKB entry '{up_ac}', using " + + "UniProtKB AC instead." + ) + data["up_gn"] = up_ac if "up_last_mod" not in data: _abort_msg(f"No sequence version found for UniProtKB entry '{up_ac}'.") if "up_crc64" not in data: @@ -688,7 +696,7 @@ def _cmp_sequences(mdl, upkb, ns_aa_pos): f"Position {pos+1} of non-canonical amino acid should be " "a gap!" ) - mdl = mdl[0:pos] + "U" + mdl[pos+1:] + mdl = mdl[0:pos] + "U" + mdl[pos + 1 :] return mdl == upkb