Skip to content
Snippets Groups Projects
Commit 00f02c44 authored by B13nch3n's avatar B13nch3n
Browse files

If the model sequence and the UniProtKB sequence do not match, go down the UniProtKB entry's history

parent 97ea50bf
No related merge requests found
......@@ -561,8 +561,8 @@ def _check_sequence(up_ac, sequence):
)
def _fetch_upkb_entry(up_ac):
"""Fetch data for an UniProtKB entry."""
def _get_n_parse_up_entry(up_ac, up_url):
"""Get data for an UniProtKB entry and parse it."""
# This is a simple parser for UniProtKB txt format, instead of breaking it
# up into multiple functions, we just allow many many branches & statements,
# here.
......@@ -571,9 +571,7 @@ def _fetch_upkb_entry(up_ac):
data["up_organism"] = ""
data["up_sequence"] = ""
data["up_ac"] = up_ac
rspns = requests.get(
f"https://rest.uniprot.org/uniprotkb/{up_ac}.txt", timeout=180
)
rspns = requests.get(up_url, timeout=180)
for line in rspns.iter_lines(decode_unicode=True):
if line.startswith("ID "):
sline = line.split()
......@@ -615,6 +613,11 @@ def _fetch_upkb_entry(up_ac):
data["up_last_mod"] = datetime.datetime.strptime(
dt_flds[0], "%d-%b-%Y"
)
elif dt_flds[1].upper().startswith("ENTRY VERSION "):
data["up_entry_version"] = dt_flds[1][len("ENTRY VERSION ") :]
if data["up_entry_version"][-1] == ".":
data["up_entry_version"] = data["up_entry_version"][:-1]
data["up_entry_version"] = int(data["up_entry_version"])
elif line.startswith("GN Name="):
data["up_gn"] = line[len("GN Name=") :].split(";")[0]
data["up_gn"] = data["up_gn"].split("{")[0].strip()
......@@ -648,14 +651,37 @@ def _fetch_upkb_entry(up_ac):
return data
def _fetch_upkb_entry(up_ac):
    """Fetch and parse the UniProtKB txt record for accession `up_ac`.

    Builds the UniProtKB REST URL for the accession and hands it to
    `_get_n_parse_up_entry`, returning whatever that parser returns.
    """
    entry_url = f"https://rest.uniprot.org/uniprotkb/{up_ac}.txt"
    return _get_n_parse_up_entry(up_ac, entry_url)
def _fetch_unisave_entry(up_ac, version):
    """Fetch and parse one specific version of an entry from UniSave.

    Unlike the plain UniProtKB endpoint, the UniSave URL built here carries
    a `versions` query parameter, so a particular entry version can be
    requested. The URL is handed to `_get_n_parse_up_entry` and its result is
    returned unchanged.
    """
    entry_url = (
        f"https://rest.uniprot.org/unisave/{up_ac}?format=txt&"
        f"versions={version}"
    )
    return _get_n_parse_up_entry(up_ac, entry_url)
def _get_upkb_for_sequence(sqe, up_ac):
    """Get UniProtKB entry data whose sequence equals the given sequence.

    Starts with the entry fetched by `_fetch_upkb_entry` and, as long as its
    sequence differs from `sqe`, steps back one entry version at a time via
    `_fetch_unisave_entry`.

    Args:
        sqe: Sequence to look for (compared with `up_data["up_sequence"]`).
        up_ac: UniProtKB accession of the entry to search.

    Returns:
        The parsed entry data of the first version (walking backwards) whose
        `"up_sequence"` equals `sqe`.

    Raises:
        RuntimeError: If version 1 is reached without finding a matching
            sequence.
    """
    up_data = _fetch_upkb_entry(up_ac)
    # No immediate raise on a mismatch here: an unconditional error before
    # the loop would make the history walk below unreachable.
    while sqe != up_data["up_sequence"]:
        if up_data["up_entry_version"] <= 1:
            # Nothing older than version 1 to try.
            raise RuntimeError(
                f"Sequences not equal from file: {sqe}, from UniProtKB: "
                f"{up_data['up_sequence']} ({up_ac}), checked entire entry "
                "history."
            )
        # The next version to try is derived from the version reported by
        # the entry that was just parsed.
        up_data = _fetch_unisave_entry(up_ac, up_data["up_entry_version"] - 1)

    return up_data
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment