Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • schwede/ma-wilkins-import
1 result
Show changes
Commits on Source (2)
......@@ -561,8 +561,8 @@ def _check_sequence(up_ac, sequence):
)
def _fetch_upkb_entry(up_ac):
"""Fetch data for an UniProtKB entry."""
def _get_n_parse_up_entry(up_ac, up_url):
"""Get data for an UniProtKB entry and parse it."""
# This is a simple parser for UniProtKB txt format, instead of breaking it
# up into multiple functions, we just allow many many branches & statements,
# here.
......@@ -571,9 +571,7 @@ def _fetch_upkb_entry(up_ac):
data["up_organism"] = ""
data["up_sequence"] = ""
data["up_ac"] = up_ac
rspns = requests.get(
f"https://www.uniprot.org/uniprot/{up_ac}.txt", timeout=180
)
rspns = requests.get(up_url, timeout=180)
for line in rspns.iter_lines(decode_unicode=True):
if line.startswith("ID "):
sline = line.split()
......@@ -615,6 +613,11 @@ def _fetch_upkb_entry(up_ac):
data["up_last_mod"] = datetime.datetime.strptime(
dt_flds[0], "%d-%b-%Y"
)
elif dt_flds[1].upper().startswith("ENTRY VERSION "):
data["up_entry_version"] = dt_flds[1][len("ENTRY VERSION ") :]
if data["up_entry_version"][-1] == ".":
data["up_entry_version"] = data["up_entry_version"][:-1]
data["up_entry_version"] = int(data["up_entry_version"])
elif line.startswith("GN Name="):
data["up_gn"] = line[len("GN Name=") :].split(";")[0]
data["up_gn"] = data["up_gn"].split("{")[0].strip()
......@@ -648,14 +651,37 @@ def _fetch_upkb_entry(up_ac):
return data
def _fetch_upkb_entry(up_ac):
    """Fetch and parse the current UniProtKB entry for accession `up_ac`.

    Builds the UniProt REST URL for the text-format entry and hands it to
    `_get_n_parse_up_entry`, whose result is returned unchanged.
    """
    upkb_url = f"https://rest.uniprot.org/uniprotkb/{up_ac}.txt"
    return _get_n_parse_up_entry(up_ac, upkb_url)
def _fetch_unisave_entry(up_ac, version):
    """Fetch and parse one specific version of an entry from UniSave.

    Unlike the plain UniProtKB endpoint, the UniSave URL carries a
    `versions` query parameter, so a specific version of the entry can be
    requested. The result of `_get_n_parse_up_entry` is returned unchanged.
    """
    unisave_url = (
        f"https://rest.uniprot.org/unisave/{up_ac}"
        f"?format=txt&versions={version}"
    )
    return _get_n_parse_up_entry(up_ac, unisave_url)
def _get_upkb_for_sequence(sqe, up_ac):
    """Get UniProtKB entry data whose sequence equals the given sequence.

    The current entry for `up_ac` is fetched first. While its sequence
    differs from `sqe`, the next older entry version is fetched from UniSave
    (entry version minus one) and compared, until a match is found or entry
    version 1 has been checked.

    Returns the parsed entry data of the first version whose "up_sequence"
    equals `sqe`.

    Raises RuntimeError if no version in the entry history matches.
    """
    up_data = _fetch_upkb_entry(up_ac)
    # Deliberately no immediate raise on a mismatch with the current entry:
    # an early raise here would make the history walk below unreachable.
    while sqe != up_data["up_sequence"]:
        if up_data["up_entry_version"] > 1:
            # Step back one entry version and compare again.
            up_data = _fetch_unisave_entry(
                up_ac, up_data["up_entry_version"] - 1
            )
        else:
            # Version 1 is the oldest one; nothing left to check.
            raise RuntimeError(
                f"Sequences not equal from file: {sqe}, from UniProtKB: "
                f"{up_data['up_sequence']} ({up_ac}), checked entire entry "
                "history."
            )
    return up_data
......