Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • schwede/ma-wilkins-import
1 result
Select Git revision
Show changes
Commits on Source (2)
...@@ -561,8 +561,8 @@ def _check_sequence(up_ac, sequence): ...@@ -561,8 +561,8 @@ def _check_sequence(up_ac, sequence):
) )
def _fetch_upkb_entry(up_ac): def _get_n_parse_up_entry(up_ac, up_url):
"""Fetch data for an UniProtKB entry.""" """Get data for an UniProtKB entry and parse it."""
# This is a simple parser for UniProtKB txt format, instead of breaking it # This is a simple parser for UniProtKB txt format, instead of breaking it
# up into multiple functions, we just allow many many branches & statements, # up into multiple functions, we just allow many many branches & statements,
# here. # here.
...@@ -571,9 +571,7 @@ def _fetch_upkb_entry(up_ac): ...@@ -571,9 +571,7 @@ def _fetch_upkb_entry(up_ac):
data["up_organism"] = "" data["up_organism"] = ""
data["up_sequence"] = "" data["up_sequence"] = ""
data["up_ac"] = up_ac data["up_ac"] = up_ac
rspns = requests.get( rspns = requests.get(up_url, timeout=180)
f"https://www.uniprot.org/uniprot/{up_ac}.txt", timeout=180
)
for line in rspns.iter_lines(decode_unicode=True): for line in rspns.iter_lines(decode_unicode=True):
if line.startswith("ID "): if line.startswith("ID "):
sline = line.split() sline = line.split()
...@@ -615,6 +613,11 @@ def _fetch_upkb_entry(up_ac): ...@@ -615,6 +613,11 @@ def _fetch_upkb_entry(up_ac):
data["up_last_mod"] = datetime.datetime.strptime( data["up_last_mod"] = datetime.datetime.strptime(
dt_flds[0], "%d-%b-%Y" dt_flds[0], "%d-%b-%Y"
) )
elif dt_flds[1].upper().startswith("ENTRY VERSION "):
data["up_entry_version"] = dt_flds[1][len("ENTRY VERSION ") :]
if data["up_entry_version"][-1] == ".":
data["up_entry_version"] = data["up_entry_version"][:-1]
data["up_entry_version"] = int(data["up_entry_version"])
elif line.startswith("GN Name="): elif line.startswith("GN Name="):
data["up_gn"] = line[len("GN Name=") :].split(";")[0] data["up_gn"] = line[len("GN Name=") :].split(";")[0]
data["up_gn"] = data["up_gn"].split("{")[0].strip() data["up_gn"] = data["up_gn"].split("{")[0].strip()
...@@ -648,14 +651,37 @@ def _fetch_upkb_entry(up_ac): ...@@ -648,14 +651,37 @@ def _fetch_upkb_entry(up_ac):
return data return data
def _fetch_upkb_entry(up_ac):
    """Fetch and parse the current UniProtKB entry for an accession.

    Builds the rest.uniprot.org URL of the txt rendering for ``up_ac`` and
    passes accession and URL to ``_get_n_parse_up_entry``, returning that
    function's result unchanged.
    """
    entry_url = f"https://rest.uniprot.org/uniprotkb/{up_ac}.txt"
    return _get_n_parse_up_entry(up_ac, entry_url)
def _fetch_unisave_entry(up_ac, version):
    """Fetch and parse one specific version of an entry via UniSave.

    In contrast to the plain UniProtKB URL, the UniSave URL carries a
    ``versions`` query parameter, so ``version`` selects which revision of
    ``up_ac`` is requested. Accession and URL are passed to
    ``_get_n_parse_up_entry`` and its result is returned unchanged.
    """
    # Query string is kept separate from the path so each part stays readable.
    query = f"format=txt&versions={version}"
    entry_url = f"https://rest.uniprot.org/unisave/{up_ac}?{query}"
    return _get_n_parse_up_entry(up_ac, entry_url)
def _get_upkb_for_sequence(sqe, up_ac): def _get_upkb_for_sequence(sqe, up_ac):
"""Get UniProtKB entry data for given sequence.""" """Get UniProtKB entry data for given sequence."""
up_data = _fetch_upkb_entry(up_ac) up_data = _fetch_upkb_entry(up_ac)
if sqe != up_data["up_sequence"]: while sqe != up_data["up_sequence"]:
raise RuntimeError( if up_data["up_entry_version"] > 1:
f"Sequences not equal from file: {sqe}, from UniProtKB: " up_data = _fetch_unisave_entry(
+ f"{up_data['up_sequence']}" up_ac, up_data["up_entry_version"] - 1
) )
else:
raise RuntimeError(
f"Sequences not equal from file: {sqe}, from UniProtKB: "
f"{up_data['up_sequence']} ({up_ac}), checked entire entry "
"history."
)
return up_data return up_data
......