# NOTE(review): `_get_n_parse_up_entry(up_ac, up_url)` is defined elsewhere in
# translate2modelcif.py; only fragments of it are visible in this excerpt, so
# it is intentionally not reproduced here.


def _fetch_upkb_entry(up_ac):
    """Get the current UniProtKB entry for an accession and parse it.

    Args:
        up_ac: UniProtKB accession code, inserted into the REST URL as is.

    Returns:
        Whatever `_get_n_parse_up_entry` returns for the entry's txt URL.
    """
    return _get_n_parse_up_entry(
        up_ac, f"https://rest.uniprot.org/uniprotkb/{up_ac}.txt"
    )


def _fetch_unisave_entry(up_ac, version):
    """Get one specific version of an entry from UniSave and parse it.

    In contrast to the UniProtKB endpoint, UniSave allows us to specify the
    entry version to retrieve.

    Args:
        up_ac: UniProtKB accession code, inserted into the REST URL as is.
        version: Entry version to request (value of the `versions` query
            parameter).

    Returns:
        Whatever `_get_n_parse_up_entry` returns for the UniSave txt URL.
    """
    return _get_n_parse_up_entry(
        up_ac,
        f"https://rest.uniprot.org/unisave/{up_ac}?format=txt&"
        f"versions={version}",
    )


def _get_upkb_for_sequence(sqe, up_ac):
    """Get UniProtKB entry data for given sequence.

    The current entry is fetched first. If its sequence differs from `sqe`,
    older versions of the entry are fetched from UniSave, newest to oldest,
    until one with a matching sequence is found.

    Args:
        sqe: Sequence that the entry's "up_sequence" value has to equal.
        up_ac: UniProtKB accession code of the entry.

    Returns:
        The parsed entry data of the newest entry version whose sequence
        equals `sqe`.

    Raises:
        RuntimeError: If no version of the entry has a matching sequence, or
            if the current entry carries no version number to start the
            history search from.
    """
    up_data = _fetch_upkb_entry(up_ac)
    if sqe == up_data["up_sequence"]:
        return up_data

    # "up_entry_version" is only set when the parser meets an "entry version"
    # line, so do not assume the key exists.
    latest_version = up_data.get("up_entry_version")
    if latest_version is None:
        raise RuntimeError(
            f"Sequences not equal from file: {sqe}, from UniProtKB: "
            f"{up_data['up_sequence']} ({up_ac}), entry version unknown, "
            "can not check entry history."
        )

    # Walk the history with a local counter instead of the version parsed
    # from each response: this guarantees termination after at most
    # `latest_version - 1` requests, whatever the remote side returns.
    for version in range(latest_version - 1, 0, -1):
        up_data = _fetch_unisave_entry(up_ac, version)
        if sqe == up_data["up_sequence"]:
            return up_data

    raise RuntimeError(
        f"Sequences not equal from file: {sqe}, from UniProtKB: "
        f"{up_data['up_sequence']} ({up_ac}), checked entire entry "
        "history."
    )