Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
ma-wilkins-import
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
ma-wilkins-import
Commits
00f02c44
Commit
00f02c44
authored
2 years ago
by
B13nch3n
Browse files
Options
Downloads
Patches
Plain Diff
If the model sequence and the UniProtKB sequence do not match, go down the UniProtKB entry's history
parent
97ea50bf
Loading
Loading
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
translate2modelcif.py
+36
-10
36 additions, 10 deletions
translate2modelcif.py
with
36 additions
and
10 deletions
translate2modelcif.py
+
36
−
10
View file @
00f02c44
...
...
@@ -561,8 +561,8 @@ def _check_sequence(up_ac, sequence):
)
def
_
f
et
ch
_up
kb
_entry
(
up_ac
):
"""
F
et
ch
data for an UniProtKB entry.
"""
def
_
g
et
_n_parse
_up_entry
(
up_ac
,
up_url
):
"""
G
et data for an UniProtKB entry
and parse it
.
"""
# This is a simple parser for UniProtKB txt format, instead of breaking it
# up into multiple functions, we just allow many many branches & statements,
# here.
...
...
@@ -571,9 +571,7 @@ def _fetch_upkb_entry(up_ac):
data
[
"
up_organism
"
]
=
""
data
[
"
up_sequence
"
]
=
""
data
[
"
up_ac
"
]
=
up_ac
rspns
=
requests
.
get
(
f
"
https://rest.uniprot.org/uniprotkb/
{
up_ac
}
.txt
"
,
timeout
=
180
)
rspns
=
requests
.
get
(
up_url
,
timeout
=
180
)
for
line
in
rspns
.
iter_lines
(
decode_unicode
=
True
):
if
line
.
startswith
(
"
ID
"
):
sline
=
line
.
split
()
...
...
@@ -615,6 +613,11 @@ def _fetch_upkb_entry(up_ac):
data
[
"
up_last_mod
"
]
=
datetime
.
datetime
.
strptime
(
dt_flds
[
0
],
"
%d-%b-%Y
"
)
elif
dt_flds
[
1
].
upper
().
startswith
(
"
ENTRY VERSION
"
):
data
[
"
up_entry_version
"
]
=
dt_flds
[
1
][
len
(
"
ENTRY VERSION
"
)
:]
if
data
[
"
up_entry_version
"
][
-
1
]
==
"
.
"
:
data
[
"
up_entry_version
"
]
=
data
[
"
up_entry_version
"
][:
-
1
]
data
[
"
up_entry_version
"
]
=
int
(
data
[
"
up_entry_version
"
])
elif
line
.
startswith
(
"
GN Name=
"
):
data
[
"
up_gn
"
]
=
line
[
len
(
"
GN Name=
"
)
:].
split
(
"
;
"
)[
0
]
data
[
"
up_gn
"
]
=
data
[
"
up_gn
"
].
split
(
"
{
"
)[
0
].
strip
()
...
...
@@ -648,14 +651,37 @@ def _fetch_upkb_entry(up_ac):
return
data
def _fetch_upkb_entry(up_ac):
    """Fetch and parse the current UniProtKB entry for an accession.

    Builds the UniProtKB REST URL of the txt-format entry for ``up_ac`` and
    hands it to ``_get_n_parse_up_entry``.

    :param up_ac: UniProtKB accession code of the entry.
    :return: Whatever ``_get_n_parse_up_entry`` returns for that URL.
    """
    entry_url = f"https://rest.uniprot.org/uniprotkb/{up_ac}.txt"
    return _get_n_parse_up_entry(up_ac, entry_url)
def _fetch_unisave_entry(up_ac, version):
    """Fetch and parse one specific version of an entry from UniSave.

    Unlike the plain UniProtKB endpoint, the UniSave endpoint takes a
    ``versions`` query parameter, so a particular entry version can be
    requested.

    :param up_ac: UniProtKB accession code of the entry.
    :param version: Entry version to request.
    :return: Whatever ``_get_n_parse_up_entry`` returns for that URL.
    """
    unisave_url = (
        f"https://rest.uniprot.org/unisave/{up_ac}?format=txt&versions={version}"
    )
    return _get_n_parse_up_entry(up_ac, unisave_url)
def _get_upkb_for_sequence(sqe, up_ac):
    """Get UniProtKB entry data whose sequence matches the given sequence.

    The current UniProtKB entry is tried first. If its sequence differs from
    ``sqe``, older versions of the entry are fetched from UniSave, newest
    first, until one with a matching sequence is found.

    :param sqe: Sequence to look for (compared for exact equality).
    :param up_ac: UniProtKB accession code of the entry.
    :return: Parsed entry data of the most recent version whose
             ``"up_sequence"`` equals ``sqe``.
    :raises RuntimeError: If no version of the entry, down to version 1,
                          carries the given sequence.
    """
    up_data = _fetch_upkb_entry(up_ac)
    if sqe == up_data["up_sequence"]:
        return up_data

    # Walk back through the entry history. The version counter is driven
    # locally rather than re-read from each fetched entry, so the loop is
    # guaranteed to terminate at version 1 no matter what the parser returns.
    for version in range(up_data["up_entry_version"] - 1, 0, -1):
        up_data = _fetch_unisave_entry(up_ac, version)
        if sqe == up_data["up_sequence"]:
            return up_data

    # 'up_data' is the oldest version that was checked at this point.
    raise RuntimeError(
        f"Sequences not equal from file: {sqe}, from UniProtKB: "
        f"{up_data['up_sequence']} ({up_ac}), checked entire entry "
        "history."
    )
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment