Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelcif-converters
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
modelcif-converters
Commits
3fd151a3
Commit
3fd151a3
authored
2 years ago
by
B13nch3n
Browse files
Options
Downloads
Patches
Plain Diff
Update translationscript
parent
de569684
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
projects/human-heterodimers-w-crosslinks/translate2modelcif.py
+49
-12
49 additions, 12 deletions
...cts/human-heterodimers-w-crosslinks/translate2modelcif.py
with
49 additions
and
12 deletions
projects/human-heterodimers-w-crosslinks/translate2modelcif.py
+
49
−
12
View file @
3fd151a3
...
...
@@ -228,6 +228,11 @@ def _abort_msg(msg, exit_code=1):
sys
.
exit
(
exit_code
)
def
_warn_msg
(
msg
):
"""
Write a warning message to stdout.
"""
print
(
f
"
WARNING:
{
msg
}
"
)
def
_check_file
(
file_path
):
"""
Make sure a file exists and is actually a file.
"""
if
not
os
.
path
.
exists
(
file_path
):
...
...
@@ -292,9 +297,8 @@ def _parse_colabfold_config(cnfg_file):
use_mmseqs
=
False
use_msa
=
False
elif
cf_config
[
"
msa_mode
"
]
==
"
custom
"
:
print
(
"
WARNING: Custom MSA mode used. Not clear from config what to do
"
+
"
here!
"
_warn_msg
(
"
Custom MSA mode used. Not clear from config what to do here!
"
)
seq_dbs
=
[]
use_mmseqs
=
False
...
...
@@ -336,9 +340,9 @@ def _parse_colabfold_config(cnfg_file):
else
:
mdl_description
+=
"
, without model relaxation
"
if
cf_config
[
"
use_templates
"
]:
print
(
"
WARNING:
ColabFold may use PDB70 or custom templates.
"
"
Not clear
from config!
"
_warn_msg
(
"
ColabFold may use PDB70 or custom templates.
Not clear
"
+
"
from config!
"
)
mdl_description
+=
"
, using templates
"
else
:
...
...
@@ -553,12 +557,22 @@ def _get_sequence(chn):
def
_check_sequence
(
up_ac
,
sequence
):
"""
Verify sequence to only contain standard olc.
"""
for
res
in
sequence
:
ns_aa_pos
=
[]
# positions of non-standard amino acids
for
i
,
res
in
enumerate
(
sequence
):
if
res
not
in
"
ACDEFGHIKLMNPQRSTVWY
"
:
if
res
==
"
U
"
:
_warn_msg
(
f
"
Selenocysteine found at position
{
i
+
1
}
of entry
"
+
f
"'
{
up_ac
}
'
, this residue may be missing in the
"
+
"
model.
"
)
ns_aa_pos
.
append
(
i
)
continue
raise
RuntimeError
(
"
Non-standard aa found in UniProtKB sequence
"
+
f
"
for entry
'
{
up_ac
}
'
:
{
res
}
"
+
f
"
for entry
'
{
up_ac
}
'
:
{
res
}
, position
{
i
+
1
}
"
)
return
ns_aa_pos
def
_get_n_parse_up_entry
(
up_ac
,
up_url
):
...
...
@@ -626,7 +640,11 @@ def _get_n_parse_up_entry(up_ac, up_url):
data
[
"
up_isoform
"
]
=
None
if
"
up_gn
"
not
in
data
:
_abort_msg
(
f
"
No gene name found for UniProtKB entry
'
{
up_ac
}
'
.
"
)
_warn_msg
(
f
"
No gene name found for UniProtKB entry
'
{
up_ac
}
'
, using
"
+
"
UniProtKB AC instead.
"
)
data
[
"
up_gn
"
]
=
up_ac
if
"
up_last_mod
"
not
in
data
:
_abort_msg
(
f
"
No sequence version found for UniProtKB entry
'
{
up_ac
}
'
.
"
)
if
"
up_crc64
"
not
in
data
:
...
...
@@ -640,7 +658,7 @@ def _get_n_parse_up_entry(up_ac, up_url):
+
f
"
UniProtKB entry
'
{
up_ac
}
'
:
{
data
[
'
up_seqlen
'
]
}
!=
"
+
f
"
{
len
(
data
[
'
up_sequence
'
])
}
"
)
_check_sequence
(
data
[
"
up_ac
"
],
data
[
"
up_sequence
"
])
data
[
"
up_ns_aa
"
]
=
_check_sequence
(
data
[
"
up_ac
"
],
data
[
"
up_sequence
"
])
if
"
up_id
"
not
in
data
:
_abort_msg
(
f
"
No ID found for UniProtKB entry
'
{
up_ac
}
'
.
"
)
...
...
@@ -668,10 +686,24 @@ def _fetch_unisave_entry(up_ac, version):
)
def
_cmp_sequences
(
mdl
,
upkb
,
ns_aa_pos
):
"""
Compare sequence while paying attention on non-standard amino acids.
"""
# We add a U to the sequence when necessary. AF2 does not model it. The PDB
# has selenocysteine as canonical aa, see PDB entry 7Z0T.
for
pos
in
ns_aa_pos
:
if
mdl
[
pos
]
!=
"
-
"
:
_abort_msg
(
f
"
Position
{
pos
+
1
}
of non-canonical amino acid should be
"
"
a gap!
"
)
mdl
=
mdl
[
0
:
pos
]
+
"
U
"
+
mdl
[
pos
+
1
:]
return
mdl
==
upkb
def
_get_upkb_for_sequence
(
sqe
,
up_ac
):
"""
Get UniProtKB entry data for given sequence.
"""
up_data
=
_fetch_upkb_entry
(
up_ac
)
while
sqe
!=
up_data
[
"
up_sequence
"
]:
while
not
_cmp_sequences
(
sqe
,
up_data
[
"
up_sequence
"
]
,
up_data
[
"
up_ns_aa
"
])
:
if
up_data
[
"
up_entry_version
"
]
>
1
:
up_data
=
_fetch_unisave_entry
(
up_ac
,
up_data
[
"
up_entry_version
"
]
-
1
...
...
@@ -732,7 +764,10 @@ def _get_modelcif_entities(target_ents, source, asym_units, system):
"""
Create ModelCIF entities and asymmetric units.
"""
for
cif_ent
in
target_ents
:
mdlcif_ent
=
modelcif
.
Entity
(
cif_ent
[
"
pdb_sequence
"
],
# Use up_sequence above pdb_sequence as it contains no gaps but
# otherwise is equal to the pdb_sequence. Gaps in pdb_sequence
# originate from missing residues in the model.
cif_ent
[
"
up_sequence
"
],
description
=
cif_ent
[
"
description
"
],
source
=
source
,
references
=
[
...
...
@@ -1111,3 +1146,5 @@ def _main():
if
__name__
==
"
__main__
"
:
_main
()
# LocalWords: aa pdb
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment