Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelcif-converters
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
modelcif-converters
Commits
07cf74e2
Commit
07cf74e2
authored
1 year ago
by
Bienchen
Browse files
Options
Downloads
Patches
Plain Diff
Protocol for ColabFold
parent
27be2ce6
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
projects/novelfams/translate2modelcif.py
+109
-41
109 additions, 41 deletions
projects/novelfams/translate2modelcif.py
with
109 additions
and
41 deletions
projects/novelfams/translate2modelcif.py
+
109
−
41
View file @
07cf74e2
...
...
@@ -301,8 +301,6 @@ def _get_sequence(chn, use_auth=False):
lst_rn
+=
1
sqe
+=
res
.
one_letter_code
if
"
-
"
in
sqe
:
print
(
"
GAP
"
)
return
sqe
...
...
@@ -347,7 +345,11 @@ def _get_entities(pdb_file, fam_name, trg_seq):
if
len_diff
>
0
:
exp_seq
+=
"
X
"
*
len_diff
if
exp_seq
!=
trg_seq
.
string
:
print
(
f
"
Sequence in
{
pdb_file
}
does not match target.
"
)
print
(
f
"
Sequence in
{
os
.
path
.
splitext
(
os
.
path
.
basename
(
pdb_file
))[
0
]
}
"
+
"
does not match target.
"
,
exp_seq
,
)
# ToDo: re-enable check
# raise RuntimeError(f"Sequence in {pdb_file} does not match target.")
...
...
@@ -474,27 +476,59 @@ def _get_modelcif_protocol_software(js_step):
return
None
def
_get_modelcif_protocol_data
(
data_label
,
target_entities
,
model
):
def _get_sequence_dbs_colabfold(seq_dbs):
    """Return the ColabFold reference databases named in ``seq_dbs``.

    ``seq_dbs`` is an iterable of database keys; the keys known here are
    ``"UniRef"`` and ``"Environmental"``. The result is a list of
    ``modelcif.ReferenceDatabase`` objects in the same order as the keys.
    An unknown key raises ``KeyError`` from the dictionary lookup.
    """
    uniref30 = modelcif.ReferenceDatabase(
        "UniRef30",
        "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz",
        version="2022_02",
    )
    environmental = modelcif.ReferenceDatabase(
        "ColabFold DB",
        "http://wwwuser.gwdg.de/~compbiol/colabfold/"
        "colabfold_envdb_202108.tar.gz",
        version="2021_08",
    )
    known_dbs = {"UniRef": uniref30, "Environmental": environmental}

    selected = []
    for db_key in seq_dbs:
        selected.append(known_dbs[db_key])
    return selected
def _get_modelcif_protocol_data(data_labels, target_entities, model, msa):
    """Assemble data for a ModelCIF protocol step.

    ``data_labels`` is an iterable of keywords naming the data of a step.
    Recognised keywords:

    * ``"target_sequences"`` - adds all of ``target_entities``
    * ``"model"`` - the result becomes ``model`` itself
    * ``"colabfold_reference_dbs"`` - adds the UniRef and Environmental
      databases from ``_get_sequence_dbs_colabfold``
    * ``"msas"`` - adds ``msa``

    Returns the assembled ``modelcif.data.DataGroup``, or ``model`` if the
    ``"model"`` keyword was seen. Raises ``RuntimeError`` for any other
    keyword.
    """
    data = modelcif.data.DataGroup()
    for label in data_labels:
        if label == "target_sequences":
            data.extend(target_entities)
        elif label == "model":
            # NOTE(review): this rebinds the result to the model object
            # instead of adding it to the group, so anything collected
            # before is dropped. Presumably "model" is only ever used as
            # the sole label of a step - confirm against the callers.
            data = model
        elif label == "colabfold_reference_dbs":
            data.extend(
                _get_sequence_dbs_colabfold(["UniRef", "Environmental"])
            )
        elif label == "msas":
            data.append(msa)
        else:
            raise RuntimeError(f"Unknown protocol data: '{label}'")
    return data
def
_get_modelcif_protocol
(
protocol_steps
,
target_entities
,
model
):
"""
Create the protocol for the ModelCIF file.
"""
protocol
=
modelcif
.
protocol
.
Protocol
()
# This is a bit inelegant, but we need a single MSA object that can serve
# as output & input and is only referenced once in the ModelCIF file.
msa
=
modelcif
.
data
.
Data
(
"
MSA
"
,
details
=
"
MSAs of the target sequence and search DBs.
"
)
for
js_step
in
protocol_steps
:
sftwre
=
_get_modelcif_protocol_software
(
js_step
)
input_data
=
_get_modelcif_protocol_data
(
js_step
[
"
input
"
],
target_entities
,
model
js_step
[
"
input
"
],
target_entities
,
model
,
msa
)
output_data
=
_get_modelcif_protocol_data
(
js_step
[
"
output
"
],
target_entities
,
model
js_step
[
"
output
"
],
target_entities
,
model
,
msa
)
protocol
.
steps
.
append
(
...
...
@@ -618,7 +652,21 @@ def _store_as_modelcif(
os
.
chdir
(
oldpwd
)
def
_get_af2_software
():
def _get_colabfold_software(version=None):
    """Describe ColabFold as a plain dictionary.

    The dictionary is meant to be turned into a modelcif software object.
    ``version`` is stored unchanged under the ``"version"`` key and defaults
    to ``None``.
    """
    colabfold = dict(
        name="ColabFold",
        classification="model building",
        description="Structure prediction",
        citation=ihm.citations.colabfold,
        location="https://github.com/sokrypton/ColabFold",
        type="package",
        version=version,
    )
    return colabfold
def
_get_af2_software
(
version
=
None
):
"""
Get AF2 as dictionary, suitable to create a modelcif software object.
"""
return
{
"
name
"
:
"
AlphaFold
"
,
...
...
@@ -627,7 +675,7 @@ def _get_af2_software():
"
citation
"
:
ihm
.
citations
.
alphafold2
,
"
location
"
:
"
https://github.com/deepmind/alphafold
"
,
"
type
"
:
"
package
"
,
"
version
"
:
N
on
e
,
"
version
"
:
versi
on
,
}
...
...
@@ -635,32 +683,32 @@ def _get_protocol_steps_and_software_colabfold(config_data):
"""
Get protocol steps for ColabFold models.
"""
protocol
=
[]
# MSA step
# Step 1 - MSA: Using default Colabfold databases with default parameters (colabfold_envdb_202108, uniref30_2202)
step
=
{
"
method_type
"
:
"
coevolution MSA
"
,
"
name
"
:
None
,
"
details
"
:
config_data
[
"
msa_description
"
],
"
input
"
:
[
"
target_sequences
"
,
"
colabfold_reference_dbs
"
],
"
output
"
:
[
"
msas
"
],
"
software
"
:
[
_get_colabfold_software
(
config_data
[
"
cf_version
"
])],
"
software_parameters
"
:
None
,
}
protocol
.
append
(
step
)
# modelling step
step
=
{
"
method_type
"
:
"
modeling
"
,
"
name
"
:
None
,
"
details
"
:
config_data
[
"
description
"
],
"
details
"
:
config_data
[
"
mdl_description
"
],
"
input
"
:
[
"
target_sequences
"
,
"
msas
"
],
"
output
"
:
[
"
model
"
],
"
software
"
:
[
_get_colabfold_software
(
config_data
[
"
cf_version
"
]),
_get_af2_software
(
config_data
[
"
af2_version
"
]),
],
"
software_parameters
"
:
None
,
}
# get input data
# Must refer to data already in the JSON, so we try keywords
step
[
"
input
"
]
=
"
target_sequences
"
# get output data
# Must refer to existing data, so we try keywords
step
[
"
output
"
]
=
"
model
"
# get software
step
[
"
software
"
]
=
[
{
"
name
"
:
"
ColabFold
"
,
"
classification
"
:
"
model building
"
,
"
description
"
:
"
Structure prediction
"
,
"
citation
"
:
ihm
.
citations
.
colabfold
,
"
location
"
:
"
https://github.com/sokrypton/ColabFold
"
,
"
type
"
:
"
package
"
,
"
version
"
:
None
,
}
]
step
[
"
software
"
].
append
(
_get_af2_software
())
step
[
"
software_parameters
"
]
=
None
protocol
.
append
(
step
)
return
protocol
...
...
@@ -668,14 +716,29 @@ def _get_protocol_steps_and_software_colabfold(config_data):
def
_get_config_colabfold
():
"""
Get config variables for ColabFold
"""
description
=
"
Model generation using ColabFold.
"
af2_version
=
"
2.1.14
"
cf_version
=
"
1.3.0
"
msa_description
=
(
"
MSAs created for corresponding target sequence with ColabFold using
"
+
"
default parameters.
"
)
mdl_description
=
(
f
"
Model generated using AlphaFold (
{
af2_version
}
,
"
+
f
"
executed within ColabFold
{
cf_version
}
) producing 5 models,
"
+
"
ranked by pLDDT, starting from the ColabFold produced MSA.
"
)
return
{
"
description
"
:
description
}
return
{
"
cf_version
"
:
cf_version
,
"
af2_version
"
:
af2_version
,
"
msa_description
"
:
msa_description
,
"
mdl_description
"
:
mdl_description
,
}
def
_get_config_alphafold
():
"""
Get config variables for AlphaFold
"""
description
=
"
Model generation
using AlphaFold.
"
description
=
"
Predict model coordinates
using AlphaFold.
"
return
{
"
description
"
:
description
}
...
...
@@ -692,12 +755,12 @@ def _get_protocol_steps_and_software_alphafold(config_data):
}
# get input data
# Must refer to data already in the JSON, so we try keywords
step
[
"
input
"
]
=
"
target_sequences
"
step
[
"
input
"
]
=
[
"
target_sequences
"
]
# get output data
# Must refer to existing data, so we try keywords
step
[
"
output
"
]
=
"
model
"
step
[
"
output
"
]
=
[
"
model
"
]
# get software
step
[
"
software
"
]
=
[
_get_af2_software
()]
step
[
"
software
"
]
=
[
_get_af2_software
(
"
2.2.0
"
)]
step
[
"
software_parameters
"
]
=
None
protocol
.
append
(
step
)
...
...
@@ -877,6 +940,11 @@ def _main():
)
except
(
_InvalidCoordinateError
,
_NoEntitiesError
):
continue
except
Exception
as
exc
:
# ToDo: remove catching ALL exceptions
_warn_msg
(
f
"
Uncaught exception for
'
{
f_name
}
'
:
"
)
print
(
str
(
exc
))
continue
# report progress after a bit of time
if
timer
()
-
tmstmp
>
60
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment