Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelcif-converters
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
modelcif-converters
Commits
733a3323
Commit
733a3323
authored
1 year ago
by
Bienchen
Browse files
Options
Downloads
Patches
Plain Diff
Add AlphaFOld DBs
parent
07cf74e2
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
projects/novelfams/translate2modelcif.py
+75
-13
75 additions, 13 deletions
projects/novelfams/translate2modelcif.py
with
75 additions
and
13 deletions
projects/novelfams/translate2modelcif.py
+
75
−
13
View file @
733a3323
...
...
@@ -6,6 +6,7 @@
from
timeit
import
default_timer
as
timer
import
argparse
import
datetime
import
gzip
import
os
import
shutil
...
...
@@ -494,9 +495,45 @@ def _get_sequence_dbs_colabfold(seq_dbs):
return
[
db_dict
[
seq_db
]
for
seq_db
in
seq_dbs
]
def _get_sequence_dbs_alphafold(seq_dbs):
    """Look up AlphaFold reference sequence databases by name.

    The names known to this function are "MGnify", "UniRef90", "BFD" and
    "Uniclust30". One ``modelcif.ReferenceDatabase`` is returned per entry
    of ``seq_dbs``, in the order given. A name that is not in the table
    raises ``KeyError``.
    """
    # Common prefix of the database downloads hosted in the AlphaFold bucket.
    af_bucket = "https://storage.googleapis.com/alphafold-databases/"

    mgnify = modelcif.ReferenceDatabase(
        "MGnify",
        af_bucket + "casp14_versions/mgy_clusters_2018_12.fa.gz",
        version="2018_12",
        release_date=datetime.datetime(2018, 12, 6),
    )
    uniref90 = modelcif.ReferenceDatabase(
        "UniRef90",
        "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/"
        + "uniref90.fasta.gz",
        version=None,
        release_date=None,
    )
    # No release date is passed for BFD; the version is a hex digest string.
    bfd = modelcif.ReferenceDatabase(
        "BFD",
        af_bucket
        + "casp14_versions/"
        + "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz",
        version="6a634dc6eb105c2e9b4cba7bbae93412",
    )
    uniclust30 = modelcif.ReferenceDatabase(
        "Uniclust30",
        af_bucket + "casp14_versions/uniclust30_2018_08_hhsuite.tar.gz",
        version="2018_08",
        release_date=None,
    )

    known_dbs = {
        "MGnify": mgnify,
        "UniRef90": uniref90,
        "BFD": bfd,
        "Uniclust30": uniclust30,
    }

    selected = []
    for db_name in seq_dbs:
        selected.append(known_dbs[db_name])
    return selected
def
_get_modelcif_protocol_data
(
data_labels
,
target_entities
,
model
,
msa
):
"""
Assemble data for a ModelCIF protocol step.
"""
data
=
modelcif
.
data
.
DataGroup
()
for
label
in
data_labels
:
if
label
==
"
target_sequences
"
:
data
.
extend
(
target_entities
)
...
...
@@ -506,6 +543,12 @@ def _get_modelcif_protocol_data(data_labels, target_entities, model, msa):
data
.
extend
(
_get_sequence_dbs_colabfold
([
"
UniRef
"
,
"
Environmental
"
])
)
elif
label
==
"
alphafold_reference_dbs
"
:
data
.
extend
(
_get_sequence_dbs_alphafold
(
[
"
MGnify
"
,
"
UniRef90
"
,
"
BFD
"
,
"
Uniclust30
"
]
)
)
elif
label
==
"
msas
"
:
data
.
append
(
msa
)
else
:
...
...
@@ -684,7 +727,6 @@ def _get_protocol_steps_and_software_colabfold(config_data):
protocol
=
[]
# MSA step
# Step 1 - MSA: Using default Colabfold databases with default parameters (colabfold_envdb_202108, uniref30_2202)
step
=
{
"
method_type
"
:
"
coevolution MSA
"
,
"
name
"
:
None
,
...
...
@@ -738,30 +780,50 @@ def _get_config_colabfold():
def
_get_config_alphafold
():
"""
Get config variables for AlphaFold
"""
description
=
"
Predict model coordinates using AlphaFold.
"
af2_version
=
"
2.2.0
"
msa_description
=
(
"
MSAs created for corresponding target sequence with AlphaFold using
"
+
"
default parameters.
"
)
return
{
"
description
"
:
description
}
mdl_description
=
(
f
"
Model generated using AlphaFold (
{
af2_version
}
with default
"
+
"
parameters) producing 5 models,ranked by pLDDT, starting from a the
"
+
f
"
Alphafold
{
af2_version
}
produced MSA.
"
)
return
{
"
af2_version
"
:
af2_version
,
"
msa_description
"
:
msa_description
,
"
mdl_description
"
:
mdl_description
,
}
def _get_protocol_steps_and_software_alphafold(config_data):
    """Get protocol steps for AF2 based models.

    ``config_data`` must provide the keys "af2_version", "msa_description"
    and "mdl_description"; a missing key raises ``KeyError``.

    Returns a list of two step dictionaries, MSA generation first and
    modelling second. The "input" and "output" entries are keyword labels
    rather than data objects.
    """
    protocol = []

    # MSA generation
    step = {
        "method_type": "coevolution MSA",
        "name": None,
        "details": config_data["msa_description"],
        "input": ["target_sequences", "alphafold_reference_dbs"],
        "output": ["msas"],
        "software": [_get_af2_software(config_data["af2_version"])],
        "software_parameters": None,
    }
    protocol.append(step)

    # modelling step
    # "details" is given exactly once: a second "details" key in the same
    # dict literal would silently overwrite the first. The software version
    # is read from config_data, like in the MSA step, instead of repeating
    # the version literal.
    step = {
        "method_type": "modeling",
        "name": None,
        "details": config_data["mdl_description"],
        "input": ["target_sequences"],
        "output": ["model"],
        "software": [_get_af2_software(config_data["af2_version"])],
        "software_parameters": None,
    }
    protocol.append(step)

    return protocol
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment