Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelcif-converters
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
modelcif-converters
Commits
d4835b37
Commit
d4835b37
authored
1 year ago
by
Bienchen
Browse files
Options
Downloads
Patches
Plain Diff
Add early protocol
parent
ed96fc9c
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
projects/novelfams/translate2modelcif.py
+109
-8
109 additions, 8 deletions
projects/novelfams/translate2modelcif.py
with
109 additions
and
8 deletions
projects/novelfams/translate2modelcif.py
+
109
−
8
View file @
d4835b37
...
...
@@ -80,6 +80,13 @@ def _parse_args():
metavar
=
"
<OUTPUT DIR>
"
,
help
=
"
Path to directory to store results.
"
,
)
parser
.
add_argument
(
"
--af2-models
"
,
default
=
None
,
type
=
str
,
metavar
=
"
<LIST FILE>
"
,
help
=
"
Path to a txt file with models build with AF2, 1 ID per line.
"
,
)
parser
.
add_argument
(
"
--compress
"
,
default
=
False
,
...
...
@@ -415,12 +422,10 @@ def _get_modelcif_protocol_software(js_step):
return
None
def
_get_modelcif_protocol_data
(
data_label
,
target_entities
,
aln_data
,
model
):
def
_get_modelcif_protocol_data
(
data_label
,
target_entities
,
model
):
"""
Assemble data for a ModelCIF protocol step.
"""
if
data_label
==
"
target_sequences
"
:
data
=
modelcif
.
data
.
DataGroup
(
target_entities
)
elif
data_label
==
"
MSA
"
:
data
=
aln_data
elif
data_label
==
"
target_sequences_and_MSA
"
:
data
=
modelcif
.
data
.
DataGroup
(
target_entities
)
data
.
append
(
aln_data
)
...
...
@@ -431,16 +436,16 @@ def _get_modelcif_protocol_data(data_label, target_entities, aln_data, model):
return
data
def
_get_modelcif_protocol
(
protocol_steps
,
target_entities
,
aln_data
,
model
):
def
_get_modelcif_protocol
(
protocol_steps
,
target_entities
,
model
):
"""
Create the protocol for the ModelCIF file.
"""
protocol
=
modelcif
.
protocol
.
Protocol
()
for
js_step
in
protocol_steps
:
sftwre
=
_get_modelcif_protocol_software
(
js_step
)
input_data
=
_get_modelcif_protocol_data
(
js_step
[
"
input
"
],
target_entities
,
aln_data
,
model
js_step
[
"
input
"
],
target_entities
,
model
)
output_data
=
_get_modelcif_protocol_data
(
js_step
[
"
output
"
],
target_entities
,
aln_data
,
model
js_step
[
"
output
"
],
target_entities
,
model
)
protocol
.
steps
.
append
(
...
...
@@ -518,6 +523,11 @@ def _store_as_modelcif(
model_group
=
modelcif
.
model
.
ModelGroup
([
model
])
system
.
model_groups
.
append
(
model_group
)
protocol
=
_get_modelcif_protocol
(
data_json
[
"
protocol
"
],
system
.
target_entities
,
model
)
system
.
protocols
.
append
(
protocol
)
# write modelcif System to file (NOTE: no PAE here!)
# NOTE: we change path and back while being exception-safe to handle zipfile
oldpwd
=
os
.
getcwd
()
...
...
@@ -533,10 +543,75 @@ def _store_as_modelcif(
os
.
chdir
(
oldpwd
)
def _get_protocol_steps_and_software_colabfold(config_data):
    """Build the list of protocol steps for a ColabFold model.

    The returned list holds a single "modeling" step described as a plain
    dictionary. The step lists two software entries, ColabFold first and
    AlphaFold second.

    :param config_data: Mapping that must provide a "description" entry; its
                        value is used as the "details" of the step.
    :return: List with one step dictionary.
    :raises KeyError: If ``config_data`` has no "description" entry.
    """
    # Read the description first so a missing key fails before anything else.
    details = config_data["description"]

    colabfold_sw = {
        "name": "ColabFold",
        "classification": "model building",
        "description": "Structure prediction",
        "citation": ihm.citations.colabfold,
        "location": "https://github.com/sokrypton/ColabFold",
        "type": "package",
        "version": None,
    }
    alphafold_sw = {
        "name": "AlphaFold",
        "classification": "model building",
        "description": "Structure prediction",
        "citation": ihm.citations.alphafold2,
        "location": "https://github.com/deepmind/alphafold",
        "type": "package",
        "version": None,
    }

    modelling_step = {
        "method_type": "modeling",
        "name": None,
        "details": details,
        # Input and output are keywords that must refer to data already
        # present in the JSON-like structure, not the data objects themselves.
        "input": "target_sequences",
        "output": "model",
        "software": [colabfold_sw, alphafold_sw],
        "software_parameters": None,
    }

    return [modelling_step]
def _get_config_colabfold():
    """Return the configuration dictionary used for ColabFold models.

    :return: Dictionary with a single "description" entry holding the
             free-text description of the modelling step.
    """
    return {"description": "Model generation using ColabFold."}
def _get_protocol_steps_and_software(mdl_id, af2_lst):
    """Select the protocol steps matching how a model was built.

    Models whose ID is listed in ``af2_lst`` get the AlphaFold protocol; all
    other models get the ColabFold protocol.

    :param mdl_id: ID of the model to look up.
    :param af2_lst: Collection of IDs of models built with AF2.
    :return: Whatever the selected protocol builder returns.
    """
    if mdl_id in af2_lst:
        return _get_protocol_steps_and_software_alphafold()

    # Not listed as an AF2 model, so fall back to ColabFold.
    return _get_protocol_steps_and_software_colabfold(_get_config_colabfold())
def
_translate2modelcif_single
(
f_name
,
opts
,
mdl_details
,
af2_lst
,
):
"""
Convert a single model with its accompanying data to ModelCIF.
"""
# ToDo: re-enable Pylint
...
...
@@ -546,13 +621,16 @@ def _translate2modelcif_single(
# gather data into JSON-like structure
mdlcf_json
=
{}
mdlcf_json
[
"
mdl_id
"
]
=
fam_name
# used for entry ID
mdlcf_json
[
"
protocol
"
]
=
_get_protocol_steps_and_software
(
fam_name
,
af2_lst
)
# process coordinates
target_entities
,
ost_ent
=
_get_entities
(
f_name
,
fam_name
)
mdlcf_json
[
"
target_entities
"
]
=
target_entities
# fill annotations
mdlcf_json
[
"
title
"
]
=
_get_title
(
f_name
)
mdlcf_json
[
"
title
"
]
=
_get_title
(
f
am
_name
)
mdlcf_json
[
"
model_details
"
]
=
mdl_details
# save ModelCIF
...
...
@@ -565,7 +643,7 @@ def _translate2modelcif_single(
)
def
_translate2modelcif
(
f_name
,
opts
):
def
_translate2modelcif
(
f_name
,
af2_lst
,
opts
):
"""
Convert a family of models with their accompanying data to ModelCIF.
"""
# ToDo: re-enable Pylint
# pylint: disable=too-many-locals
...
...
@@ -590,9 +668,28 @@ def _translate2modelcif(f_name, opts):
f_name
,
opts
,
mdl_details
,
af2_lst
,
)
def _read_af2_model_list(path):
    """Read a list of models built with AF2, one ID per line.

    Leading and trailing whitespace is stripped from every line. Blank lines
    are skipped so they do not end up as empty IDs or inflate the reported
    count.

    :param path: Path to the ASCII text file with the IDs, or None.
    :return: List of model IDs in file order; empty list if ``path`` is None.
    :raises OSError: If the file cannot be opened.
    :raises UnicodeDecodeError: If the file contains non-ASCII bytes.
    """
    if path is None:
        return []

    with open(path, encoding="ascii") as lfh:
        # Strip first, then drop what is left empty (blank/whitespace lines).
        af2_lst = [mdl_id for mdl_id in map(str.strip, lfh) if mdl_id]

    print(f"Got a list of {len(af2_lst)} models built with AF2.")

    return af2_lst
def
_main
():
"""
Run as script.
"""
s_tmstmp
=
timer
()
...
...
@@ -602,6 +699,9 @@ def _main():
pdb_files
=
_get_pdb_files
(
opts
.
model_dir
)
n_mdls
=
len
(
pdb_files
)
# read list of AF2 models
af2_mdls
=
_read_af2_model_list
(
opts
.
af2_models
)
# iterate over models
print
(
f
"
Processing
{
n_mdls
}
models.
"
)
tmstmp
=
s_tmstmp
...
...
@@ -610,6 +710,7 @@ def _main():
try
:
_translate2modelcif
(
f_name
,
af2_mdls
,
opts
,
)
except
(
_InvalidCoordinateError
,
_NoEntitiesError
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment