Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
AlphaPulldown-ModelCIF-Conversion
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Bienchen
AlphaPulldown-ModelCIF-Conversion
Commits
5f2676e3
Commit
5f2676e3
authored
1 year ago
by
Bienchen
Browse files
Options
Downloads
Patches
Plain Diff
Get draft of ModelCIF 'header'
parent
e4f5df09
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
convert_to_modelcif.py
+137
-25
137 additions, 25 deletions
convert_to_modelcif.py
pyproject.toml
+8
-0
8 additions, 0 deletions
pyproject.toml
with
145 additions
and
25 deletions
convert_to_modelcif.py
+
137
−
25
View file @
5f2676e3
...
...
@@ -3,11 +3,15 @@
"""
Take the output of the AlphaPulldown pipeline and turn it into a ModelCIF
file with a lot of metadata in place.
"""
from
typing
import
Tuple
import
json
import
os
import
sys
from
absl
import
app
,
flags
,
logging
import
modelcif
import
modelcif.dumper
# ToDo: Get options properly, best get the same names as used in existing
# scripts, e.g. could '--monomer_objects_dir' be used as feature
...
...
@@ -30,44 +34,144 @@ FLAGS = flags.FLAGS
# exist as expected.
def
alphapulldown_model_to_modelcif
()
->
None
:
def _store_as_modelcif(
    data_json: dict,
    mdl_file: str,
    out_dir: str,
    # ost_ent, file_prfx, compress, add_files
) -> None:
    """
    Write the collected metadata as a ModelCIF file into `out_dir`.

    The output file is named after `mdl_file`: its base name with the
    extension replaced by '.cif'. `data_json` has to provide the keys
    '_struct.title', 'data_' and '_struct.pdbx_model_details'.
    """
    system = modelcif.System(
        title=data_json["_struct.title"],
        id=data_json["data_"].upper(),
        model_details=data_json["_struct.pdbx_model_details"],
    )
    cif_name = f"{os.path.splitext(os.path.basename(mdl_file))[0]}.cif"

    # write modelcif.System to file
    # NOTE: this will dump PAE on path provided in add_scores
    # -> hence we work from inside the output directory and always switch
    #    back to the previous working directory, even if writing fails.
    previous_dir = os.getcwd()
    os.chdir(out_dir)
    try:
        with open(cif_name, "w", encoding="ascii") as mmcif_fh:
            modelcif.dumper.write(mmcif_fh, [system])
        # _package_associated_files(system.repositories[0])
        # if compress:
        #    _compress_cif_file(mdl_fle)
    finally:
        os.chdir(previous_dir)
def _get_model_details(cmplx_name: list, data_json: dict) -> str:
    """
    Get the model description.

    :param cmplx_name: Names of the monomers forming the complex, one list
                       item per monomer. The items are joined with ' and ',
                       so passing a plain string would split it into single
                       characters.
    :param data_json: Metadata collected so far. Needs a '__meta__' entry
                      mapping each monomer to a dictionary that has an
                      'ap_version' key.

    :return: Free text for the model details of the ModelCIF file.
    """
    # Collect every distinct AlphaPulldown version once, in order of first
    # appearance.
    ap_versions = []
    for mnmr_meta in data_json["__meta__"].values():
        version = mnmr_meta["ap_version"]
        if version not in ap_versions:
            ap_versions.append(version)

    # ToDo: fetch AF2 version/ have it in metadata JSON
    return (
        f"Model generated for {' and '.join(cmplx_name)}, produced "
        + "using AlphaFold-Multimer (<AF2 VERSION>) as implemented by "
        + f"AlphaPulldown ({', '.join(ap_versions)})."
    )
def _get_feature_metadata(
    modelcif_json: dict, cmplx_name: str, prj_dir: str
) -> list:
    """
    Read metadata from the feature JSON files of all monomers of a complex.

    For every monomer, '<prj_dir>/features_monomers/<monomer>_feature_
    metadata.json' is loaded and its 'version' value is stored as
    `modelcif_json["__meta__"][<monomer>]["ap_version"]`.

    :param modelcif_json: Dictionary to be filled; a '__meta__' entry is
                          created if it does not exist yet.
    :param cmplx_name: Name of the complex, monomer names separated by
                       '_and_'.
    :param prj_dir: Directory containing the 'features_monomers' directory.

    :return: The list of monomer names `cmplx_name` was split into.

    Terminates the script with exit status 1 if the feature directory or a
    feature metadata file is missing.
    """
    monomers = cmplx_name.split("_and_")
    feature_dir = os.path.join(prj_dir, "features_monomers")
    if not os.path.isdir(feature_dir):
        # A missing input is a failure: report it as an error and do not
        # exit with the "success" status of a bare sys.exit().
        logging.error(f"No feature directory '{feature_dir}' found.")
        sys.exit(1)

    meta = modelcif_json.setdefault("__meta__", {})
    for mnmr in monomers:
        meta[mnmr] = {}
        feature_json = os.path.join(
            feature_dir, f"{mnmr}_feature_metadata.json"
        )
        if not os.path.isfile(feature_json):
            logging.error(f"No feature metadata file '{feature_json}' found.")
            sys.exit(1)
        # ToDo: make sure that its always ASCII
        with open(feature_json, "r", encoding="ascii") as jfh:
            jdata = json.load(jfh)
        meta[mnmr]["ap_version"] = jdata["version"]

    return monomers
def _get_data_block_id_and_struct_and_entry_categories(
    cif_json: dict, cmplx_name: list
) -> None:
    """
    Get 'data_' block ID and data for categories '_struct' and '_entry'.

    :param cif_json: Dictionary updated in place with the keys 'data_',
                     '_struct.title' and '_struct.pdbx_model_details'.
    :param cmplx_name: Names of the monomers forming the complex, one list
                       item per monomer. The items get joined with '_' and
                       ' and ', so a plain string would be split into single
                       characters.
    """
    cif_json["data_"] = "_".join(cmplx_name)
    cif_json["_struct.title"] = f"Prediction for {' and '.join(cmplx_name)}"
    cif_json["_struct.pdbx_model_details"] = _get_model_details(
        cmplx_name, cif_json
    )
def alphapulldown_model_to_modelcif(
    cmplx_name: str,
    mdl_file: str,
    out_dir: str,
    prj_dir: str,
) -> None:
    """
    Convert an AlphaPulldown model into a ModelCIF formatted mmCIF file.

    Metadata for the ModelCIF categories will be fetched from AlphaPulldown
    output as far as possible. This expects modelling projects to exist in
    AlphaPulldown's output directory structure.

    :param cmplx_name: Name of the complex, as handed to
                       `_get_feature_metadata`.
    :param mdl_file: Model file to convert; its base name determines the name
                     of the ModelCIF file.
    :param out_dir: Directory the ModelCIF file is written to.
    :param prj_dir: AlphaPulldown project directory the feature metadata is
                    read from.
    """
    # ToDo: ENABLE logging.info(f"Processing '{mdl_file}'...")
    modelcif_json = {}
    # fetch metadata; this also yields the complex name broken up into a list
    # of monomer names, kept under a name of its own instead of re-using the
    # string parameter
    monomers = _get_feature_metadata(modelcif_json, cmplx_name, prj_dir)
    # fetch/ assemble more data about the modelling experiment
    _get_data_block_id_and_struct_and_entry_categories(modelcif_json, monomers)
    _store_as_modelcif(modelcif_json, mdl_file, out_dir)
    # ToDo: ENABLE logging.info(f"... done with '{mdl_file}'")
def _get_model_list(
    ap_dir: str, model_selected: str
) -> Tuple[str, str, list]:
    """
    Get the list of models to be converted.

    If `model_selected` is none, all models will be marked for conversion.

    :param ap_dir: AlphaPulldown output directory; has to contain a 'models'
                   directory with exactly one complex directory inside.
    :param model_selected: Rank of the single model to convert
                           ('ranked_<model_selected>.pdb'), or None to pick
                           up every 'ranked_*' entry of the complex directory.

    :return: Name of the complex directory, path to the complex directory and
             the list of model file paths.

    Terminates the script with exit status 1 if there is not exactly one
    complex or if a listed model is not a regular file.
    """
    # ToDo: Question - use 'ranked_*.pdb' or
    #       'unrelaxed_model_*_multimer_v3_pred_0.pdb' models?
    mdl_path = os.path.join(ap_dir, "models")
    cmplx = os.listdir(mdl_path)
    # For now, exactly 1 complex is expected in the 'models' subdirectory. If
    # there are more, the 'model_selected' mechanism needs to be further tuned
    # to get to the right model.
    # Checked explicitly, an 'assert' would vanish when running with '-O'.
    if len(cmplx) != 1:
        logging.error(
            f"Expected exactly 1 complex in '{mdl_path}', found {len(cmplx)}."
        )
        sys.exit(1)
    cmplx = cmplx[0]
    mdl_path = os.path.join(mdl_path, cmplx)

    if model_selected is not None:
        models = [os.path.join(mdl_path, f"ranked_{model_selected}.pdb")]
    else:
        # sorted, since os.listdir() returns entries in arbitrary order
        models = [
            os.path.join(mdl_path, mdl)
            for mdl in sorted(os.listdir(mdl_path))
            if mdl.startswith("ranked_")
        ]

    # check that files actually exist
    for mdl in models:
        if not os.path.isfile(mdl):
            logging.error(
                f"Model file '{mdl}' does not exist or is not a regular file."
            )
            sys.exit(1)

    return cmplx, mdl_path, models
def
main
(
argv
):
...
...
@@ -92,10 +196,14 @@ def main(argv):
# pylint: enable=pointless-string-statement
del
argv
# Unused.
# make list of selected models
model_conversions
=
_get_model_list
(
FLAGS
.
ap_output
,
FLAGS
.
model_selected
)
# assemble selected models into ModelCIF files + associated data archives
alphapulldown_model_to_modelcif
()
# get list of selected models and assemble ModelCIF files + associated data
complex_name
,
model_dir
,
model_list
=
_get_model_list
(
FLAGS
.
ap_output
,
FLAGS
.
model_selected
)
for
mdl
in
model_list
:
alphapulldown_model_to_modelcif
(
complex_name
,
mdl
,
model_dir
,
FLAGS
.
ap_output
)
if
__name__
==
"
__main__
"
:
...
...
@@ -106,5 +214,9 @@ if __name__ == "__main__":
# but we did that already in the past. Idea is to have all models
# available for... reproducibility and whatnot, but show the selected
# (representative) of the modelling experiment/ study more prominently.
# ToDo: Things to look at: '_struct.title', '_struct.pdbx_model_details',
# 'data_', '_entry', maybe have a user-defined JSON document with things
# like that, including author names?
# ToDo: where to store which model was chosen? Should be in Tara's models.
# LocalWords: ToDo AlphaPulldown PAEs dir
# LocalWords: ToDo AlphaPulldown PAEs dir
struct
This diff is collapsed.
Click to expand it.
pyproject.toml
+
8
−
0
View file @
5f2676e3
...
...
@@ -9,8 +9,16 @@ dependencies = [
[tool.black]
line-length
=
80
[tool.pylint.MAIN]
load-plugins
=
"pylint.extensions.bad_builtin"
[tool.pylint.REPORTS]
reports
=
"no"
[tool.pylint.FORMAT]
max-line-length
=
81
[tool.pylint.deprecated_builtins]
# We want to use proper logging, so we can control *ALL* output by the Abseil
# logger, hence: deprecate 'print'
bad-functions
=
[
"map"
,
"filter"
,
"print"
]
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment