Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelcif-converters
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
modelcif-converters
Commits
b98d8878
Commit
b98d8878
authored
2 years ago
by
B13nch3n
Browse files
Options
Downloads
Patches
Plain Diff
Update converter script.
parent
0ba72f69
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
projects/human-heterodimers-w-crosslinks/translate2modelcif.py
+152
-70
152 additions, 70 deletions
...cts/human-heterodimers-w-crosslinks/translate2modelcif.py
with
152 additions
and
70 deletions
projects/human-heterodimers-w-crosslinks/translate2modelcif.py
+
152
−
70
View file @
b98d8878
...
@@ -46,10 +46,12 @@ def _parse_args():
...
@@ -46,10 +46,12 @@ def _parse_args():
+
"'
<UniProtKB AC>-<UniProtKB AC>
'"
,
+
"'
<UniProtKB AC>-<UniProtKB AC>
'"
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"
--rank
"
,
"
--
selected_
rank
"
,
type
=
str
,
type
=
str
,
default
=
None
,
default
=
None
,
help
=
"
Only process the model with this rank.
"
,
help
=
"
If a certain model of a modelling project is selected by rank,
"
+
"
the other models are still translated to ModelCIF but stored as
"
+
"
accompanying files to the selected model.
"
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"
--out_dir
"
,
"
--out_dir
"
,
...
@@ -151,7 +153,7 @@ class _OST2ModelCIF(modelcif.model.AbInitioModel):
...
@@ -151,7 +153,7 @@ class _OST2ModelCIF(modelcif.model.AbInitioModel):
occupancy
=
atm
.
occupancy
,
occupancy
=
atm
.
occupancy
,
)
)
def
add_scores
(
self
,
scores_json
,
entry_id
,
mdl_name
):
def
add_scores
(
self
,
scores_json
,
entry_id
,
mdl_name
,
add_files
):
"""
Add QA metrics from AF2 scores.
"""
"""
Add QA metrics from AF2 scores.
"""
# global scores
# global scores
self
.
qa_metrics
.
extend
(
self
.
qa_metrics
.
extend
(
...
@@ -196,21 +198,26 @@ class _OST2ModelCIF(modelcif.model.AbInitioModel):
...
@@ -196,21 +198,26 @@ class _OST2ModelCIF(modelcif.model.AbInitioModel):
self
.
qa_metrics
.
extend
(
lpae
)
self
.
qa_metrics
.
extend
(
lpae
)
ac_file
=
f
"
{
mdl_name
}
_local_pairwise_qa.cif
"
ac_file
=
f
"
{
mdl_name
}
_local_pairwise_qa.cif
"
qa_file
=
modelcif
.
associated
.
LocalPairwiseQAScoresFile
(
arc_files
=
[
ac_file
,
modelcif
.
associated
.
LocalPairwiseQAScoresFile
(
categories
=
[
"
_ma_qa_metric_local_pairwise
"
],
ac_file
,
copy_categories
=
[
"
_ma_qa_metric
"
],
categories
=
[
"
_ma_qa_metric_local_pairwise
"
],
entry_id
=
entry_id
,
copy_categories
=
[
"
_ma_qa_metric
"
],
entry_details
=
"
This file is an associated file consisting
"
entry_id
=
entry_id
,
+
"
of local pairwise QA metrics. This is a partial mmCIF
"
entry_details
=
"
This file is an associated file consisting
"
+
"
file and can be validated by merging with the main
"
+
"
of local pairwise QA metrics. This is a partial mmCIF
"
+
"
mmCIF file containing the model coordinates and other
"
+
"
file and can be validated by merging with the main
"
+
"
associated data.
"
,
+
"
mmCIF file containing the model coordinates and other
"
details
=
"
Predicted aligned error
"
,
+
"
associated data.
"
,
)
details
=
"
Predicted aligned error
"
,
)
]
if
add_files
:
arc_files
.
extend
(
add_files
)
return
modelcif
.
associated
.
Repository
(
return
modelcif
.
associated
.
Repository
(
""
,
""
,
[
modelcif
.
associated
.
ZipFile
(
f
"
{
mdl_name
}
.zip
"
,
files
=
[
qa
_file
]
)],
[
modelcif
.
associated
.
ZipFile
(
f
"
{
mdl_name
}
.zip
"
,
files
=
arc
_file
s
)],
)
)
# NOTE: by convention MA expects zip file with same name as model-cif
# NOTE: by convention MA expects zip file with same name as model-cif
...
@@ -248,14 +255,14 @@ def _check_model_extra_files_present(model_dir, pdb_file):
...
@@ -248,14 +255,14 @@ def _check_model_extra_files_present(model_dir, pdb_file):
def
_get_audit_authors
():
def
_get_audit_authors
():
"""
Return the list of authors that produced this model.
"""
"""
Return the list of authors that produced this model.
"""
return
(
return
(
"
Bartolec, T.
"
,
"
Bartolec, T.
K.
"
,
"
Vazquez-Campos, X.
"
,
"
Vazquez-Campos, X.
"
,
"
Johnson, M.
"
,
"
Norman, A.
"
,
"
Norman, A.
"
,
"
Payne, R.
"
,
"
Luong, C.
"
,
"
Wilkins, M.
"
,
"
Payne, R.J.
"
,
"
Mackay, J.
"
,
"
Wilkins, M.R.
"
,
"
Low, J.
"
,
"
Mackay, J.P.
"
,
"
Low, J.K.K.
"
,
)
)
...
@@ -818,19 +825,19 @@ def _compress_cif_file(cif_file):
...
@@ -818,19 +825,19 @@ def _compress_cif_file(cif_file):
os
.
remove
(
cif_file
)
os
.
remove
(
cif_file
)
def
_package_associated_files
(
mdl_name
):
def
_package_associated_files
(
repo
):
"""
Compress associated files into single zip file and delete original.
"""
"""
Compress associated files into single zip file and delete original.
"""
# file names must match ones from add_scores
zip_path
=
f
"
{
mdl_name
}
.zip
"
files
=
[
f
"
{
mdl_name
}
_local_pairwise_qa.cif
"
]
# zip settings tested for good speed vs compression
# zip settings tested for good speed vs compression
with
zipfile
.
ZipFile
(
zip_path
,
"
w
"
,
zipfile
.
ZIP_BZIP2
)
as
myzip
:
for
archive
in
repo
.
files
:
for
file
in
files
:
with
zipfile
.
ZipFile
(
archive
.
path
,
"
w
"
,
zipfile
.
ZIP_BZIP2
)
as
cif_zip
:
myzip
.
write
(
file
)
for
zfile
in
archive
.
files
:
os
.
remove
(
file
)
cif_zip
.
write
(
zfile
.
path
,
arcname
=
zfile
.
path
)
os
.
remove
(
zfile
.
path
)
def
_store_as_modelcif
(
data_json
,
ost_ent
,
out_dir
,
file_prfx
,
compress
):
def
_store_as_modelcif
(
data_json
,
ost_ent
,
out_dir
,
file_prfx
,
compress
,
add_files
):
"""
Mix all the data into a ModelCIF file.
"""
"""
Mix all the data into a ModelCIF file.
"""
print
(
"
generating ModelCIF objects...
"
,
end
=
""
)
print
(
"
generating ModelCIF objects...
"
,
end
=
""
)
pstart
=
timer
()
pstart
=
timer
()
...
@@ -876,18 +883,23 @@ def _store_as_modelcif(data_json, ost_ent, out_dir, file_prfx, compress):
...
@@ -876,18 +883,23 @@ def _store_as_modelcif(data_json, ost_ent, out_dir, file_prfx, compress):
print
(
"
processing QA scores...
"
,
end
=
""
,
flush
=
True
)
print
(
"
processing QA scores...
"
,
end
=
""
,
flush
=
True
)
pstart
=
timer
()
pstart
=
timer
()
mdl_name
=
os
.
path
.
basename
(
file_prfx
)
mdl_name
=
os
.
path
.
basename
(
file_prfx
)
system
.
repositories
.
append
(
model
.
add_scores
(
data_json
,
system
.
id
,
mdl_name
))
system
.
repositories
.
append
(
model
.
add_scores
(
data_json
,
system
.
id
,
mdl_name
,
add_files
)
)
print
(
f
"
(
{
timer
()
-
pstart
:
.
2
f
}
s)
"
)
print
(
f
"
(
{
timer
()
-
pstart
:
.
2
f
}
s)
"
)
system
.
model_groups
.
append
(
system
.
model_groups
.
append
(
modelcif
.
model
.
ModelGroup
([
model
],
name
=
data_json
[
"
model_group_name
"
])
modelcif
.
model
.
ModelGroup
([
model
],
name
=
data_json
[
"
model_group_name
"
])
)
)
ref_dbs
=
_get_sequence_dbs
(
data_json
[
"
config_data
"
][
"
seq_dbs
"
])
system
.
protocols
.
append
(
protocol
=
_get_modelcif_protocol
(
_get_modelcif_protocol
(
data_json
[
"
protocol
"
],
system
.
target_entities
,
model
,
ref_dbs
data_json
[
"
protocol
"
],
system
.
target_entities
,
model
,
_get_sequence_dbs
(
data_json
[
"
config_data
"
][
"
seq_dbs
"
]),
)
)
)
system
.
protocols
.
append
(
protocol
)
# write modelcif System to file
# write modelcif System to file
print
(
"
write to disk...
"
,
end
=
""
,
flush
=
True
)
print
(
"
write to disk...
"
,
end
=
""
,
flush
=
True
)
...
@@ -896,17 +908,48 @@ def _store_as_modelcif(data_json, ost_ent, out_dir, file_prfx, compress):
...
@@ -896,17 +908,48 @@ def _store_as_modelcif(data_json, ost_ent, out_dir, file_prfx, compress):
# -> hence we cheat by changing path and back while being exception-safe...
# -> hence we cheat by changing path and back while being exception-safe...
oldpwd
=
os
.
getcwd
()
oldpwd
=
os
.
getcwd
()
os
.
chdir
(
out_dir
)
os
.
chdir
(
out_dir
)
mdl_fle
=
f
"
{
mdl_name
}
.cif
"
try
:
try
:
with
open
(
f
"
{
mdl_
name
}
.cif
"
,
"
w
"
,
encoding
=
"
ascii
"
)
as
mmcif_fh
:
with
open
(
mdl_
fle
,
"
w
"
,
encoding
=
"
ascii
"
)
as
mmcif_fh
:
modelcif
.
dumper
.
write
(
mmcif_fh
,
[
system
])
modelcif
.
dumper
.
write
(
mmcif_fh
,
[
system
])
_package_associated_files
(
mdl_name
)
_package_associated_files
(
system
.
repositories
[
0
]
)
if
compress
:
if
compress
:
_compress_cif_file
(
f
"
{
mdl_
name
}
.cif
"
)
_compress_cif_file
(
mdl_
fle
)
finally
:
finally
:
os
.
chdir
(
oldpwd
)
os
.
chdir
(
oldpwd
)
print
(
f
"
(
{
timer
()
-
pstart
:
.
2
f
}
s)
"
)
print
(
f
"
(
{
timer
()
-
pstart
:
.
2
f
}
s)
"
)
mdl_fle
=
_get_assoc_mdl_file
(
mdl_fle
,
data_json
)
zip_fle
=
_get_assoc_zip_file
(
system
.
repositories
[
0
].
files
[
0
].
path
,
data_json
)
return
mdl_fle
,
zip_fle
def
_get_assoc_mdl_file
(
fle_path
,
data_json
):
"""
Generate a modelcif.associated.File object that looks like a CIF file.
The dedicated CIFFile functionality in modelcif would also try to write it.
"""
cfile
=
modelcif
.
associated
.
File
(
fle_path
,
details
=
f
"
model
{
data_json
[
'
mdl_num
'
]
}
; rank
{
data_json
[
'
rank_num
'
]
}
"
,
)
cfile
.
file_format
=
"
cif
"
return
cfile
def
_get_assoc_zip_file
(
fle_path
,
data_json
):
"""
Create a modelcif.associated.File object that looks like a ZIP file.
This is NOT the archive ZIP file for the PAEs but to store that in the
ZIP archive of the selected model.
"""
zfile
=
modelcif
.
associated
.
File
(
fle_path
,
details
=
"
archive with multiple files for model
"
+
f
"
{
data_json
[
'
mdl_num
'
]
}
; rank
{
data_json
[
'
rank_num
'
]
}
"
,
)
zfile
.
file_format
=
"
zip
"
return
zfile
def
_create_interaction_json
(
config_data
):
def
_create_interaction_json
(
config_data
):
"""
Create a dictionary (mimicking JSON) that contains data which is the same
"""
Create a dictionary (mimicking JSON) that contains data which is the same
...
@@ -931,6 +974,41 @@ def _create_model_json(data, pdb_file, up_acs, block_id):
...
@@ -931,6 +974,41 @@ def _create_model_json(data, pdb_file, up_acs, block_id):
return
ost_ent
return
ost_ent
def
_translate2modelcif
(
up_acs
,
pdb_fle
,
config_data
,
opts
,
add_files
):
"""
Convert a PDB file with its accompanying data to ModelCIF.
"""
pdb_start
=
timer
()
file_prfx
,
uid
=
_check_model_extra_files_present
(
opts
.
model_dir
,
pdb_fle
)
pdb_fle
=
os
.
path
.
join
(
opts
.
model_dir
,
pdb_fle
)
# gather data into JSON-like structure
print
(
"
preparing data...
"
,
end
=
""
)
pstart
=
timer
()
mdlcf_json
=
_create_interaction_json
(
config_data
)
# uid = ..._rank_X_model_Y.pdb
mdl_name_parts
=
uid
.
split
(
"
_
"
)
assert
mdl_name_parts
[
-
4
]
==
"
rank
"
assert
mdl_name_parts
[
-
2
]
==
"
model
"
mdlcf_json
[
"
rank_num
"
]
=
int
(
mdl_name_parts
[
-
3
])
mdlcf_json
[
"
mdl_num
"
]
=
int
(
mdl_name_parts
[
-
1
])
ost_ent
=
_create_model_json
(
mdlcf_json
,
pdb_fle
,
up_acs
,
uid
)
# read quality scores from JSON file
_get_scores
(
mdlcf_json
,
file_prfx
)
print
(
f
"
(
{
timer
()
-
pstart
:
.
2
f
}
s)
"
)
mdlcf_fle
,
zip_fle
=
_store_as_modelcif
(
mdlcf_json
,
ost_ent
,
opts
.
out_dir
,
file_prfx
,
opts
.
compress
,
add_files
,
)
return
pdb_start
,
pdb_fle
,
mdlcf_fle
,
zip_fle
def
_main
():
def
_main
():
"""
Run as script.
"""
"""
Run as script.
"""
opts
=
_parse_args
()
opts
=
_parse_args
()
...
@@ -945,45 +1023,49 @@ def _main():
...
@@ -945,45 +1023,49 @@ def _main():
config_data
=
_parse_colabfold_config
(
cnfg
)
config_data
=
_parse_colabfold_config
(
cnfg
)
# iterate model directory
# iterate model directory
found_ranked
=
False
# There is 1 representative for a modelling project, the other models are
# stored in its ZIP archive.
not_slctd_mdls
=
[]
slctd_mdl
=
None
for
fle
in
sorted
(
os
.
listdir
(
opts
.
model_dir
)):
for
fle
in
sorted
(
os
.
listdir
(
opts
.
model_dir
)):
# iterate PDB files
# iterate PDB files
if
not
fle
.
endswith
(
"
.pdb
"
):
if
not
fle
.
endswith
(
"
.pdb
"
):
continue
continue
if
opts
.
rank
is
not
None
and
f
"
rank_
{
opts
.
rank
}
"
not
in
fle
:
if
(
opts
.
selected_rank
is
not
None
and
f
"
rank_
{
opts
.
selected_rank
}
"
in
fle
):
slctd_mdl
=
fle
continue
continue
found_ranked
=
True
print
(
f
"
translating
{
fle
}
...
"
)
print
(
f
"
translating
{
fle
}
...
"
)
pdb_start
=
timer
()
pdb_start
,
fle
,
mdlcf_fle
,
zip_fle
=
_translate2modelcif
(
file_prfx
,
uid
=
_check_model_extra_files_present
(
opts
.
model_dir
,
fle
)
up_acs
,
fle
=
os
.
path
.
join
(
opts
.
model_dir
,
fle
)
fle
,
config_data
,
# gather data into JSON-like structure
opts
,
print
(
"
preparing data...
"
,
end
=
""
)
None
,
pstart
=
timer
()
mdlcf_json
=
_create_interaction_json
(
config_data
)
# uid = ..._rank_X_model_Y.pdb
mdl_name_parts
=
uid
.
split
(
"
_
"
)
assert
mdl_name_parts
[
-
4
]
==
"
rank
"
assert
mdl_name_parts
[
-
2
]
==
"
model
"
mdlcf_json
[
"
rank_num
"
]
=
int
(
mdl_name_parts
[
-
3
])
mdlcf_json
[
"
mdl_num
"
]
=
int
(
mdl_name_parts
[
-
1
])
ost_ent
=
_create_model_json
(
mdlcf_json
,
fle
,
up_acs
,
uid
)
# read quality scores from JSON file
_get_scores
(
mdlcf_json
,
file_prfx
)
print
(
f
"
(
{
timer
()
-
pstart
:
.
2
f
}
s)
"
)
_store_as_modelcif
(
mdlcf_json
,
ost_ent
,
opts
.
out_dir
,
file_prfx
,
opts
.
compress
)
)
print
(
f
"
... done with
{
fle
}
(
{
timer
()
-
pdb_start
:
.
2
f
}
s).
"
)
print
(
f
"
... done with
{
fle
}
(
{
timer
()
-
pdb_start
:
.
2
f
}
s).
"
)
not_slctd_mdls
.
append
(
mdlcf_fle
)
not_slctd_mdls
.
append
(
zip_fle
)
if
opts
.
selected_rank
:
if
slctd_mdl
is
None
:
_abort_msg
(
f
"
Could not find model of requested rank
'
{
opts
.
selected_rank
}
'"
)
print
(
f
"
translating selected model
{
opts
.
selected_rank
}
"
+
f
"
(
{
slctd_mdl
}
)...
"
)
_translate2modelcif
(
up_acs
,
slctd_mdl
,
config_data
,
opts
,
not_slctd_mdls
,
)
print
(
f
"
... done with
{
slctd_mdl
}
(
{
timer
()
-
pdb_start
:
.
2
f
}
s).
"
)
if
opts
.
rank
and
not
found_ranked
:
_abort_msg
(
f
"
Could not find model of requested rank
'
{
opts
.
rank
}
'"
)
print
(
f
"
... done with
{
opts
.
model_dir
}
.
"
)
print
(
f
"
... done with
{
opts
.
model_dir
}
.
"
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment