Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelcif-converters
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
modelcif-converters
Commits
81dfbb2b
Commit
81dfbb2b
authored
2 years ago
by
B13nch3n
Browse files
Options
Downloads
Patches
Plain Diff
Read associated Zip archives.
parent
0a3c7dc4
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
validation/validate-mmcif-file.py
+61
-34
61 additions, 34 deletions
validation/validate-mmcif-file.py
with
61 additions
and
34 deletions
validation/validate-mmcif-file.py
+
61
−
34
View file @
81dfbb2b
...
@@ -10,6 +10,7 @@ and thus, won't be merged into the model mmCIF file and won't be checked.
...
@@ -10,6 +10,7 @@ and thus, won't be merged into the model mmCIF file and won't be checked.
# pylint: disable=invalid-name
# pylint: disable=invalid-name
# pylint: enable=invalid-name
# pylint: enable=invalid-name
from
io
import
TextIOWrapper
import
argparse
import
argparse
import
atexit
import
atexit
import
copy
import
copy
...
@@ -17,6 +18,7 @@ import os
...
@@ -17,6 +18,7 @@ import os
import
subprocess
import
subprocess
import
sys
import
sys
import
tempfile
import
tempfile
import
zipfile
import
rapidjson
as
json
import
rapidjson
as
json
...
@@ -169,11 +171,15 @@ class _CifCheckFailedError(RuntimeError):
...
@@ -169,11 +171,15 @@ class _CifCheckFailedError(RuntimeError):
self
.
cifcheck_cmd
=
cifcheck_cmd
self
.
cifcheck_cmd
=
cifcheck_cmd
def
_read_mmcif
(
filepath
):
def
_read_mmcif
(
filepath
_or_object
):
"""
Read a mmCIF file
"""
"""
Read a mmCIF file
"""
data_lst
=
[]
data_lst
=
[]
with
open
(
filepath
,
encoding
=
"
utf-8
"
)
as
ifh
:
if
isinstance
(
filepath_or_object
,
str
):
prd
=
PdbxReader
(
ifh
)
with
open
(
filepath_or_object
,
encoding
=
"
utf-8
"
)
as
ifh
:
prd
=
PdbxReader
(
ifh
)
prd
.
read
(
data_lst
)
else
:
prd
=
PdbxReader
(
filepath_or_object
)
prd
.
read
(
data_lst
)
prd
.
read
(
data_lst
)
return
data_lst
return
data_lst
...
@@ -188,6 +194,17 @@ def _write_mmcif(filepath, cif_data):
...
@@ -188,6 +194,17 @@ def _write_mmcif(filepath, cif_data):
cifwriter
.
write
(
cif_data
)
cifwriter
.
write
(
cif_data
)
def
_get_indeces
(
data_category
,
attribute_list
):
"""
Get column indexes for a list of attributes.
"""
idxs
=
{}
for
attr
in
attribute_list
:
idxs
[
attr
]
=
data_category
.
getAttributeIndex
(
attr
)
if
idxs
[
attr
]
==
-
1
:
return
{}
return
idxs
def
_get_entry_id
(
cif_datablock
,
entry_id_map
,
datablock_idx
):
def
_get_entry_id
(
cif_datablock
,
entry_id_map
,
datablock_idx
):
"""
Get a mapping of the entry.id from a cif datablock.
"""
"""
Get a mapping of the entry.id from a cif datablock.
"""
entry
=
cif_datablock
.
getObj
(
"
entry
"
)
entry
=
cif_datablock
.
getObj
(
"
entry
"
)
...
@@ -198,7 +215,17 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx):
...
@@ -198,7 +215,17 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx):
entry_id_map
[
row
[
eidx
]]
=
datablock_idx
entry_id_map
[
row
[
eidx
]]
=
datablock_idx
def
_get_associated_files
(
model_cif_file
):
def
_unzip_arc_cif
(
arc_file
,
cif_file
,
assoc_dir
):
"""
Extract a cif file from a ZIP archive.
"""
assoc_data
=
[]
with
zipfile
.
ZipFile
(
os
.
path
.
join
(
assoc_dir
,
arc_file
))
as
arc_zip
:
with
TextIOWrapper
(
arc_zip
.
open
(
cif_file
),
encoding
=
"
utf-8
"
)
as
cif_fh
:
assoc_data
=
_read_mmcif
(
cif_fh
)
return
assoc_data
def
_get_associated_files
(
model_cif_file
,
assoc_dir
):
"""
Get the list of associated files from a model cif file.
"""
"""
Get the list of associated files from a model cif file.
"""
# This is an intermediate step, so we do not need to check/ report anything
# This is an intermediate step, so we do not need to check/ report anything
# here. The actual confirmation comes out of CifCheck at a later stage.
# here. The actual confirmation comes out of CifCheck at a later stage.
...
@@ -206,40 +233,45 @@ def _get_associated_files(model_cif_file):
...
@@ -206,40 +233,45 @@ def _get_associated_files(model_cif_file):
entry_id_map
=
{}
entry_id_map
=
{}
assoc_files
=
[]
assoc_files
=
[]
idx
s
=
{}
archive
s
=
{}
for
i
,
pdbx_cntnr
in
enumerate
(
mdl_cif
):
for
i
,
pdbx_cntnr
in
enumerate
(
mdl_cif
):
# gather entry.id's for later
# gather entry.id's for later
_get_entry_id
(
pdbx_cntnr
,
entry_id_map
,
i
)
_get_entry_id
(
pdbx_cntnr
,
entry_id_map
,
i
)
meafs
=
pdbx_cntnr
.
getObj
(
"
ma_entry_associated_files
"
)
dat_cat
=
pdbx_cntnr
.
getObj
(
"
ma_entry_associated_files
"
)
# If ma_entry_associated_files is not present then
# If ma_entry_associated_files is not present then
# ma_associated_archive_file_details can't exist either since it has a
# ma_associated_archive_file_details can't exist either since it has a
# ma_entry_associated_files.id relation. (CifCheck should notice that.)
# ma_entry_associated_files.id relation. (CifCheck should notice that.)
if
meafs
is
None
:
if
dat_cat
is
None
:
continue
continue
not_found
=
False
idxs
=
_get_indeces
(
for
j
in
[
"
file_format
"
,
"
file_url
"
,
"
entry_id
"
]:
dat_cat
,
[
"
entry_id
"
,
"
file_format
"
,
"
file_type
"
,
"
file_url
"
,
"
id
"
]
idxs
[
j
]
=
meafs
.
getAttributeIndex
(
j
)
)
if
idxs
[
j
]
==
-
1
:
if
not
idxs
:
not_found
=
True
break
if
not_found
:
continue
continue
for
row
in
meafs
:
for
row
in
dat_cat
:
if
row
[
idxs
[
"
file_type
"
]]
==
"
archive
"
:
archives
[
row
[
idxs
[
"
id
"
]]]
=
(
row
[
idxs
[
"
file_url
"
]],
row
[
idxs
[
"
entry_id
"
]],
)
if
row
[
idxs
[
"
file_format
"
]]
!=
"
cif
"
:
if
row
[
idxs
[
"
file_format
"
]]
!=
"
cif
"
:
continue
continue
assoc_files
.
append
((
row
[
idxs
[
"
file_url
"
]],
row
[
idxs
[
"
entry_id
"
]]))
data
=
_read_mmcif
(
os
.
path
.
join
(
assoc_dir
,
row
[
idxs
[
"
file_url
"
]]))
assoc_files
.
append
((
data
,
row
[
idxs
[
"
entry_id
"
]]))
# make sure entry_id is matching in associated file!
# make sure entry_id is matching in associated file!
maafd
=
pdbx_cntnr
.
getObj
(
"
ma_associated_archive_file_details
"
)
dat_cat
=
pdbx_cntnr
.
getObj
(
"
ma_associated_archive_file_details
"
)
if
maafd
is
None
:
if
dat_cat
is
None
:
continue
idxs
[
"
file_format
"
]
=
maafd
.
getAttributeIndex
(
"
file_format
"
)
if
idxs
[
"
file_format
"
]
==
-
1
:
continue
continue
for
row
in
maafd
:
idxs
=
_get_indeces
(
dat_cat
,
[
"
archive_file_id
"
,
"
file_format
"
,
"
file_path
"
]
)
for
row
in
dat_cat
:
if
row
[
idxs
[
"
file_format
"
]]
==
"
cif
"
:
if
row
[
idxs
[
"
file_format
"
]]
==
"
cif
"
:
raise
NotImplementedError
(
arc_id
=
row
[
idxs
[
"
archive_file_id
"
]]
"
Fetching associated cif files from archive.
"
arc_file
=
archives
[
arc_id
][
0
]
)
cif_file
=
row
[
idxs
[
"
file_path
"
]]
data
=
_unzip_arc_cif
(
arc_file
,
cif_file
,
assoc_dir
)
assoc_files
.
append
((
data
,
archives
[
arc_id
][
1
]))
return
assoc_files
,
mdl_cif
,
entry_id_map
return
assoc_files
,
mdl_cif
,
entry_id_map
...
@@ -318,12 +350,9 @@ def _try_os_remove(path):
...
@@ -318,12 +350,9 @@ def _try_os_remove(path):
pass
pass
def
_merge_cif_data
(
def
_merge_cif_data
(
model_cif_data
,
assoc_cif
,
row_entry_id
,
entry_id_map
):
model_cif_data
,
associated_path
,
row_entry_id
,
entry_id_map
):
"""
Merge contents of an associated file into cif data.
"""
"""
Merge contents of an associated file into cif data.
"""
error_msgs
=
{
"
cifcheck-errors
"
:
[]}
error_msgs
=
{
"
cifcheck-errors
"
:
[]}
assoc_cif
=
_read_mmcif
(
associated_path
)
# per datablock, check to which datablock it belongs in the parent cif
# per datablock, check to which datablock it belongs in the parent cif
for
assoc_cntnr
in
assoc_cif
:
for
assoc_cntnr
in
assoc_cif
:
...
@@ -627,18 +656,16 @@ def _main():
...
@@ -627,18 +656,16 @@ def _main():
# check for associated files referenced by the model cif file
# check for associated files referenced by the model cif file
assoc_files
,
model_cif_data
,
entry_id_map
=
_get_associated_files
(
assoc_files
,
model_cif_data
,
entry_id_map
=
_get_associated_files
(
opts
.
model_cif
opts
.
model_cif
,
opts
.
associates_dir
,
)
)
# save original data for later
# save original data for later
if
opts
.
extend_validated_file
is
not
None
:
if
opts
.
extend_validated_file
is
not
None
:
o_model_cif_data
=
copy
.
deepcopy
(
model_cif_data
)
o_model_cif_data
=
copy
.
deepcopy
(
model_cif_data
)
# make sure associated files exist and merge all of them into the model
# make sure associated files exist and merge all of them into the model
for
assoc
,
entry_id
in
assoc_files
:
for
assoc
,
entry_id
in
assoc_files
:
assoc_path
=
os
.
path
.
join
(
opts
.
associates_dir
,
assoc
)
# merge the model.cif and the associated file
# merge the model.cif and the associated file
msgs
=
_merge_cif_data
(
msgs
=
_merge_cif_data
(
model_cif_data
,
assoc
,
entry_id
,
entry_id_map
)
model_cif_data
,
assoc_path
,
entry_id
,
entry_id_map
)
cifcheck
.
add_to_results
(
msgs
)
cifcheck
.
add_to_results
(
msgs
)
validate_file
=
opts
.
model_cif
validate_file
=
opts
.
model_cif
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment