Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelcif-converters
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
modelcif-converters
Commits
af54e24c
Commit
af54e24c
authored
2 years ago
by
Bienchen
Browse files
Options
Downloads
Plain Diff
Merge branch 'validation-tool' into develop
parents
d911872a
e427a626
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
validation/Dockerfile
+2
-0
2 additions, 0 deletions
validation/Dockerfile
validation/requirements.txt
+2
-0
2 additions, 0 deletions
validation/requirements.txt
validation/validate-mmcif-file.py
+45
-1
45 additions, 1 deletion
validation/validate-mmcif-file.py
with
49 additions
and
1 deletion
validation/Dockerfile
+
2
−
0
View file @
af54e24c
...
...
@@ -29,6 +29,7 @@ LABEL vendor3="Biozentrum - University of Basel (biozentrum.unibas.ch)"
## Install the RCSB CPP Dict Suite (only the binaries we need)
WORKDIR
${SRC_DIR}
COPY
requirements.txt ${SRC_DIR}
RUN
set
-e
pipefail
;
\
export
DICT_PACK_SRC_DIR
=
"
${
SRC_DIR
}
/cpp-dict-pack.git"
;
\
apk update
;
\
...
...
@@ -56,6 +57,7 @@ RUN set -e pipefail; \
/usr/local/bin/python -m pip install --upgrade pip; \
/usr/local/bin/python -m pip install mmcif==${VERSION_PY_MMCIF} \
python-rapidjson; \
/usr/local/bin/python -m pip install -r requirements.txt; \
#
## Clean up/ remove unnecessary stuff
apk del abuild binutils bison build-base cmake flex git gcc \
...
...
This diff is collapsed.
Click to expand it.
validation/requirements.txt
0 → 100644
+
2
−
0
View file @
af54e24c
python-rapidjson==1.9
validators==0.20.0
This diff is collapsed.
Click to expand it.
validation/validate-mmcif-file.py
+
45
−
1
View file @
af54e24c
...
...
@@ -20,7 +20,9 @@ import sys
import
tempfile
import
zipfile
from
validators
import
url
as
is_url
import
rapidjson
as
json
import
requests
from
mmcif.api.DataCategory
import
DataCategory
...
...
@@ -215,12 +217,51 @@ def _get_entry_id(cif_datablock, entry_id_map, datablock_idx):
entry_id_map
[
row
[
eidx
]]
=
datablock_idx
def _download_file(file_url):
    """Download a file into an anonymous temporary file.

    The temporary file is created with ``tempfile.TemporaryFile()``, so it is
    removed as soon as it gets closed (at the latest when the script
    terminates).

    :param file_url: URL to fetch the file from.
    :returns: Binary file object holding the downloaded content, rewound to
              position 0. The caller is responsible for closing it.
    :raises RuntimeError: If the server answers with anything but HTTP 200.
    """
    # With stream=True the connection stays open until the body is consumed
    # or the response is closed; the context manager guarantees the latter,
    # also on errors.
    with requests.get(file_url, stream=True, timeout=600) as rspns:
        if rspns.status_code != 200:
            raise RuntimeError(f"File not found by URL '{file_url}'.")
        dlf = tempfile.TemporaryFile()
        try:
            for chunk in rspns.iter_content(chunk_size=1024):
                dlf.write(chunk)
        except BaseException:
            # Do not leave a half-written temporary file behind if the
            # download is interrupted; the error itself is passed on as is.
            dlf.close()
            raise
    # Rewind so callers can read the content from the beginning.
    dlf.seek(0)

    return dlf
def
_get_assoc_obj
(
file_or_url
,
assoc_dir
):
"""
Get a path to an associated file. Will download from internet if path
is a URL. Downloaded files are automatically hooked up for deletion after
the script terminates.
"""
if
assoc_dir
is
None
or
not
os
.
path
.
exists
(
os
.
path
.
join
(
assoc_dir
,
file_or_url
)
):
if
is_url
(
file_or_url
):
return
_download_file
(
file_or_url
)
raise
RuntimeError
(
"
Associated file path does not point to actual file or URL:
"
+
f
"'
{
assoc_dir
}
/
{
file_or_url
}
'"
)
return
os
.
path
.
join
(
assoc_dir
,
file_or_url
)
def _unzip_arc_cif(arc_file, cif_file, assoc_dir):
    """Extract a cif file from a ZIP archive.

    The archive is looked up via ``_get_assoc_obj()``, so it may be a file in
    ``assoc_dir`` or a URL to download from.

    :param arc_file: File name of the ZIP archive relative to ``assoc_dir``
                     or a URL.
    :param cif_file: Name of the cif file inside the archive.
    :param assoc_dir: Directory to look for associated files in, may be
                      ``None``.
    :returns: Whatever ``_read_mmcif()`` returns for the extracted file.
    """
    assoc_obj = _get_assoc_obj(arc_file, assoc_dir)
    try:
        with zipfile.ZipFile(assoc_obj) as arc_zip:
            with TextIOWrapper(
                arc_zip.open(cif_file), encoding="utf-8"
            ) as cif_fh:
                assoc_data = _read_mmcif(cif_fh)
    finally:
        # assoc_obj is either a plain path (nothing to close) or a temporary
        # file from a download, which needs closing to get deleted - also if
        # reading the archive failed.
        close = getattr(assoc_obj, "close", None)
        if close is not None:
            close()

    return assoc_data
...
...
@@ -256,6 +297,9 @@ def _get_associated_files(model_cif_file, assoc_dir):
)
if
row
[
idxs
[
"
file_format
"
]]
!=
"
cif
"
:
continue
# this should be easy to make reading URLs, using
# _get_assoc_obj(row[idxs["file_url"]], assoc_dir) but for now
# I have not seen the use case (no web server at hand for testing)
data
=
_read_mmcif
(
os
.
path
.
join
(
assoc_dir
,
row
[
idxs
[
"
file_url
"
]]))
assoc_files
.
append
((
data
,
row
[
idxs
[
"
entry_id
"
]]))
# make sure entry_id is matching in associated file!
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment