Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
AlphaPulldown-ModelCIF-Conversion
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Bienchen
AlphaPulldown-ModelCIF-Conversion
Commits
d45d73c3
Commit
d45d73c3
authored
1 year ago
by
Bienchen
Browse files
Options
Downloads
Patches
Plain Diff
Add software to AF2 scoresD
parent
c1006128
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
.gitignore
+5
-0
5 additions, 0 deletions
.gitignore
convert_to_modelcif.py
+103
-7
103 additions, 7 deletions
convert_to_modelcif.py
with
108 additions
and
7 deletions
.gitignore
+
5
−
0
View file @
d45d73c3
# Don't have Emac's backup files
# Don't have Emac's backup files
*~
*~
# ignore test scripts
biop-test.py
junk.py
# ignore some files used for testing
# ignore some files used for testing
1ake.1.pdb
1ake.1.pdb
3lre.3.A.pdb
3lre.3.A.pdb
6xne.pdb
6xne.pdb
cmp.cif
This diff is collapsed.
Click to expand it.
convert_to_modelcif.py
+
103
−
7
View file @
d45d73c3
...
@@ -20,6 +20,7 @@ from Bio.PDB.Structure import Structure as BioStructure
...
@@ -20,6 +20,7 @@ from Bio.PDB.Structure import Structure as BioStructure
from
absl
import
app
,
flags
,
logging
from
absl
import
app
,
flags
,
logging
import
numpy
as
np
import
numpy
as
np
import
ihm.citations
import
modelcif
import
modelcif
import
modelcif.associated
import
modelcif.associated
import
modelcif.dumper
import
modelcif.dumper
...
@@ -120,8 +121,13 @@ class _Biopython2ModelCIF(modelcif.model.AbInitioModel):
...
@@ -120,8 +121,13 @@ class _Biopython2ModelCIF(modelcif.model.AbInitioModel):
occupancy
=
atm
.
occupancy
,
occupancy
=
atm
.
occupancy
,
)
)
def
add_scores
(
self
,
scores_json
,
entry_id
,
file_prefix
):
def
add_scores
(
self
,
scores_json
,
entry_id
,
file_prefix
,
sw_dct
):
"""
Add QA metrics
"""
"""
Add QA metrics
"""
_GlobalPLDDT
.
software
=
sw_dct
[
"
alphafold
"
]
_GlobalPTM
.
software
=
sw_dct
[
"
alphafold
"
]
_GlobalIPTM
.
software
=
sw_dct
[
"
alphafold
"
]
_LocalPLDDT
.
software
=
sw_dct
[
"
alphafold
"
]
_LocalPairwisePAE
.
software
=
sw_dct
[
"
alphafold
"
]
# global scores
# global scores
self
.
qa_metrics
.
extend
(
self
.
qa_metrics
.
extend
(
(
(
...
@@ -254,9 +260,14 @@ def _store_as_modelcif(
...
@@ -254,9 +260,14 @@ def _store_as_modelcif(
name
=
"
ToDo: Model <N> (ranked #<M>)
"
,
name
=
"
ToDo: Model <N> (ranked #<M>)
"
,
)
)
# create software list from feature metadata
sw_dct
=
_get_software_data
(
data_json
[
"
__meta__
"
])
# process scores
# process scores
mdl_file
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
mdl_file
))[
0
]
mdl_file
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
mdl_file
))[
0
]
system
.
repositories
.
append
(
model
.
add_scores
(
data_json
,
system
.
id
,
mdl_file
))
system
.
repositories
.
append
(
model
.
add_scores
(
data_json
,
system
.
id
,
mdl_file
,
sw_dct
)
)
system
.
model_groups
.
append
(
modelcif
.
model
.
ModelGroup
([
model
]))
system
.
model_groups
.
append
(
modelcif
.
model
.
ModelGroup
([
model
]))
...
@@ -316,9 +327,16 @@ def _compress_cif_file(cif_file):
...
@@ -316,9 +327,16 @@ def _compress_cif_file(cif_file):
def
_get_model_details
(
cmplx_name
:
str
,
data_json
:
dict
)
->
str
:
def
_get_model_details
(
cmplx_name
:
str
,
data_json
:
dict
)
->
str
:
"""
Get the model description.
"""
"""
Get the model description.
"""
ap_versions
=
[]
ap_versions
=
[]
for
mnmr
in
data_json
[
"
__meta__
"
]:
for
mnmr
in
data_json
[
"
__meta__
"
]:
# mnmr = monomer
if
data_json
[
"
__meta__
"
][
mnmr
][
"
ap_version
"
]
not
in
ap_versions
:
if
(
ap_versions
.
append
(
data_json
[
"
__meta__
"
][
mnmr
][
"
ap_version
"
])
data_json
[
"
__meta__
"
][
mnmr
][
"
software
"
][
"
alphapulldown
"
][
"
version
"
]
not
in
ap_versions
):
ap_versions
.
append
(
data_json
[
"
__meta__
"
][
mnmr
][
"
software
"
][
"
alphapulldown
"
][
"
version
"
]
)
# ToDo: fetch AF2 version/ have it in metadata JSON
# ToDo: fetch AF2 version/ have it in metadata JSON
return
(
return
(
...
@@ -348,7 +366,13 @@ def _get_feature_metadata(
...
@@ -348,7 +366,13 @@ def _get_feature_metadata(
# ToDo: make sure that its always ASCII
# ToDo: make sure that its always ASCII
with
open
(
feature_json
,
"
r
"
,
encoding
=
"
ascii
"
)
as
jfh
:
with
open
(
feature_json
,
"
r
"
,
encoding
=
"
ascii
"
)
as
jfh
:
jdata
=
json
.
load
(
jfh
)
jdata
=
json
.
load
(
jfh
)
modelcif_json
[
"
__meta__
"
][
mnmr
][
"
ap_version
"
]
=
jdata
[
"
version
"
]
modelcif_json
[
"
__meta__
"
][
mnmr
][
"
software
"
]
=
jdata
[
"
binaries
"
]
modelcif_json
[
"
__meta__
"
][
mnmr
][
"
software
"
][
"
alphapulldown
"
]
=
{
"
version
"
:
jdata
[
"
version
"
]
}
modelcif_json
[
"
__meta__
"
][
mnmr
][
"
software
"
][
"
alphafold
"
]
=
{
"
version
"
:
jdata
[
"
AlphaFold version
"
]
}
return
cmplx_name
return
cmplx_name
...
@@ -418,7 +442,6 @@ def _get_scores(cif_json: dict, scr_file: str) -> None:
...
@@ -418,7 +442,6 @@ def _get_scores(cif_json: dict, scr_file: str) -> None:
"""
Add scores to JSON data.
"""
"""
Add scores to JSON data.
"""
with
open
(
scr_file
,
"
rb
"
)
as
sfh
:
with
open
(
scr_file
,
"
rb
"
)
as
sfh
:
scr_dict
=
pickle
.
load
(
sfh
)
scr_dict
=
pickle
.
load
(
sfh
)
# dict_keys(['distogram', 'experimentally_resolved', 'masked_msa', 'num_recycles', 'structure_module', 'aligned_confidence_probs', 'max_predicted_aligned_error', 'ranking_confidence'])
# Get pLDDT as a list, the global pLDDT is the average, calculated on the
# Get pLDDT as a list, the global pLDDT is the average, calculated on the
# spot.
# spot.
cif_json
[
"
plddt
"
]
=
scr_dict
[
"
plddt
"
]
cif_json
[
"
plddt
"
]
=
scr_dict
[
"
plddt
"
]
...
@@ -427,6 +450,79 @@ def _get_scores(cif_json: dict, scr_file: str) -> None:
...
@@ -427,6 +450,79 @@ def _get_scores(cif_json: dict, scr_file: str) -> None:
cif_json
[
"
pae
"
]
=
scr_dict
[
"
predicted_aligned_error
"
]
cif_json
[
"
pae
"
]
=
scr_dict
[
"
predicted_aligned_error
"
]
def _get_software_data(meta_json: dict) -> dict:
    """Turn meta data about software into modelcif.Software objects.

    Args:
        meta_json: Mapping whose values each carry a "software" dictionary.
            That dictionary maps a software name to a dictionary holding a
            "version" entry.

    Returns:
        Dictionary keyed by software name. The value is a
        ``modelcif.Software`` object with its version filled in from the
        meta data, or ``None`` for software that is recognised but has no
        ``modelcif.Software`` entry defined here yet.

    Raises:
        RuntimeError: If the meta data names software that is not listed in
            this function, or if two entries report different versions for
            the same software.
    """
    # {key from json: dict needed to produce sw entry plus internal key}
    # The objects are created per call, so every call starts with an unset
    # version on each software entry.
    sw_data = {
        "jackhmmer": None,
        "hhblits": None,
        "hhsearch": None,
        "hmmsearch": None,
        "hmmbuild": None,
        "kalign": None,
        "alphapulldown": None,
        "alphafold": modelcif.Software(
            "AlphaFold-Multimer",
            "model building",
            "Structure prediction",
            "https://github.com/deepmind/alphafold",
            "package",
            None,
            ihm.Citation(
                pmid=None,
                title="Protein complex prediction with AlphaFold-Multimer.",
                journal="bioRxiv",
                volume=None,
                page_range=None,
                year=2021,
                authors=[
                    "Evans, R.",
                    "O'Neill, M.",
                    "Pritzel, A.",
                    "Antropova, N.",
                    "Senior, A.",
                    "Green, T.",
                    "Zidek, A.",
                    "Bates, R.",
                    "Blackwell, S.",
                    "Yim, J.",
                    "Ronneberger, O.",
                    "Bodenstein, S.",
                    "Zielinski, M.",
                    "Bridgland, A.",
                    "Potapenko, A.",
                    "Cowie, A.",
                    "Tunyasuvunakool, K.",
                    "Jain, R.",
                    "Clancy, E.",
                    "Kohli, P.",
                    "Jumper, J.",
                    "Hassabis, D.",
                ],
                doi="10.1101/2021.10.04.463034",
            ),
        ),
    }

    for data in meta_json.values():
        for sftwr, sw_meta in data["software"].items():
            if sftwr not in sw_data:
                raise RuntimeError(
                    f"Unknown software found in meta data: '{sftwr}'"
                )
            version = sw_meta["version"]

            sw_obj = sw_data[sftwr]
            if sw_obj is None:
                # Known software without a modelcif.Software entry: there is
                # no object to store a version on.
                continue
            # A version recorded by an earlier entry must agree with every
            # later one.
            if sw_obj.version is not None and sw_obj.version != version:
                raise RuntimeError(
                    f"Software versions differ for '{sftwr}': "
                    f"'{sw_obj.version}' vs. '{version}'"
                )
            sw_obj.version = version

    return sw_data
def
alphapulldown_model_to_modelcif
(
def
alphapulldown_model_to_modelcif
(
cmplx_name
:
str
,
cmplx_name
:
str
,
mdl_file
:
str
,
mdl_file
:
str
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment