Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
ma-wilkins-import
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Model registry
Analyze
Contributor analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
ma-wilkins-import
Commits
38a40f13
Commit
38a40f13
authored
3 years ago
by
B13nch3n
Browse files
Options
Downloads
Patches
Plain Diff
Mockup protocol
parent
a41aadae
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
.gitignore
+3
-1
3 additions, 1 deletion
.gitignore
translate2modelcif.py
+143
-22
143 additions, 22 deletions
translate2modelcif.py
with
146 additions
and
23 deletions
.gitignore
+
3
−
1
View file @
38a40f13
A0A1B0GTU1-O75152*
.*~
\ No newline at end of file
.*~
.docker-bash-history
.DS_Store
This diff is collapsed.
Click to expand it.
translate2modelcif.py
+
143
−
22
View file @
38a40f13
...
...
@@ -16,6 +16,7 @@ import modelcif
import
modelcif.associated
import
modelcif.dumper
import
modelcif.model
import
modelcif.protocol
import
modelcif.reference
from
ost
import
io
...
...
@@ -218,6 +219,69 @@ def _get_audit_authors():
return
(
"
Foo B
"
,
"
Bar F
"
)
def
_get_protocol_steps_and_software
(
trg_ents
):
"""
Create the list of protocol steps with software and parameters used.
"""
protocol
=
[]
# modelling step
step
=
{
"
method_type
"
:
"
modeling
"
,
"
name
"
:
"
ma_protocol_step.step_name
"
,
"
details
"
:
"
ma_protocol_step.details
"
,
}
# get input data
# Must refer to data already in the JSON, so we try keywords
step
[
"
input
"
]
=
"
target_sequences
"
# get output data
# Must refer to existing data, so we try keywords
step
[
"
output
"
]
=
"
model
"
# get software
step
[
"
software
"
]
=
{
"
name
"
:
"
ColabFold
"
,
"
classification
"
:
"
model building
"
,
"
description
"
:
"
software.description
"
,
"
citation
"
:
{
"
pmid
"
:
None
,
"
title
"
:
"
ColabFold - Making protein folding accessible to all
"
,
"
journal
"
:
"
bioRxiv
"
,
"
volume
"
:
None
,
"
page_range
"
:
None
,
"
year
"
:
2022
,
"
authors
"
:
[
"
Mirdita M
"
,
"
Schütze K
"
,
"
Moriwaki Y
"
,
"
Heo L
"
,
"
Ovchinnikov S
"
,
"
Steinegger M
"
,
],
"
doi
"
:
"
10.1101/2021.08.15.456425
"
,
},
"
location
"
:
"
https://github.com/sokrypton/ColabFold
"
,
"
type
"
:
"
package
"
,
"
version
"
:
"
software.version
"
,
}
# get parameters
step
[
"
software_parameters
"
]
=
{}
protocol
.
append
(
step
)
# model selection step
# ToDo [input/ internal]: model selection step on a single model is a bit
# silly, how do we get a list of models?
step
=
{
"
method_type
"
:
"
model selection
"
,
"
name
"
:
"
ma_protocol_step.step_name
"
,
"
details
"
:
"
ma_protocol_step.details
"
,
}
step
[
"
input
"
]
=
"
model
"
step
[
"
output
"
]
=
"
model
"
step
[
"
software
"
]
=
{}
step
[
"
software_parameters
"
]
=
{}
protocol
.
append
(
step
)
return
protocol
def
_get_title
():
"""
Get a title for this modelling experiment.
"""
# ToDo [input]: Add title
...
...
@@ -379,28 +443,9 @@ def _get_scores(data, prfx):
data
.
update
(
scrs_json
)
def
_store_as_modelcif
(
interaction_name
,
data_json
,
ost_ent
,
file_prfx
):
"""
Mix all the data into a ModelCIF file.
"""
print
(
"
generating ModelCIF objects...
"
,
end
=
""
)
pstart
=
timer
()
# ToDo [internal]: Get protocol/ software
# ToDo [internal]: Get QA metrics
# create system to gather all the data
system
=
modelcif
.
System
(
title
=
data_json
[
"
title
"
],
id
=
interaction_name
.
upper
(),
model_details
=
data_json
[
"
model_details
"
],
)
# create target entities, references, source, asymmetric units & assembly
# for source we assume all chains come from the same taxon
source
=
ihm
.
source
.
Natural
(
ncbi_taxonomy_id
=
data_json
[
"
target_entities
"
][
0
][
"
up_ncbi_taxid
"
],
scientific_name
=
data_json
[
"
target_entities
"
][
0
][
"
up_organism
"
],
)
# create an asymmetric unit and an entity per target sequence
asym_units
=
{}
for
cif_ent
in
data_json
[
"
target_entities
"
]:
def
_get_modelcif_entities
(
target_ents
,
source
,
asym_units
,
system
):
"""
Create ModelCIF entities and asymmetric units.
"""
for
cif_ent
in
target_ents
:
# ToDo [input]: Get entity description
mdlcif_ent
=
modelcif
.
Entity
(
cif_ent
[
"
pdb_sequence
"
],
...
...
@@ -430,6 +475,31 @@ def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
)
system
.
target_entities
.
append
(
mdlcif_ent
)
def
_store_as_modelcif
(
interaction_name
,
data_json
,
ost_ent
,
file_prfx
):
"""
Mix all the data into a ModelCIF file.
"""
print
(
"
generating ModelCIF objects...
"
,
end
=
""
)
pstart
=
timer
()
# ToDo [internal]: Get protocol/ software
# create system to gather all the data
system
=
modelcif
.
System
(
title
=
data_json
[
"
title
"
],
id
=
interaction_name
.
upper
(),
model_details
=
data_json
[
"
model_details
"
],
)
# create target entities, references, source, asymmetric units & assembly
# for source we assume all chains come from the same taxon
source
=
ihm
.
source
.
Natural
(
ncbi_taxonomy_id
=
data_json
[
"
target_entities
"
][
0
][
"
up_ncbi_taxid
"
],
scientific_name
=
data_json
[
"
target_entities
"
][
0
][
"
up_organism
"
],
)
# create an asymmetric unit and an entity per target sequence
asym_units
=
{}
_get_modelcif_entities
(
data_json
[
"
target_entities
"
],
source
,
asym_units
,
system
)
# ToDo [input]: Get Assembly name
assembly
=
modelcif
.
Assembly
(
asym_units
.
values
(),
name
=
"
ma_struct_assembly_details.assembly_name
"
...
...
@@ -460,6 +530,56 @@ def _store_as_modelcif(interaction_name, data_json, ost_ent, file_prfx):
)
system
.
model_groups
.
append
(
model_group
)
# Add protocol
protocol
=
modelcif
.
protocol
.
Protocol
()
for
js_step
in
data_json
[
"
protocol
"
]:
sftwre
=
None
# ToDo [input]: Turn into software group if parameters are available
# ToDo [input]: Get software.description
# ToDo [input]: Get software.version
if
js_step
[
"
software
"
]:
sftwre
=
modelcif
.
Software
(
js_step
[
"
software
"
][
"
name
"
],
js_step
[
"
software
"
][
"
classification
"
],
js_step
[
"
software
"
][
"
description
"
],
js_step
[
"
software
"
][
"
location
"
],
js_step
[
"
software
"
][
"
type
"
],
js_step
[
"
software
"
][
"
version
"
],
citation
=
ihm
.
Citation
(
pmid
=
js_step
[
"
software
"
][
"
citation
"
][
"
pmid
"
],
title
=
js_step
[
"
software
"
][
"
citation
"
][
"
title
"
],
journal
=
js_step
[
"
software
"
][
"
citation
"
][
"
journal
"
],
volume
=
js_step
[
"
software
"
][
"
citation
"
][
"
volume
"
],
page_range
=
js_step
[
"
software
"
][
"
citation
"
][
"
page_range
"
],
year
=
js_step
[
"
software
"
][
"
citation
"
][
"
year
"
],
authors
=
js_step
[
"
software
"
][
"
citation
"
][
"
authors
"
],
doi
=
js_step
[
"
software
"
][
"
citation
"
][
"
doi
"
],
),
)
if
js_step
[
"
input
"
]
==
"
target_sequences
"
:
input_data
=
modelcif
.
data
.
DataGroup
(
system
.
target_entities
)
elif
js_step
[
"
input
"
]
==
"
model
"
:
input_data
=
model
else
:
raise
RuntimeError
(
f
"
Unknown protocol input:
'
{
js_step
[
'
input
'
]
}
'"
)
if
js_step
[
"
output
"
]
==
"
model
"
:
output_data
=
model
else
:
raise
RuntimeError
(
f
"
Unknown protocol output:
'
{
js_step
[
'
output
'
]
}
'"
)
protocol
.
steps
.
append
(
modelcif
.
protocol
.
Step
(
input_data
=
input_data
,
output_data
=
output_data
,
name
=
js_step
[
"
name
"
],
details
=
js_step
[
"
details
"
],
software
=
sftwre
,
)
)
protocol
.
steps
[
-
1
].
method_type
=
js_step
[
"
method_type
"
]
system
.
protocols
.
append
(
protocol
)
# write modelcif System to file
print
(
"
write to disk...
"
,
end
=
""
,
flush
=
True
)
pstart
=
timer
()
...
...
@@ -484,6 +604,7 @@ def _create_model_json(data, pdb_file, up_acs):
"""
Create a dictionary (mimicking JSON) that contains all the data.
"""
data
[
"
target_entities
"
],
ost_ent
=
_get_entities
(
pdb_file
,
up_acs
)
data
[
"
protocol
"
]
=
_get_protocol_steps_and_software
(
data
[
"
target_entities
"
])
return
ost_ent
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment