Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
ma-wilkins-import
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
ma-wilkins-import
Commits
68ad8fb6
Commit
68ad8fb6
authored
2 years ago
by
Bienchen
Browse files
Options
Downloads
Patches
Plain Diff
Add selection step
parent
6b9c2544
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
translate2modelcif.py
+53
-63
53 additions, 63 deletions
translate2modelcif.py
with
53 additions
and
63 deletions
translate2modelcif.py
+
53
−
63
View file @
68ad8fb6
...
...
@@ -249,7 +249,6 @@ def _check_model_extra_files_present(model_dir, pdb_file):
def
_get_audit_authors
():
"""
Return the list of authors that produced this model.
"""
# ToDo: tell Xabi that his name can't have a á in mmCIF
return
(
"
Bartolec, T.
"
,
"
Vazquez-Campos, X.
"
,
...
...
@@ -289,7 +288,8 @@ def _parse_colabfold_config(cnfg_file):
use_msa
=
False
elif
cf_config
[
"
msa_mode
"
]
==
"
custom
"
:
print
(
"
WARNING: Custom MSA mode used. Not clear from config what to do here!
"
"
WARNING: Custom MSA mode used. Not clear from config what to do
"
+
"
here!
"
)
seq_dbs
=
[]
use_mmseqs
=
False
...
...
@@ -311,63 +311,71 @@ def _parse_colabfold_config(cnfg_file):
else
:
raise
ValueError
(
f
"
Unknown model_type
{
cf_config
[
'
model_type
'
]
}
"
)
# write description
description
=
f
"
Model generated using ColabFold v
{
cf_config
[
'
version
'
]
}
"
# write
modeling
description
mdl_
description
=
f
"
Model generated using ColabFold v
{
cf_config
[
'
version
'
]
}
"
if
use_multimer
:
description
+=
f
"
with AlphaFold-Multimer (v
{
multimer_version
}
)
"
mdl_
description
+=
f
"
with AlphaFold-Multimer (v
{
multimer_version
}
)
"
else
:
description
+=
"
with AlphaFold
"
mdl_
description
+=
"
with AlphaFold
"
if
cf_config
[
"
stop_at_score
"
]
<
100
:
# early stopping feature of ColabFold
upto
=
"
up to
"
else
:
upto
=
""
description
+=
(
mdl_
description
+=
(
f
"
producing
{
upto
}{
cf_config
[
'
num_models
'
]
}
models
"
f
"
with
{
upto
}{
cf_config
[
'
num_recycles
'
]
}
recycles each
"
)
if
cf_config
[
"
use_amber
"
]:
description
+=
"
, with AMBER relaxation
"
mdl_
description
+=
"
, with AMBER relaxation
"
else
:
description
+=
"
, without model relaxation
"
mdl_
description
+=
"
, without model relaxation
"
if
cf_config
[
"
use_templates
"
]:
print
(
"
WARNING: ColabFold may use PDB70 or custom templates.
"
"
Not clear from config!
"
)
description
+=
"
, using templates
"
mdl_
description
+=
"
, using templates
"
else
:
description
+=
"
, without templates
"
mdl_
description
+=
"
, without templates
"
if
cf_config
[
"
rank_by
"
]
==
"
plddt
"
:
description
+=
"
, ranked by pLDDT
"
mdl_
description
+=
"
, ranked by pLDDT
"
elif
cf_config
[
"
rank_by
"
]
==
"
ptmscore
"
:
description
+=
"
, ranked by pTM
"
mdl_
description
+=
"
, ranked by pTM
"
elif
cf_config
[
"
rank_by
"
]
==
"
multimer
"
:
description
+=
"
, ranked by ipTM*0.8+pTM*0.2
"
mdl_
description
+=
"
, ranked by ipTM*0.8+pTM*0.2
"
else
:
raise
ValueError
(
f
"
Unknown rank_by
{
cf_config
[
'
rank_by
'
]
}
"
)
if
use_msa
:
description
+=
"
, starting from
"
mdl_
description
+=
"
, starting from
"
if
use_mmseqs
:
msa_type
=
"
MSA
"
else
:
msa_type
=
"
custom MSA
"
if
use_multimer
:
if
cf_config
[
"
pair_mode
"
]
==
"
unpaired+paired
"
:
description
+=
f
"
paired and unpaired
{
msa_type
}
s
"
mdl_
description
+=
f
"
paired and unpaired
{
msa_type
}
s
"
elif
cf_config
[
"
pair_mode
"
]
==
"
paired
"
:
description
+=
f
"
paired
{
msa_type
}
s
"
mdl_
description
+=
f
"
paired
{
msa_type
}
s
"
elif
cf_config
[
"
pair_mode
"
]
==
"
unpaired
"
:
description
+=
f
"
unpaired
{
msa_type
}
s
"
mdl_
description
+=
f
"
unpaired
{
msa_type
}
s
"
else
:
raise
ValueError
(
f
"
Unknown pair_mode
{
cf_config
[
'
pair_mode
'
]
}
"
)
else
:
description
+=
f
"
an
{
msa_type
}
"
mdl_
description
+=
f
"
an
{
msa_type
}
"
if
use_mmseqs
:
description
+=
f
"
from MMseqs2 (
{
'
+
'
.
join
(
seq_dbs
)
}
)
"
mdl_
description
+=
f
"
from MMseqs2 (
{
'
+
'
.
join
(
seq_dbs
)
}
)
"
else
:
description
+=
"
without an MSA
"
description
+=
"
.
"
mdl_description
+=
"
without an MSA
"
mdl_description
+=
"
.
"
# write selection description
slct_description
=
(
"
Select best model, which is either the top-ranked model as
"
+
"
determined by the ColabFold pipeline (ipTM*0.8+pTM*0.2), or else
"
+
"
the model with best congruence with crosslinks reported in the
"
+
"
related study.
"
)
return
{
"
config
"
:
cf_config
,
...
...
@@ -376,7 +384,8 @@ def _parse_colabfold_config(cnfg_file):
"
use_msa
"
:
use_msa
,
"
use_multimer
"
:
use_multimer
,
"
multimer_version
"
:
multimer_version
,
"
description
"
:
description
,
"
modeling_description
"
:
mdl_description
,
"
selection_description
"
:
slct_description
,
}
...
...
@@ -388,7 +397,7 @@ def _get_protocol_steps_and_software(config_data):
step
=
{
"
method_type
"
:
"
modeling
"
,
"
name
"
:
None
,
"
details
"
:
config_data
[
"
description
"
],
"
details
"
:
config_data
[
"
modeling_
description
"
],
}
# get input data
# Must refer to data already in the JSON, so we try keywords
...
...
@@ -415,21 +424,7 @@ def _get_protocol_steps_and_software(config_data):
"
classification
"
:
"
data collection
"
,
"
description
"
:
"
Many-against-Many sequence searching
"
,
# ToDo: add citation to ihm.citations
"
citation
"
:
ihm
.
Citation
(
pmid
=
"
30615063
"
,
title
=
"
MMseqs2 desktop and local web server app for fast,
"
+
"
interactive sequence searches.
"
,
journal
=
"
Bioinformatics
"
,
volume
=
35
,
page_range
=
(
2856
,
2858
),
year
=
2019
,
authors
=
[
"
Mirdita, M.
"
,
"
Steinegger, M.
"
,
"
Soeding, J.
"
,
],
doi
=
"
10.1093/bioinformatics/bty1057
"
,
),
"
citation
"
:
ihm
.
citations
.
mmseqs2
,
"
location
"
:
"
https://github.com/soedinglab/mmseqs2
"
,
"
type
"
:
"
package
"
,
"
version
"
:
None
,
...
...
@@ -496,29 +491,22 @@ def _get_protocol_steps_and_software(config_data):
protocol
.
append
(
step
)
# model selection step
# ToDo [input/ internal]: model selection step on a single model is a bit
# silly, how do we get a list of models?
# GT-NOTES:
# - input/output should be ok without list of models
# - rank of model is already stored in _ma_model_list.model_name and
# _ma_data.name (in _store_as_modelcif)
# - ColabFold ranking details is already in details of step above.
# - Suggestion: add extra step only if AF-ranking was overruled and
# include it in step above.
# step = {
# "method_type": "model selection",
# "name": "ma_protocol_step.step_name",
# "details": "Select best model, which is either the top-ranked model "
# + "as determined by the ColabFold pipeline "
# + "(ipTM*0.8+pTM*0.2), or else the model with best "
# + "congruence with crosslinks reported in the related study.",
# }
# step["input"] = "model"
# step["output"] = "model"
# step["software"] = []
# step["software_parameters"] = {}
# protocol.append(step)
if
(
"
selection_description
"
not
in
config_data
or
len
(
config_data
[
"
selection_description
"
])
==
0
):
return
protocol
step
=
{
"
method_type
"
:
"
model selection
"
,
"
name
"
:
None
,
"
details
"
:
config_data
[
"
selection_description
"
],
}
step
[
"
input
"
]
=
"
model
"
step
[
"
output
"
]
=
"
model
"
step
[
"
software
"
]
=
[]
step
[
"
software_parameters
"
]
=
{}
protocol
.
append
(
step
)
return
protocol
...
...
@@ -584,7 +572,9 @@ def _fetch_upkb_entry(up_ac):
data
[
"
up_organism
"
]
=
""
data
[
"
up_sequence
"
]
=
""
data
[
"
up_ac
"
]
=
up_ac
rspns
=
requests
.
get
(
f
"
https://www.uniprot.org/uniprot/
{
up_ac
}
.txt
"
)
rspns
=
requests
.
get
(
f
"
https://www.uniprot.org/uniprot/
{
up_ac
}
.txt
"
,
timeout
=
180
)
for
line
in
rspns
.
iter_lines
(
decode_unicode
=
True
):
if
line
.
startswith
(
"
ID
"
):
sline
=
line
.
split
()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment