Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Q
QMEAN
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
QMEAN
Commits
1f8bfed2
Commit
1f8bfed2
authored
3 years ago
by
B13nch3n
Browse files
Options
Downloads
Patches
Plain Diff
Update Docker entrypoint
parent
e8a47274
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
docker/docker-entrypoint.sh
+2
-6
2 additions, 6 deletions
docker/docker-entrypoint.sh
docker/run-qmeandisco.py
+0
-425
0 additions, 425 deletions
docker/run-qmeandisco.py
with
2 additions
and
431 deletions
docker/docker-entrypoint.sh
+
2
−
6
View file @
1f8bfed2
...
...
@@ -10,12 +10,8 @@ set -eo pipefail
if
[
$#
-eq
0
]
;
then
:
#set -- celery -A qm.clryschdlr worker --loglevel=info
else
if
[
$1
==
"qmeandisco"
]
;
then
set
--
/qmean/run_qmean.py
--method
QMEANDisCo
"
${
@
:2
}
"
elif
[
$1
==
"qmean"
]
;
then
set
--
/qmean/run_qmean.py
--method
QMEAN
"
${
@
:2
}
"
elif
[
$1
==
"qmeanbrane"
]
;
then
set
--
/qmean/run_qmean.py
--method
QMEANBrane
"
${
@
:2
}
"
if
[
$1
==
"run_qmean.py"
]
;
then
set
--
/qmean/run_qmean.py
"
${
@
:2
}
"
fi
fi
...
...
This diff is collapsed.
Click to expand it.
docker/run-qmeandisco.py
deleted
100755 → 0
+
0
−
425
View file @
e8a47274
#!/usr/local/bin/ost
"""
Run QMEAN DisCo for a mmCIF/ PDB file. Writes to the same location as where
the file comes from with
'
.json
'
as suffix.
"""
from
datetime
import
datetime
import
argparse
import
json
import
math
import
os
#import tempfile
from
qmean
import
QMEANScorer
from
qmean
import
predicted_sequence_features
import
ost
# from ost import conop
from
ost
import
(
LogLevel
,
LogSink
,
PopLogSink
,
PopVerbosityLevel
,
PushLogSink
,
PushVerbosityLevel
,
)
from
ost.io
import
LoadMMCIF
,
LoadPDB
,
LoadSequenceList
# from ost.mol.alg import Molck, MolckSettings
def
_ParseArgs
():
"""
Fetch arguments from the command line call.
"""
parser
=
argparse
.
ArgumentParser
(
formatter_class
=
argparse
.
ArgumentDefaultsHelpFormatter
,
description
=
__doc__
,
)
parser
.
add_argument
(
"
model
"
,
help
=
"
Model file
"
)
parser
.
add_argument
(
"
-f
"
,
"
--format
"
,
help
=
"
Specify format of input file, mmCIF or PDB. Default is to
"
+
"
determine from file extension.
"
,
default
=
None
,
choices
=
[
"
mmcif
"
,
"
pdb
"
],
)
parser
.
add_argument
(
"
-o
"
,
"
--output
"
,
help
=
"
Path to write the JSON data to.
"
)
parser
.
add_argument
(
"
-s
"
,
"
--seqres
"
,
help
=
"
File with a list of sequences corresponding to the peptide
"
+
"
chains in the model.
"
,
)
parser
.
add_argument
(
"
-v
"
,
"
--verbose
"
,
dest
=
"
verbose_flag
"
,
default
=
0
,
help
=
"
Print more detailed output (0/1)
"
,
)
opts
=
parser
.
parse_args
()
return
opts
def
_TurnIntoQMEANServerJSON
(
# pylint: disable=bad-continuation
scores_json
,
ost_ent
,
seqres
,
file_nm
,
loaded_seqres
):
"""
Add
"
non score
"
data to the QMEAN JSON output.
"""
result_json
=
{
# "2019-03-07T15:33:12.932"
# 2020-03-06 15:53:48.925243
"
created
"
:
"
{:%Y-%m-%dT%H:%M:%S.%f}
"
.
format
(
datetime
.
now
()),
"
models
"
:
dict
(),
"
results_page
"
:
""
,
"
model_pdb
"
:
""
,
"
qmean_version
"
:
os
.
environ
.
get
(
"
VERSION_QMEAN
"
,
None
),
# "model_modifications": molck_json,
# Project naming may be a future feature
"
project_name
"
:
"
Untitled Project
"
,
# This needs to be fixed once we have the data in for doing DisCo
# sequence searches!
"
smtl_version
"
:
"
2020-06-24
"
,
"
method
"
:
"
QMEAN
"
,
"
seqres_uploaded
"
:
None
,
# Status may chainge once we have error handling
"
status
"
:
"
COMPLETED
"
,
}
# get per model scores
mdl_for_json
=
{
"
original_name
"
:
file_nm
,
# scores:
# Right now it seems to work that residue numbering from structures
# matches SEQRES. But in the past there has been user input where that
# was broken. So in case this happens again, here is an idea for a
# strategy:
# Create deep copy of entity, let the renumbered copy be equipped with
# bfactors, run along residues, fetch bfactor from renumbered entity
# and resnum/ chain name from deep copy to undo renumbering,
# scorer.AssignModelBFactors() seems important for that
"
scores
"
:
scores_json
,
"
chains
"
:
dict
(),
}
for
i
,
chn
in
enumerate
(
ost_ent
.
chains
):
seq
=
seqres
[
i
]
if
loaded_seqres
:
aln
=
predicted_sequence_features
.
AlignChainToSEQRES
(
chn
,
str
(
seq
))
aseq
=
aln
.
GetSequence
(
1
)
else
:
aseq
=
seq
mdl_for_json
[
"
chains
"
][
seq
.
name
]
=
{
"
seqres
"
:
str
(
seq
),
"
atomseq
"
:
str
(
aseq
),
"
name
"
:
"
seq_chain_%d
"
%
i
,
}
if
not
ost_ent
.
GetName
():
result_json
[
"
models
"
][
"
model_001
"
]
=
mdl_for_json
else
:
result_json
[
"
models
"
][
ost_ent
.
GetName
()]
=
mdl_for_json
return
result_json
def
_RenumberModel
(
model
,
seqres_list
):
"""
Make residue numbers correspond to SEQRES sequence.
We assume that the chains and the sequence list have the same order.
"""
edi
=
model
.
handle
.
EditXCS
()
for
chn
,
seqres
in
zip
(
model
.
chains
,
seqres_list
):
aln
=
predicted_sequence_features
.
AlignChainToSEQRES
(
chn
,
str
(
seqres
))
model_chain
=
model
.
CreateEmptyView
()
model_chain
.
AddChain
(
chn
,
ost
.
mol
.
INCLUDE_ALL
)
aln
.
AttachView
(
1
,
model_chain
)
for
i
in
range
(
aln
.
GetLength
()):
res
=
aln
.
GetResidue
(
1
,
i
)
if
res
.
IsValid
():
new_num
=
ost
.
mol
.
ResNum
(
i
+
1
)
edi
.
SetResidueNumber
(
res
.
handle
,
new_num
)
return
model
def
_AssembleResultJSON
(
scorer
,
seqres
):
"""
Given a QMEANScorer object, this function will extract global and local
scores and return them as a dictionary. You can access them by the keys
"
global_scores
"
and
"
local_scores
"
.
For
"
global_scores
"
you get another dictionary with keys:
*
"
qmean4_norm_score
"
*
"
qmean4_z_score
"
*
"
qmean6_norm_score
"
*
"
qmean6_z_score
"
*
"
interaction_norm_score
"
*
"
interaction_z_score
"
*
"
cbeta_norm_score
"
*
"
cbeta_z_score
"
*
"
packing_norm_score
"
*
"
packing_z_score
"
*
"
torsion_norm_score
"
*
"
torsion_z_score
"
*
"
ss_agreement_norm_score
"
*
"
ss_agreement_z_score
"
*
"
acc_agreement_norm_score
"
*
"
acc_agreement_z_score
"
*
"
avg_local_score
"
For
"
local_scores
"
you get another dictionary with chain names as keys
and lists with local scores as value. The local score lists have the
same length as the according SEQRES. Assuming that the residue numbers
in the model correspond to their location in the SEQRES, they will determine
the location of a per residue score in this result list. The underlying
assumption there is a SEQRES numbering scheme starting from one.
:param scorer: Scorer object from QMEAN from which to extract stuff.
:type scorer: :class:`QMEANScorer`
:param seqres: SEQRES sequences for every chain, this list must have the
exact same length as there are chains in the scored model.
Sequences must be sorted following the order of the chains.
:type seqres: :class:`list` of :class:`ost.seq.SequenceHandle`
"""
# QMEAN gives NaN if something is not defined. JSON prefers None
def
NaN_To_None
(
val
):
if
math
.
isnan
(
val
):
return
None
return
val
global_scores
=
dict
()
global_scores
[
"
qmean4_norm_score
"
]
=
NaN_To_None
(
scorer
.
qmean4_score
)
global_scores
[
"
qmean4_z_score
"
]
=
NaN_To_None
(
scorer
.
qmean4_z_score
)
try
:
global_scores
[
"
qmean6_norm_score
"
]
=
NaN_To_None
(
scorer
.
qmean6_score
)
global_scores
[
"
qmean6_z_score
"
]
=
NaN_To_None
(
scorer
.
qmean6_z_score
)
except
Exception
:
# pylint: disable=broad-except
global_scores
[
"
qmean6_norm_score
"
]
=
None
global_scores
[
"
qmean6_z_score
"
]
=
None
scores
=
scorer
.
qmean6_components
global_scores
[
"
interaction_norm_score
"
]
=
NaN_To_None
(
scores
[
"
interaction
"
])
global_scores
[
"
interaction_z_score
"
]
=
NaN_To_None
(
scores
[
"
interaction_z_score
"
]
)
global_scores
[
"
cbeta_norm_score
"
]
=
NaN_To_None
(
scores
[
"
cbeta
"
])
global_scores
[
"
cbeta_z_score
"
]
=
NaN_To_None
(
scores
[
"
cbeta_z_score
"
])
global_scores
[
"
packing_norm_score
"
]
=
NaN_To_None
(
scores
[
"
packing
"
])
global_scores
[
"
packing_z_score
"
]
=
NaN_To_None
(
scores
[
"
packing_z_score
"
])
global_scores
[
"
torsion_norm_score
"
]
=
NaN_To_None
(
scores
[
"
torsion
"
])
global_scores
[
"
torsion_z_score
"
]
=
NaN_To_None
(
scores
[
"
torsion_z_score
"
])
global_scores
[
"
ss_agreement_norm_score
"
]
=
NaN_To_None
(
scores
[
"
ss_agreement
"
]
)
global_scores
[
"
ss_agreement_z_score
"
]
=
NaN_To_None
(
scores
[
"
ss_agreement_z_score
"
]
)
global_scores
[
"
acc_agreement_norm_score
"
]
=
NaN_To_None
(
scores
[
"
acc_agreement
"
]
)
global_scores
[
"
acc_agreement_z_score
"
]
=
NaN_To_None
(
scores
[
"
acc_agreement_z_score
"
]
)
global_scores
[
"
avg_local_score
"
]
=
NaN_To_None
(
scorer
.
avg_local_score
)
global_scores
[
"
avg_local_score_error
"
]
=
NaN_To_None
(
scorer
.
avg_local_score_error
)
local_scores
=
dict
()
for
chn
,
seq
in
zip
(
scorer
.
model
.
chains
,
seqres
):
score_list
=
list
([
None
]
*
len
(
seq
))
per_chain_scores
=
scorer
.
local_scores
[
chn
.
GetName
()]
for
rnum
,
score
in
per_chain_scores
.
items
():
if
rnum
<
1
or
rnum
>
len
(
seq
):
raise
RuntimeError
(
"
Observed ResNum not matching provided SEQRES!
"
)
score_list
[
rnum
-
1
]
=
NaN_To_None
(
score
)
local_scores
[
chn
.
GetName
()]
=
score_list
result
=
dict
()
result
[
"
global_scores
"
]
=
global_scores
result
[
"
local_scores
"
]
=
local_scores
return
result
def
_GetOSTEntity
(
modelfile
,
load_seqres
,
force
):
"""
Read an OST entity either from mmCIF or PDB. Also fetch the sequence.
:param modelfile: The path to the file containing the structure.
:type modelfile: :class:`str`
:param force: Enforce a file format, takes
"
mmcif
"
or
"
pdb
"
.
:type force: :class:`str`
"""
# Determine file format. If not forced, look at suffix.
sformat
=
force
if
not
sformat
:
ext
=
modelfile
.
split
(
"
.
"
)
if
ext
[
-
1
]
==
"
gz
"
:
ext
=
ext
[:
-
1
]
if
len
(
ext
)
>
1
:
sformat
=
ext
[
-
1
].
lower
()
else
:
raise
RuntimeError
(
"
Could not determine format of file
'
%s
'
.
"
%
modelfile
)
# Load the structure
if
sformat
in
[
"
mmcif
"
,
"
cif
"
]:
sformat
=
"
mmcif
"
if
load_seqres
:
mdl_ent
,
mdl_seq
=
LoadMMCIF
(
modelfile
,
seqres
=
True
)
else
:
mdl_ent
=
LoadMMCIF
(
modelfile
,
seqres
=
False
)
mdl_seq
=
None
elif
sformat
in
[
"
pdb
"
]:
sformat
=
"
pdb
"
if
load_seqres
:
mdl_ent
,
mdl_seq
=
LoadPDB
(
modelfile
,
seqres
=
True
)
else
:
mdl_ent
=
LoadPDB
(
modelfile
,
seqres
=
False
)
mdl_seq
=
None
else
:
raise
RuntimeError
(
"
Unknow/ unsuppoted file extension found for file
'
%s
'"
%
modelfile
)
return
mdl_ent
.
Select
(
"
peptide=true
"
),
mdl_seq
,
sformat
class
_MolckToJsonLogger
(
LogSink
):
"""
Append Molck output to results object.
"""
def
__init__
(
self
):
LogSink
.
__init__
(
self
)
self
.
processed
=
_MolckToJsonLogger
.
SetupDict
()
def
__enter__
(
self
):
PushVerbosityLevel
(
LogLevel
.
Info
)
PushLogSink
(
self
)
return
self
def
__exit__
(
self
,
exc_type
,
exc_value
,
traceback
):
PopVerbosityLevel
()
PopLogSink
()
def
LogMessage
(
self
,
message
,
severity
):
# pylint: disable=unused-argument
"""
Send a message to the logger.
"""
message
=
message
.
strip
()
if
message
.
endswith
(
"
is not a standard amino acid --> removed
"
):
res
=
message
.
split
()[
0
]
self
.
processed
[
"
removed_non_aa
"
].
append
(
res
)
else
:
raise
RuntimeError
(
"
Found unknown Molck output:
'
%s
'"
%
message
)
def
ToJson
(
self
):
"""
Return what the logger collected as JSON.
"""
return
self
.
processed
@staticmethod
def
SetupDict
():
"""
Create the empty JSON structure.
This is meant for the case the logger is not in use but you still want
the keys, etc. of the JSON output for the full QMEAN JSON format.
"""
return
{
"
removed_non_aa
"
:
[]}
'''
def _RunAccPred():
"""
Get solvent accesibility prediction.
"""
# get an alignment ready
aln_handle, aln_filename = tempfile.mkstemp(text=True,
dir=os.getenv(
'
TMP
'
, None))
def _PrepareSS_ACCPro(msa):
# write to alignment to tmp-file
os.write(aln_handle, (
"
%d
\n
"
% msa.GetCount()).encode())
for seq in msa.sequences:
os.write(aln_handle, (
"
%s
\n
"
% str(seq).replace(
'
-
'
,
'
.
'
)).encode())
os.close(aln_handle)
return aln_filename
'''
def
_main
():
"""
Run QMEAN DisCO
"""
opts
=
_ParseArgs
()
load_seqres
=
True
if
opts
.
seqres
:
load_seqres
=
False
mdl_ent
,
mdl_seq
,
fmt
=
_GetOSTEntity
(
opts
.
model
,
load_seqres
,
opts
.
format
)
# We need to be careful with Molck since it alters the input structure.
# There should not be a problem with proper mmCIF files but with PDB.
# rm_non_std - removes non-amino acid residues like HOH and ligands. If
# those residues appear within a peptide chain, OST gets
# problems dealing with the sequence corresponding to that
# chain. It is safe to remove water and ligands since QMEAN
# does not touch them.
# molck_out = _MolckToJsonLogger.SetupDict()
# if fmt == "pdb":
# with _MolckToJsonLogger() as molcklogger:
# Molck(
# mdl_ent,
# conop.GetDefaultLib(),
# MolckSettings(
# rm_unk_atoms=False,
# rm_non_std=True,
# rm_hyd_atoms=False,
# rm_oxt_atoms=False,
# rm_zero_occ_atoms=False,
# colored=False,
# map_nonstd_res=False,
# assign_elem=False,
# ),
# )
# molck_out = molcklogger.ToJson()
if
opts
.
seqres
:
mdl_seq
=
LoadSequenceList
(
opts
.
seqres
,
format
=
"
fasta
"
)
mdl_ent
=
_RenumberModel
(
mdl_ent
,
mdl_seq
)
#_RunAccPred()
qmean_scorer
=
QMEANScorer
(
mdl_ent
,
psipred
=
None
,
accpro
=
None
,
dc
=
None
,
use_nn
=
True
)
scores_json
=
_AssembleResultJSON
(
qmean_scorer
,
mdl_seq
)
result_json
=
_TurnIntoQMEANServerJSON
(
scores_json
,
mdl_ent
,
mdl_seq
,
os
.
path
.
basename
(
opts
.
model
),
not
load_seqres
,
)
if
not
opts
.
output
:
out_path
=
"
%s.json
"
%
os
.
path
.
splitext
(
opts
.
model
)[
0
]
else
:
out_path
=
opts
.
output
with
open
(
out_path
,
"
w
"
)
as
ofh
:
json
.
dump
(
result_json
,
ofh
,
indent
=
2
,
sort_keys
=
True
)
if
__name__
==
"
__main__
"
:
_main
()
# LocalWords: OST mmCIF PDB param Molck HOH ligands QMEAN SEQRES
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment