Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
1dfd44ba
Commit
1dfd44ba
authored
5 years ago
by
Studer Gabriel
Browse files
Options
Downloads
Patches
Plain Diff
updated CAD score binding - format of result has changed, check doc for further details
parent
ed4aa651
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
CHANGELOG.txt
+2
-0
2 additions, 0 deletions
CHANGELOG.txt
modules/bindings/pymod/cadscore.py
+214
-47
214 additions, 47 deletions
modules/bindings/pymod/cadscore.py
with
216 additions
and
47 deletions
CHANGELOG.txt
+
2
−
0
View file @
1dfd44ba
...
@@ -4,6 +4,8 @@ Changes in Release 1.x.x
...
@@ -4,6 +4,8 @@ Changes in Release 1.x.x
* Use system provided SQLite3 library
* Use system provided SQLite3 library
* C++ wrapper around C++ implementation of TMalign
* C++ wrapper around C++ implementation of TMalign
* Database for efficient in-memory lookup of coordinates and sequences
* Database for efficient in-memory lookup of coordinates and sequences
* Updated CAD score binding with change in result format, check documentation
for further details
Changes in Release 1.9.0
Changes in Release 1.9.0
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
...
...
This diff is collapsed.
Click to expand it.
modules/bindings/pymod/cadscore.py
+
214
−
47
View file @
1dfd44ba
...
@@ -28,8 +28,8 @@ Proteins. 2012 Aug 30. [Epub ahead of print]
...
@@ -28,8 +28,8 @@ Proteins. 2012 Aug 30. [Epub ahead of print]
Authors: Valerio Mariani, Alessandro Barbato
Authors: Valerio Mariani, Alessandro Barbato
"""
"""
import
subprocess
,
os
,
tempfile
,
platform
import
subprocess
,
os
,
tempfile
,
platform
,
re
from
ost
import
settings
,
io
from
ost
import
settings
,
io
,
mol
def
_SetupFiles
(
model
,
reference
):
def
_SetupFiles
(
model
,
reference
):
# create temporary directory
# create temporary directory
...
@@ -37,7 +37,8 @@ def _SetupFiles(model,reference):
...
@@ -37,7 +37,8 @@ def _SetupFiles(model,reference):
dia
=
'
PDB
'
dia
=
'
PDB
'
for
chain
in
model
.
chains
:
for
chain
in
model
.
chains
:
if
chain
.
name
==
"
"
:
if
chain
.
name
==
"
"
:
raise
RuntimeError
(
"
One of the chains in the model has no name. Cannot calculate CAD score
"
)
raise
RuntimeError
(
"
One of the chains in the model has no name. Cannot
"
"
calculate CAD score
"
)
if
len
(
chain
.
name
)
>
1
:
if
len
(
chain
.
name
)
>
1
:
dia
=
'
CHARMM
'
dia
=
'
CHARMM
'
break
;
break
;
...
@@ -49,7 +50,8 @@ def _SetupFiles(model,reference):
...
@@ -49,7 +50,8 @@ def _SetupFiles(model,reference):
dia
=
'
PDB
'
dia
=
'
PDB
'
for
chain
in
reference
.
chains
:
for
chain
in
reference
.
chains
:
if
chain
.
name
==
"
"
:
if
chain
.
name
==
"
"
:
raise
RuntimeError
(
"
One of the chains in the reference has no name. Cannot calculate CAD score
"
)
raise
RuntimeError
(
"
One of the chains in the reference has no name.
"
"
Cannot calculate CAD score
"
)
if
len
(
chain
.
name
)
>
1
:
if
len
(
chain
.
name
)
>
1
:
dia
=
'
CHARMM
'
dia
=
'
CHARMM
'
break
;
break
;
...
@@ -64,7 +66,6 @@ def _CleanupFiles(dir_name):
...
@@ -64,7 +66,6 @@ def _CleanupFiles(dir_name):
import
shutil
import
shutil
shutil
.
rmtree
(
dir_name
)
shutil
.
rmtree
(
dir_name
)
class
CADResult
:
class
CADResult
:
"""
"""
Holds the result of running CAD
Holds the result of running CAD
...
@@ -77,78 +78,244 @@ class CADResult:
...
@@ -77,78 +78,244 @@ class CADResult:
Dictionary containing local CAD
'
s atom-atom (AA) scores.
Dictionary containing local CAD
'
s atom-atom (AA) scores.
:type: dictionary (key: chain.resnum (e.g.: A.24), value: CAD local AA score (see CAD Documentation online)
:type: dictionary (key: tuple(chain, resnum) (e.g.:
(
"
A
"
, ost.mol.ResNum(24)), value: CAD local AA score
(see CAD Documentation online)
"""
"""
def
__init__
(
self
,
globalAA
,
localAA
):
def
__init__
(
self
,
globalAA
,
localAA
):
self
.
globalAA
=
globalAA
self
.
globalAA
=
globalAA
self
.
localAA
=
localAA
self
.
localAA
=
localAA
def
_ParseCADGlobal
(
lines
):
def
_ParseCADGlobal
(
lines
):
interesting_lines
=
lines
[
1
]
header
=
lines
[
0
].
split
()
aa
=
float
(
interesting_lines
.
split
()[
10
])
aa_idx
=
header
.
index
(
"
AA
"
)
return
aa
aa_score
=
float
(
lines
[
1
].
split
()[
aa_idx
])
return
aa_score
def
_ParseCADLocal
(
lines
):
def
_ParseCADLocal
(
lines
):
local_scores_idx
=
None
for
line_idx
in
range
(
len
(
lines
)):
if
"
local_scores
"
in
lines
[
line_idx
]:
local_scores_idx
=
line_idx
break
if
local_scores_idx
==
None
:
raise
RuntimeError
(
"
Failed to parse local cadscores
"
)
local_aa_dict
=
{}
for
line_idx
in
range
(
local_scores_idx
+
2
,
len
(
lines
)):
items
=
lines
[
line_idx
].
split
()
local_aa
=
float
(
items
[
2
])
if
local_aa
<
0.0
:
continue
# invalid CAD score
key
=
(
items
[
0
],
mol
.
ResNum
(
int
(
items
[
1
])))
local_aa_dict
[
key
]
=
local_aa
return
local_aa_dict
def
_ParseVoronotaGlobal
(
lines
):
return
float
(
lines
[
0
].
split
()[
4
])
def
_ParseVoronotaLocal
(
lines
):
local_aa_dict
=
{}
local_aa_dict
=
{}
for
lin
in
lines
[
11
:]:
chain_name_regex
=
r
'
c\<\D+\>
'
items
=
lin
.
split
()
resnum_regex
=
r
'
r\<\d+\>
'
chain
=
items
[
0
]
insertion_code_regex
=
r
'
i\<\D\>
'
resnum
=
int
(
items
[
1
])
for
line
in
lines
:
key
=
chain
+
'
.
'
+
str
(
resnum
)
local_aa
=
float
(
line
.
split
()[
-
1
])
aa
=
float
(
items
[
2
])
if
local_aa
<
0.0
:
local_aa_dict
[
key
]
=
aa
continue
# invalid CAD score
chain_data
=
re
.
findall
(
chain_name_regex
,
line
)
resnum_data
=
re
.
findall
(
resnum_regex
,
line
)
insertion_code_data
=
re
.
findall
(
insertion_code_regex
,
line
)
resnum
=
None
if
len
(
insertion_code_data
)
==
0
:
resnum
=
mol
.
ResNum
(
int
(
resnum_data
[
0
][
1
:].
strip
(
'
><
'
)))
else
:
resnum
=
mol
.
ResNum
(
int
(
resnum_data
[
0
][
1
:].
strip
(
'
><
'
)),
insertion_code_data
[
0
][
1
:].
strip
(
'
><
'
))
key
=
(
chain_data
[
0
][
1
:].
strip
(
'
><
'
),
resnum
)
local_aa_dict
[
key
]
=
local_aa
return
local_aa_dict
return
local_aa_dict
def
_RunCAD
(
max_iter
,
tmp_dir
):
def
_RunCAD
(
tmp_dir
,
mode
,
cad_bin_path
,
old_regime
):
model_filename
=
os
.
path
.
join
(
tmp_dir
,
'
model.pdb
'
)
model_filename
=
os
.
path
.
join
(
tmp_dir
,
'
model.pdb
'
)
reference_filename
=
os
.
path
.
join
(
tmp_dir
,
'
reference.pdb
'
)
reference_filename
=
os
.
path
.
join
(
tmp_dir
,
'
reference.pdb
'
)
globalAA
=
None
localAA
=
None
if
platform
.
system
()
==
"
Windows
"
:
if
platform
.
system
()
==
"
Windows
"
:
raise
RuntimeError
(
'
CAD score not available on Windows
'
)
raise
RuntimeError
(
'
CAD score not available on Windows
'
)
if
mode
==
"
classic
"
:
cad_calc_path
=
None
cad_read_g_path
=
None
cad_read_l_path
=
None
if
cad_bin_path
:
cad_calc_path
=
settings
.
Locate
(
'
CADscore_calc.bash
'
,
search_paths
=
[
cad_bin_path
],
search_system_paths
=
False
)
cad_read_g_path
=
settings
.
Locate
(
'
CADscore_read_global_scores.bash
'
,
search_paths
=
[
cad_bin_path
],
search_system_paths
=
False
)
cad_read_l_path
=
settings
.
Locate
(
'
CADscore_read_local_scores.bash
'
,
search_paths
=
[
cad_bin_path
],
search_system_paths
=
False
)
# also try to locate the actual executable that is called from the
# bash scripts
executable_path
=
settings
.
Locate
(
'
voroprot2
'
,
search_paths
=
[
cad_bin_path
],
search_system_paths
=
False
)
else
:
cad_calc_path
=
settings
.
Locate
(
'
CADscore_calc.bash
'
)
cad_read_g_path
=
settings
.
Locate
(
'
CADscore_read_global_scores.bash
'
)
cad_read_l_path
=
settings
.
Locate
(
'
CADscore_read_local_scores.bash
'
)
# also try to locate the actual executable that is called from the
# bash scripts
executable_path
=
settings
.
Locate
(
'
voroprot2
'
)
command1
=
"
\"
%s
\"
-m
\"
%s
\"
-t
\"
%s
\"
-D
\"
%s
\"
"
%
(
cad_calc_path
,
model_filename
,
reference_filename
,
os
.
path
.
join
(
tmp_dir
,
"
cadtemp
"
))
command2
=
"
\"
%s
\"
-D
\"
%s
\"
"
%
(
cad_read_g_path
,
os
.
path
.
join
(
tmp_dir
,
"
cadtemp
"
))
command3
=
"
\"
%s
\"
-m
\"
%s
\"
-t
\"
%s
\"
-D
\"
%s
\"
-c AA
"
%
(
cad_read_l_path
,
model_filename
,
reference_filename
,
os
.
path
.
join
(
tmp_dir
,
"
cadtemp
"
))
ps1
=
subprocess
.
Popen
(
command1
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
)
ps1
.
wait
()
ps2
=
subprocess
.
Popen
(
command2
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
)
ps2
.
wait
()
lines
=
ps2
.
stdout
.
readlines
()
try
:
globalAA
=
_ParseCADGlobal
(
lines
)
except
:
raise
RuntimeError
(
"
CAD calculation failed
"
)
ps3
=
subprocess
.
Popen
(
command3
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
)
ps3
.
wait
()
lines
=
ps3
.
stdout
.
readlines
()
try
:
localAA
=
_ParseCADLocal
(
lines
)
except
:
raise
RuntimeError
(
"
CAD calculation failed
"
)
elif
mode
==
"
voronota
"
:
local_score_filename
=
os
.
path
.
join
(
tmp_dir
,
"
local_scores.txt
"
)
voronota_cadscore_path
=
None
if
cad_bin_path
:
voronota_cadscore_path
=
settings
.
Locate
(
"
voronota-cadscore
"
,
search_paths
=
[
cad_bin_path
],
search_system_paths
=
False
)
# also try to locate the actual executable that is called from the
# bash script
executable_path
=
settings
.
Locate
(
"
voronota
"
,
search_paths
=
[
cad_bin_path
],
search_system_paths
=
False
)
else
:
voronota_cadscore_path
=
settings
.
Locate
(
"
voronota-cadscore
"
)
# also try to locate the actual executable that is called from the
# bash script
executable_path
=
settings
.
Locate
(
"
voronota
"
)
cmd
=
[
voronota_cadscore_path
,
'
-m
'
,
model_filename
,
'
-t
'
,
reference_filename
,
'
--contacts-query-by-code AA
'
,
'
--output-residue-scores
'
,
local_score_filename
]
if
old_regime
:
cmd
.
append
(
"
--old-regime
"
)
cmd
=
'
'
.
join
(
cmd
)
ps
=
subprocess
.
Popen
(
cmd
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
)
ps
.
wait
()
try
:
globalAA
=
_ParseVoronotaGlobal
(
ps
.
stdout
.
readlines
())
except
:
raise
RuntimeError
(
"
CAD calculation failed
"
)
try
:
localAA
=
_ParseVoronotaLocal
(
open
(
local_score_filename
).
readlines
())
except
:
raise
RuntimeError
(
"
CAD calculation failed
"
)
else
:
else
:
cad_calc_path
=
settings
.
Locate
(
'
CADscore_calc.bash
'
)
raise
RuntimeError
(
"
Invalid CAD mode! Allowed are:
"
cad_read_g_path
=
settings
.
Locate
(
'
CADscore_read_global_scores.bash
'
)
"
[
\"
classic
\"
,
\"
voronota
\"
]
"
)
cad_read_l_path
=
settings
.
Locate
(
'
CADscore_read_local_scores.bash
'
)
command1
=
"
\"
%s
\"
-o %i -m
\"
%s
\"
-t
\"
%s
\"
-D
\"
%s
\"
"
%
(
cad_calc_path
,
max_iter
,
model_filename
,
reference_filename
,
os
.
path
.
join
(
tmp_dir
,
"
cadtemp
"
))
command2
=
"
\"
%s
\"
-D
\"
%s
\"
"
%
(
cad_read_g_path
,
os
.
path
.
join
(
tmp_dir
,
"
cadtemp
"
))
command3
=
"
\"
%s
\"
-m
\"
%s
\"
-t
\"
%s
\"
-D
\"
%s
\"
-c AA
"
%
(
cad_read_l_path
,
model_filename
,
reference_filename
,
os
.
path
.
join
(
tmp_dir
,
"
cadtemp
"
))
ps1
=
subprocess
.
Popen
(
command1
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
)
ps1
.
wait
()
ps2
=
subprocess
.
Popen
(
command2
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
)
ps2
.
wait
()
lines
=
ps2
.
stdout
.
readlines
()
try
:
globalAA
=
_ParseCADGlobal
(
lines
)
except
:
raise
RuntimeError
(
"
CAD calculation failed
"
)
ps3
=
subprocess
.
Popen
(
command3
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
)
ps3
.
wait
()
lines
=
ps3
.
stdout
.
readlines
()
try
:
localAA
=
_ParseCADLocal
(
lines
)
except
:
raise
RuntimeError
(
"
CAD calculation failed
"
)
return
CADResult
(
globalAA
,
localAA
)
return
CADResult
(
globalAA
,
localAA
)
def
_HasInsertionCodes
(
model
,
reference
):
for
r
in
model
.
residues
:
if
r
.
GetNumber
().
GetInsCode
()
!=
"
\0
"
:
print
r
return
True
for
r
in
reference
.
residues
:
if
r
.
GetNumber
().
GetInsCode
()
!=
"
\0
"
:
print
r
return
True
return
False
def
CADScore
(
model
,
reference
,
max_iter
=
300
):
def
_MapLabels
(
model
,
cad_results
,
label
):
for
k
,
v
in
cad_results
.
localAA
.
iteritems
():
r
=
model
.
FindResidue
(
k
[
0
],
k
[
1
])
if
not
r
.
IsValid
():
raise
RuntimeError
(
"
Failed to map cadscore on residues:
"
+
"
CAD score estimated for residue in chain
\"
"
+
k
[
0
]
+
"
\"
with ResNum
"
+
str
(
k
[
1
])
+
"
. Residue
"
+
"
could not be found in model.
"
)
r
.
SetFloatProp
(
label
,
v
)
def
CADScore
(
model
,
reference
,
mode
=
"
classic
"
,
label
=
"
localcad
"
,
old_regime
=
False
,
cad_bin_path
=
None
):
"""
"""
Calculates global and local atom-atom (AA) CAD Scores
Calculates global and local atom-atom (AA) CAD Scores.
You can either access the original implementation available from
https://bitbucket.org/kliment/cadscore/downloads/
or the new implementation which is part of the Voronota package
available from https://bitbucket.org/kliment/voronota/downloads/.
The scores of the two implementations differ but strongly correlate
as the contacts between atoms are estimated differently. When using
the
"
voronota
"
*mode* you can minimize those discrepancies by
setting the *old_regime* flag to True.
Furthermore, the
"
voronota
"
*mode* generates per-residue scores that
are inverted when compared to the classical implementation
(0.0: bad, 1.0 good).
:param model: The model structure.
:param model: The model structure.
:type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
:type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
:param reference: The reference structure
:param reference: The reference structure
:type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
:type reference: :class:`~ost.mol.EntityView` or
:param max_iter: Optional. The maximum number of iteration for the tessellation algorithm before giving up. By default 300
:class:`~ost.mol.EntityHandle`
:param mode: What CAD score implementation to use, must be one in
[
"
classic
"
,
"
voronota
"
]
:param label: Local CAD scores will be mapped on residues of model as
float property with this name
:type label: :class:`str`
:param old_regime: Only has an effect if *mode* is
"
voronota
"
. If set to true,
the discrepancies between the two modes is minimized but
the behaviour of inverted scores persists.
:type old_regime: :class:`bool`
:param cad_bin_path: Path to search for the required executables
([
"
CADscore_calc.bash
"
,
"
CADscore_read_global_scores.bash
"
,
"
CADscore_read_local_scores.bash
"
] for
"
classic
"
*mode*
or [
"
voronota-cadscore
"
] for
"
voronota
"
*mode*). If not
set, the env path is searched.
:type cad_bin_path: :class:`str`
:returns: The result of the CAD score calculation
:returns: The result of the CAD score calculation
:rtype: :class:`CADResult`
:rtype: :class:`CADResult`
:raises: :class:`~ost.settings.FileNotFound` if any of the CAD score exacutables could not be located.
:raises: :class:`~ost.settings.FileNotFound` if any of the CAD score
executables could not be located.
:raises: :class:`RuntimeError` if the calculation failed
:raises: :class:`RuntimeError` if the calculation failed
"""
"""
if
mode
==
"
classic
"
and
_HasInsertionCodes
(
model
,
reference
):
raise
RuntimeError
(
"
The classic CAD score implementation does not support
"
"
insertion codes in residues
"
)
tmp_dir_name
=
_SetupFiles
(
model
,
reference
)
tmp_dir_name
=
_SetupFiles
(
model
,
reference
)
result
=
_RunCAD
(
max_iter
,
tmp_dir_name
)
result
=
_RunCAD
(
tmp_dir_na
me
,
mode
,
cad_bin_path
,
old_regi
me
)
_CleanupFiles
(
tmp_dir_name
)
_CleanupFiles
(
tmp_dir_name
)
_MapLabels
(
model
,
result
,
label
)
return
result
return
result
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment