Skip to content
Snippets Groups Projects
Commit 1dfd44ba authored by Studer Gabriel's avatar Studer Gabriel
Browse files

updated CAD score binding - format of result has changed, check doc for further details

parent ed4aa651
No related branches found
No related tags found
No related merge requests found
...@@ -4,6 +4,8 @@ Changes in Release 1.x.x ...@@ -4,6 +4,8 @@ Changes in Release 1.x.x
* Use system provided SQLite3 library * Use system provided SQLite3 library
* C++ wrapper around C++ implementation of TMalign * C++ wrapper around C++ implementation of TMalign
* Database for efficient in-memory lookup of coordinates and sequences * Database for efficient in-memory lookup of coordinates and sequences
* Updated CAD score binding with change in result format, check documentation
for further details
Changes in Release 1.9.0 Changes in Release 1.9.0
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
......
...@@ -28,8 +28,8 @@ Proteins. 2012 Aug 30. [Epub ahead of print] ...@@ -28,8 +28,8 @@ Proteins. 2012 Aug 30. [Epub ahead of print]
Authors: Valerio Mariani, Alessandro Barbato Authors: Valerio Mariani, Alessandro Barbato
""" """
import subprocess, os, tempfile, platform import subprocess, os, tempfile, platform, re
from ost import settings, io from ost import settings, io, mol
def _SetupFiles(model,reference): def _SetupFiles(model,reference):
# create temporary directory # create temporary directory
...@@ -37,7 +37,8 @@ def _SetupFiles(model,reference): ...@@ -37,7 +37,8 @@ def _SetupFiles(model,reference):
dia = 'PDB' dia = 'PDB'
for chain in model.chains: for chain in model.chains:
if chain.name==" ": if chain.name==" ":
raise RuntimeError("One of the chains in the model has no name. Cannot calculate CAD score") raise RuntimeError("One of the chains in the model has no name. Cannot "
"calculate CAD score")
if len(chain.name) > 1: if len(chain.name) > 1:
dia = 'CHARMM' dia = 'CHARMM'
break; break;
...@@ -49,7 +50,8 @@ def _SetupFiles(model,reference): ...@@ -49,7 +50,8 @@ def _SetupFiles(model,reference):
dia = 'PDB' dia = 'PDB'
for chain in reference.chains: for chain in reference.chains:
if chain.name==" ": if chain.name==" ":
raise RuntimeError("One of the chains in the reference has no name. Cannot calculate CAD score") raise RuntimeError("One of the chains in the reference has no name. "
"Cannot calculate CAD score")
if len(chain.name) > 1: if len(chain.name) > 1:
dia = 'CHARMM' dia = 'CHARMM'
break; break;
...@@ -64,7 +66,6 @@ def _CleanupFiles(dir_name): ...@@ -64,7 +66,6 @@ def _CleanupFiles(dir_name):
import shutil import shutil
shutil.rmtree(dir_name) shutil.rmtree(dir_name)
class CADResult: class CADResult:
""" """
Holds the result of running CAD Holds the result of running CAD
...@@ -77,78 +78,244 @@ class CADResult: ...@@ -77,78 +78,244 @@ class CADResult:
Dictionary containing local CAD's atom-atom (AA) scores. Dictionary containing local CAD's atom-atom (AA) scores.
:type: dictionary (key: chain.resnum (e.g.: A.24), value: CAD local AA score (see CAD Documentation online) :type: dictionary (key: tuple(chain, resnum) (e.g.:
("A", ost.mol.ResNum(24)), value: CAD local AA score
(see CAD Documentation online)
""" """
def __init__(self, globalAA, localAA): def __init__(self, globalAA, localAA):
self.globalAA=globalAA self.globalAA=globalAA
self.localAA=localAA self.localAA=localAA
def _ParseCADGlobal(lines): def _ParseCADGlobal(lines):
interesting_lines=lines[1] header = lines[0].split()
aa=float(interesting_lines.split()[10]) aa_idx = header.index("AA")
return aa aa_score=float(lines[1].split()[aa_idx])
return aa_score
def _ParseCADLocal(lines): def _ParseCADLocal(lines):
local_scores_idx = None
for line_idx in range(len(lines)):
if "local_scores" in lines[line_idx]:
local_scores_idx = line_idx
break
if local_scores_idx == None:
raise RuntimeError("Failed to parse local cadscores")
local_aa_dict={}
for line_idx in range(local_scores_idx+2, len(lines)):
items=lines[line_idx].split()
local_aa = float(items[2])
if local_aa < 0.0:
continue # invalid CAD score
key = (items[0], mol.ResNum(int(items[1])))
local_aa_dict[key] = local_aa
return local_aa_dict
def _ParseVoronotaGlobal(lines):
return float(lines[0].split()[4])
def _ParseVoronotaLocal(lines):
local_aa_dict={} local_aa_dict={}
for lin in lines[11:]: chain_name_regex = r'c\<\D+\>'
items=lin.split() resnum_regex = r'r\<\d+\>'
chain=items[0] insertion_code_regex = r'i\<\D\>'
resnum=int(items[1]) for line in lines:
key=chain+'.'+str(resnum) local_aa = float(line.split()[-1])
aa=float(items[2]) if local_aa < 0.0:
local_aa_dict[key]=aa continue # invalid CAD score
chain_data = re.findall(chain_name_regex, line)
resnum_data = re.findall(resnum_regex, line)
insertion_code_data = re.findall(insertion_code_regex, line)
resnum = None
if len(insertion_code_data) == 0:
resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')))
else:
resnum = mol.ResNum(int(resnum_data[0][1:].strip('><')),
insertion_code_data[0][1:].strip('><'))
key = (chain_data[0][1:].strip('><'), resnum)
local_aa_dict[key] = local_aa
return local_aa_dict return local_aa_dict
def _RunCAD(max_iter, tmp_dir): def _RunCAD(tmp_dir, mode, cad_bin_path, old_regime):
model_filename=os.path.join(tmp_dir, 'model.pdb') model_filename=os.path.join(tmp_dir, 'model.pdb')
reference_filename=os.path.join(tmp_dir, 'reference.pdb') reference_filename=os.path.join(tmp_dir, 'reference.pdb')
globalAA = None
localAA = None
if platform.system() == "Windows": if platform.system() == "Windows":
raise RuntimeError('CAD score not available on Windows') raise RuntimeError('CAD score not available on Windows')
if mode == "classic":
cad_calc_path = None
cad_read_g_path = None
cad_read_l_path = None
if cad_bin_path:
cad_calc_path = settings.Locate('CADscore_calc.bash',
search_paths=[cad_bin_path],
search_system_paths=False)
cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash',
search_paths=[cad_bin_path],
search_system_paths=False)
cad_read_l_path=settings.Locate('CADscore_read_local_scores.bash',
search_paths=[cad_bin_path],
search_system_paths=False)
# also try to locate the actual executable that is called from the
# bash scripts
executable_path = settings.Locate('voroprot2',
search_paths=[cad_bin_path],
search_system_paths=False)
else:
cad_calc_path = settings.Locate('CADscore_calc.bash')
cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash')
cad_read_l_path = settings.Locate('CADscore_read_local_scores.bash')
# also try to locate the actual executable that is called from the
# bash scripts
executable_path = settings.Locate('voroprot2')
command1="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path,
model_filename,
reference_filename,
os.path.join(tmp_dir,
"cadtemp"))
command2="\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir,
"cadtemp"))
command3="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path,
model_filename,
reference_filename,
os.path.join(tmp_dir,
"cadtemp"))
ps1=subprocess.Popen(command1, shell=True, stdout=subprocess.PIPE)
ps1.wait()
ps2=subprocess.Popen(command2, shell=True, stdout=subprocess.PIPE)
ps2.wait()
lines=ps2.stdout.readlines()
try:
globalAA=_ParseCADGlobal(lines)
except:
raise RuntimeError("CAD calculation failed")
ps3=subprocess.Popen(command3, shell=True, stdout=subprocess.PIPE)
ps3.wait()
lines=ps3.stdout.readlines()
try:
localAA=_ParseCADLocal(lines)
except:
raise RuntimeError("CAD calculation failed")
elif mode == "voronota":
local_score_filename = os.path.join(tmp_dir, "local_scores.txt")
voronota_cadscore_path = None
if cad_bin_path:
voronota_cadscore_path = settings.Locate("voronota-cadscore",
search_paths=[cad_bin_path],
search_system_paths=False)
# also try to locate the actual executable that is called from the
# bash script
executable_path = settings.Locate("voronota",
search_paths=[cad_bin_path],
search_system_paths=False)
else:
voronota_cadscore_path = settings.Locate("voronota-cadscore")
# also try to locate the actual executable that is called from the
# bash script
executable_path = settings.Locate("voronota")
cmd = [voronota_cadscore_path, '-m', model_filename, '-t',
reference_filename, '--contacts-query-by-code AA',
'--output-residue-scores', local_score_filename]
if old_regime:
cmd.append("--old-regime")
cmd = ' '.join(cmd)
ps = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
ps.wait()
try:
globalAA = _ParseVoronotaGlobal(ps.stdout.readlines())
except:
raise RuntimeError("CAD calculation failed")
try:
localAA = _ParseVoronotaLocal(open(local_score_filename).readlines())
except:
raise RuntimeError("CAD calculation failed")
else: else:
cad_calc_path=settings.Locate('CADscore_calc.bash') raise RuntimeError("Invalid CAD mode! Allowed are: "
cad_read_g_path=settings.Locate('CADscore_read_global_scores.bash') "[\"classic\", \"voronota\"]")
cad_read_l_path=settings.Locate('CADscore_read_local_scores.bash')
command1="\"%s\" -o %i -m \"%s\" -t \"%s\" -D \"%s\"" %(cad_calc_path, max_iter, model_filename, reference_filename, os.path.join(tmp_dir,"cadtemp"))
command2="\"%s\" -D \"%s\"" %(cad_read_g_path, os.path.join(tmp_dir,"cadtemp"))
command3="\"%s\" -m \"%s\" -t \"%s\" -D \"%s\" -c AA" %(cad_read_l_path, model_filename, reference_filename,os.path.join(tmp_dir,"cadtemp"))
ps1=subprocess.Popen(command1, shell=True, stdout=subprocess.PIPE)
ps1.wait()
ps2=subprocess.Popen(command2, shell=True, stdout=subprocess.PIPE)
ps2.wait()
lines=ps2.stdout.readlines()
try:
globalAA=_ParseCADGlobal(lines)
except:
raise RuntimeError("CAD calculation failed")
ps3=subprocess.Popen(command3, shell=True, stdout=subprocess.PIPE)
ps3.wait()
lines=ps3.stdout.readlines()
try:
localAA=_ParseCADLocal(lines)
except:
raise RuntimeError("CAD calculation failed")
return CADResult(globalAA,localAA) return CADResult(globalAA,localAA)
def _HasInsertionCodes(model, reference):
for r in model.residues:
if r.GetNumber().GetInsCode() != "\0":
print r
return True
for r in reference.residues:
if r.GetNumber().GetInsCode() != "\0":
print r
return True
return False
def CADScore(model, reference, max_iter=300): def _MapLabels(model, cad_results, label):
for k,v in cad_results.localAA.iteritems():
r = model.FindResidue(k[0], k[1])
if not r.IsValid():
raise RuntimeError("Failed to map cadscore on residues: " +
"CAD score estimated for residue in chain \"" +
k[0] + "\" with ResNum " + str(k[1]) + ". Residue " +
"could not be found in model.")
r.SetFloatProp(label, v)
def CADScore(model, reference, mode = "classic", label = "localcad",
old_regime = False, cad_bin_path = None):
""" """
Calculates global and local atom-atom (AA) CAD Scores Calculates global and local atom-atom (AA) CAD Scores.
You can either access the original implementation available from
https://bitbucket.org/kliment/cadscore/downloads/
or the new implementation which is part of the Voronota package
available from https://bitbucket.org/kliment/voronota/downloads/.
The scores of the two implementations differ but strongly correlate
as the contacts between atoms are estimated differently. When using
the "voronota" *mode* you can minimize those discrepancies by
setting the *old_regime* flag to True.
Furthermore, the "voronota" *mode* generates per-residue scores that
are inverted when compared to the classical implementation
(0.0: bad, 1.0 good).
:param model: The model structure. :param model: The model structure.
:type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle` :type model: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
:param reference: The reference structure :param reference: The reference structure
:type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle` :type reference: :class:`~ost.mol.EntityView` or
:param max_iter: Optional. The maximum number of iteration for the tessellation algorithm before giving up. By default 300 :class:`~ost.mol.EntityHandle`
:param mode: What CAD score implementation to use, must be one in
["classic", "voronota"]
:param label: Local CAD scores will be mapped on residues of model as
float property with this name
:type label: :class:`str`
:param old_regime: Only has an effect if *mode* is "voronota". If set to true,
the discrepancies between the two modes is minimized but
the behaviour of inverted scores persists.
:type old_regime: :class:`bool`
:param cad_bin_path: Path to search for the required executables
(["CADscore_calc.bash",
"CADscore_read_global_scores.bash",
"CADscore_read_local_scores.bash"] for "classic" *mode*
or ["voronota-cadscore"] for "voronota" *mode*). If not
set, the env path is searched.
:type cad_bin_path: :class:`str`
:returns: The result of the CAD score calculation :returns: The result of the CAD score calculation
:rtype: :class:`CADResult` :rtype: :class:`CADResult`
:raises: :class:`~ost.settings.FileNotFound` if any of the CAD score exacutables could not be located. :raises: :class:`~ost.settings.FileNotFound` if any of the CAD score
executables could not be located.
:raises: :class:`RuntimeError` if the calculation failed :raises: :class:`RuntimeError` if the calculation failed
""" """
if mode == "classic" and _HasInsertionCodes(model, reference):
raise RuntimeError("The classic CAD score implementation does not support "
"insertion codes in residues")
tmp_dir_name=_SetupFiles(model, reference) tmp_dir_name=_SetupFiles(model, reference)
result=_RunCAD(max_iter, tmp_dir_name) result=_RunCAD(tmp_dir_name, mode, cad_bin_path, old_regime)
_CleanupFiles(tmp_dir_name) _CleanupFiles(tmp_dir_name)
_MapLabels(model, result, label)
return result return result
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment