Something went wrong on our end
tmtools.py 11.14 KiB
#------------------------------------------------------------------------------
# This file is part of the OpenStructure project <www.openstructure.org>
#
# Copyright (C) 2008-2015 by the OpenStructure authors
#
# This library is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3.0 of the License, or (at your option)
# any later version.
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#------------------------------------------------------------------------------
"""
Wrappers for the tmalign and tmscore utilities.
References:
tmscore: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710
tmalign: Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9
Authors: Pascal Benkert, Marco Biasini
"""
import subprocess, os, tempfile, platform
from ost import settings, io, geom, seq
def _SetupFiles(models):
# create temporary directory
tmp_dir_name=tempfile.mkdtemp()
dia = 'PDB'
for index, model in enumerate(models):
for chain in model.chains:
if len(chain.name) > 1:
dia = 'CHARMM'
break;
for res in chain.residues:
if len(res.name) > 3:
dia = 'CHARMM'
break;
io.SavePDB(model, os.path.join(tmp_dir_name, 'model%02d.pdb' % (index+1)), dialect=dia)
return tmp_dir_name
def _CleanupFiles(dir_name):
import shutil
shutil.rmtree(dir_name)
class TMAlignResult:
"""
Holds the result of running TMalign
.. attribute:: rmsd
The RMSD of the common Calpha atoms of both structures
.. attribute:: transform
The transform that superposes the model onto the reference structure.
:type: :class:`~ost.geom.Mat4`
.. attribute:: alignment
The alignment of the structures, that is the pairing of Calphas of both
structures. Since the programs only read ATOM records, residues consisting
of HETATMs (MSE) are not included in the alignment.
:type: :class:`~ost.seq.AlignmentHandle`
.. attribute:: tm_score
The TM-score of the structural superposition
"""
def __init__(self, rmsd, tm_score, aligned_length, transform,
ref_sequence, alignment):
self.rmsd=rmsd
self.tm_score=tm_score
self.aligned_length=aligned_length
self.transform=transform
self.ref_sequence =ref_sequence
self.alignment=alignment
def _ParseTmAlign(lines,lines_matrix):
info_line=lines[12].split(',')
aln_length=float(info_line[0].split('=')[1].strip())
rmsd=float(info_line[1].split('=')[1].strip())
tm_score=float(lines[14].split('=')[1].split('(')[0].strip())
tf1=[float(i.strip()) for i in lines_matrix[2].split()]
tf2=[float(i.strip()) for i in lines_matrix[3].split()]
tf3=[float(i.strip()) for i in lines_matrix[4].split()]
rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
tf2[4], tf3[2], tf3[3], tf3[4])
tf=geom.Mat4(rot)
tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
seq1 = seq.CreateSequence("1",lines[18].strip())
seq2 = seq.CreateSequence("2",lines[20].strip())
alignment = seq.CreateAlignment()
alignment.AddSequence(seq2)
alignment.AddSequence(seq1)
return TMAlignResult(rmsd, tm_score, aln_length, tf, seq2, alignment)
def _RunTmAlign(tmalign, tmp_dir):
model1_filename=os.path.join(tmp_dir, 'model01.pdb')
model2_filename=os.path.join(tmp_dir, 'model02.pdb')
if platform.system() == "Windows":
tmalign_path=settings.Locate('tmalign.exe', explicit_file_name=tmalign)
command="\"%s\" %s %s -m %s" %(os.path.normpath(tmalign_path), model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
else:
tmalign_path=settings.Locate('tmalign', explicit_file_name=tmalign)
command="\"%s\" \"%s\" \"%s\" -m \"%s\"" %(tmalign_path, model1_filename, model2_filename, os.path.join(tmp_dir,'matrix.txt'))
ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
ps.wait()
lines=ps.stdout.readlines()
if (len(lines))<22:
_CleanupFiles(tmp_dir)
raise RuntimeError("tmalign superposition failed")
matrix_file=open(os.path.join(tmp_dir,'matrix.txt'))
lines_matrix=matrix_file.readlines()
matrix_file.close()
return _ParseTmAlign(lines,lines_matrix)
class MMAlignResult:
def __init__(self, rmsd, aligned_length, tm_score, transform, alignment):
self.rmsd=rmsd
self.tm_score=tm_score
self.aligned_length=aligned_length
self.transform=transform
self.alignment=alignment
def _ParseMmAlign(lines):
info_line=lines[10].split(',')
aln_length=float(info_line[0].split('=')[1].strip())
rmsd=float(info_line[1].split('=')[1].strip())
tm_score=float(info_line[2].split('=')[1].strip())
tf1=[float(i.strip()) for i in lines[14].split()]
tf2=[float(i.strip()) for i in lines[15].split()]
tf3=[float(i.strip()) for i in lines[16].split()]
rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
tf2[4], tf3[2], tf3[3], tf3[4])
tf=geom.Mat4(rot)
tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
seq1 = seq.CreateSequence("1",lines[19].strip())
seq2 = seq.CreateSequence("2",lines[21].strip())
alignment = seq.CreateAlignment()
alignment.AddSequence(seq2)
alignment.AddSequence(seq1)
return MMAlignResult(rmsd, tm_score, aln_length, tf, seq2, alignment)
def _RunMmAlign(mmalign, tmp_dir):
model1_filename=os.path.join(tmp_dir, 'model01.pdb')
model2_filename=os.path.join(tmp_dir, 'model02.pdb')
if platform.system() == "Windows":
mmalign_path=settings.Locate('mmalign.exe', explicit_file_name=mmalign)
command="\"%s\" %s %s" %(os.path.normpath(mmalign_path), model1_filename, model2_filename)
else:
mmalign_path=settings.Locate('MMalign', explicit_file_name=mmalign)
command="\"%s\" \"%s\" \"%s\"" %(mmalign_path, model1_filename, model2_filename)
ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
ps.wait()
lines=ps.stdout.readlines()
if (len(lines))<22:
_CleanupFiles(tmp_dir)
raise RuntimeError("mmalign superposition failed")
return _ParseMmAlign(lines)
class TMScoreResult:
"""
Holds the result of running TMscore
.. attribute:: rmsd_common
The RMSD of the common Calpha atoms of both structures
.. attribute:: rmsd_below_five
The RMSD of all Calpha atoms that can be superposed below five Angstroem
.. attribute:: tm_score
The TM-score of the structural superposition
.. attribute:: transform
The transform that superposes the model onto the reference structure.
:type: :class:`~ost.geom.Mat4`
.. attribute:: gdt_ha
The GDT_HA of the model to the reference structure.
.. attribute:: gdt_ts
The GDT_TS of the model to the reference structure.
"""
def __init__(self, rmsd_common, tm_score, max_sub,
gdt_ts, gdt_ha, rmsd_below_five, transform):
self.rmsd_common=rmsd_common
self.tm_score=tm_score
self.max_sub=max_sub
self.gdt_ts=gdt_ts
self.gdt_ha=gdt_ha
self.rmsd_below_five=rmsd_below_five
self.transform=transform
def _ParseTmScore(lines):
tf1=[float(i.strip()) for i in lines[23].split()]
tf2=[float(i.strip()) for i in lines[24].split()]
tf3=[float(i.strip()) for i in lines[25].split()]
rot=geom.Mat3(tf1[2], tf1[3], tf1[4], tf2[2], tf2[3],
tf2[4], tf3[2], tf3[3], tf3[4])
tf=geom.Mat4(rot)
tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
result=TMScoreResult(float(lines[14].split()[-1].strip()),
float(lines[16].split()[2].strip()),
float(lines[17].split()[1].strip()),
float(lines[18].split()[1].strip()),
float(lines[19].split()[1].strip()),
float(lines[27].split()[-1].strip()),
tf)
return result
def _RunTmScore(tmscore, tmp_dir):
model1_filename=os.path.join(tmp_dir, 'model01.pdb')
model2_filename=os.path.join(tmp_dir, 'model02.pdb')
if platform.system() == "Windows":
tmscore_path=settings.Locate('tmscore.exe', explicit_file_name=tmscore)
command="\"%s\" %s %s" %(os.path.normpath(tmscore_path), model1_filename,
model2_filename)
else:
tmscore_path=settings.Locate('tmscore', explicit_file_name=tmscore)
command="\"%s\" \"%s\" \"%s\"" % (tmscore_path, model1_filename,
model2_filename)
ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
ps.wait()
lines=ps.stdout.readlines()
if (len(lines))<22:
_CleanupFiles(tmp_dir)
raise RuntimeError("tmscore superposition failed")
return _ParseTmScore(lines)
def TMAlign(model1, model2, tmalign=None):
"""
Performs a sequence independent superposition of model1 onto model2, the
reference.
:param model1: The model structure. If the superposition is successful, will
be superposed onto the reference structure
:type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
:param model2: The reference structure
:type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
:param tmalign: If not None, the path to the tmalign executable.
:returns: The result of the tmscore superposition
:rtype: :class:`TMAlignResult`
:raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
:raises: :class:`RuntimeError` if the superposition failed
"""
tmp_dir_name=_SetupFiles((model1, model2))
result=_RunTmAlign(tmalign, tmp_dir_name)
model1.handle.EditXCS().ApplyTransform(result.transform)
_CleanupFiles(tmp_dir_name)
return result
def MMAlign(model1, model2, mmalign=None):
"""
Run tmalign on two protein structures
"""
tmp_dir_name=_SetupFiles((model1, model2))
result=_RunMmAlign(mmalign, tmp_dir_name)
model1.handle.EditXCS().ApplyTransform(result.transform)
_CleanupFiles(tmp_dir_name)
return result
def TMScore(model1, model2, tmscore=None):
"""
Performs a sequence dependent superposition of model1 onto model2,
the reference.
:param model1: The model structure. If the superposition is successful, will
be superposed onto the reference structure
:type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
:param model2: The reference structure
:type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
:param tmscore: If not None, the path to the tmscore executable.
:returns: The result of the tmscore superposition
:rtype: :class:`TMScoreResult`
:raises: :class:`~ost.settings.FileNotFound` if tmalign could not be located.
:raises: :class:`RuntimeError` if the superposition failed
"""
tmp_dir_name=_SetupFiles((model1, model2))
result=_RunTmScore(tmscore, tmp_dir_name)
model1.handle.EditXCS().ApplyTransform(result.transform)
_CleanupFiles(tmp_dir_name)
return result