Skip to content
Snippets Groups Projects
Commit 5c60eb76 authored by Bienchen's avatar Bienchen
Browse files

Went through HHblits code

parent a79a6deb
No related branches found
No related tags found
No related merge requests found
...@@ -17,40 +17,40 @@ from ost import settings, seq ...@@ -17,40 +17,40 @@ from ost import settings, seq
from ost.bindings import utils from ost.bindings import utils
class HHblitsHit:
    """
    A hit found by HHblits

    .. attribute:: hit_id

      String identifying the hit

    .. attribute:: aln

      Pairwise alignment containing the aligned part between the query and the
      target. First sequence is the query, the second sequence the target.

      :type: :class:`ost.seq.AlignmentHandle`

    .. attribute:: score

      The alignment score

    .. attribute:: ss_score

      The secondary structure score of the alignment

    .. attribute:: evalue

      The E-value of the alignment

    .. attribute:: pvalue

      The P-value of the alignment

    .. attribute:: prob

      The probability of the alignment (between 0 and 100)
    """
    def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
        # Plain value holder: every argument is stored unchanged under an
        # attribute of the same name.
        self.hit_id = hit_id
        self.aln = aln
        self.score = score
        self.ss_score = ss_score
        self.evalue = evalue
        self.prob = prob
        self.pvalue = pvalue

    def __repr__(self):
        # The alignment is left out on purpose, it may be large or None.
        return ('HHblitsHit(hit_id=%r, score=%r, ss_score=%r, evalue=%r, '
                'pvalue=%r, prob=%r)' % (self.hit_id, self.score,
                                         self.ss_score, self.evalue,
                                         self.pvalue, self.prob))
class HHblitsHeader: class HHblitsHeader:
def __init__(self): def __init__(self):
...@@ -62,25 +62,29 @@ class HHblitsHeader: ...@@ -62,25 +62,29 @@ class HHblitsHeader:
self.command = '' self.command = ''
def ParseHeaderLine(line):
    '''Fetch header content.

    First, we seek the start of the identifier, that is, the first whitespace
    after the hit number + 1. Since the identifier may contain whitespaces
    itself, we cannot split the whole line. The identifier occupies a fixed
    width column of 30 characters, the remainder of the line is split on
    whitespace into the numeric fields.

    :param line: One line of the hit list.
    :type line: :class:`str`

    :return: A tuple of the hit (its alignment is set to ``None``) and a
             tuple with the start offsets read from the first number of the
             7th and 8th whitespace separated field after the identifier.
    :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))

    :raises: :class:`AssertionError` if the line has no hit number or is too
             short to hold the identifier column, :class:`IndexError` if
             numeric fields are missing, :class:`ValueError` if a field is
             not a number.
    '''
    # Position of the hit number: the first digit in the line.
    digit_pos = next((pos for pos, char in enumerate(line) if char.isdigit()),
                     None)
    if digit_pos is None:
        # AssertionError is kept as the failure type for callers, but raised
        # explicitly so the check also works with "python -O" and on empty
        # lines.
        raise AssertionError('no hit number in header line %r' % line)
    # The first blank after the hit number separates it from the identifier.
    sep = line.find(' ', digit_pos)
    if sep < 0 or len(line) - sep < 31 or line[sep + 1] == ' ':
        raise AssertionError('malformed header line %r' % line)
    hit_id = line[sep + 1:sep + 31].strip()
    fields = line[sep + 32:].split()
    prob = float(fields[0])
    evalue = float(fields[1])
    pvalue = float(fields[2])
    score = float(fields[3])
    ss_score = float(fields[4])
    # fields[5] is skipped; fields 6 and 7 look like '<start>-<end>' ranges
    # of which only the start is kept.
    offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
    return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
            offsets)
def ParseHHblitsOutput(output): def ParseHHblitsOutput(output):
""" """
...@@ -444,45 +448,45 @@ class HHblits: ...@@ -444,45 +448,45 @@ class HHblits:
def A3MToCS(self, a3m_file, cs_file=None, options={}):
    """
    Converts the A3M alignment file to a column state sequence file. If
    cs_file is not given, the output file will be set to
    <file-basename>.seq219.

    If the output file already exists, it is returned right away and
    *cstranslate* is not executed.

    :param a3m_file: A3M file to be converted.
    :type a3m_file: :class:`str`
    :param cs_file: Output file name (may be omitted)
    :type cs_file: :class:`str`
    :param options: dictionary of options to *cstranslate*, must come with
                    the right amount of '-' in front. Boolean values act as
                    switches: ``True`` adds the bare option, ``False`` leaves
                    it out. Any other value is passed as the argument of its
                    option.
    :type options: :class:`dict`

    :return: Name of the column state sequence file, ``None`` if the
             conversion failed (a warning is logged in that case).
    """
    cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
    if not cs_file:
        cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
    if os.path.exists(cs_file):
        return cs_file
    # Build an argument vector instead of a shell string, so file names with
    # blanks or shell metacharacters reach cstranslate untouched.
    cs_cmd = [cstranslate, '-i', a3m_file, '-o', cs_file]
    # 'options' is only read here, so the shared default dict is never
    # modified. items() works on Python 2 and 3 alike.
    for key, value in options.items():
        if isinstance(value, bool):
            if value:
                cs_cmd.append(str(key))
        else:
            cs_cmd.extend([str(key), str(value)])
    ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
    sout = ''
    try:
        job = subprocess.Popen(cs_cmd, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        sout, _ = job.communicate()
    except OSError:
        # A missing or non-executable cstranslate ends in the warning below
        # instead of an exception, as it did when run through the shell.
        pass
    if isinstance(sout, bytes):
        sout = sout.decode('utf-8', 'replace')
    # Success is announced on stdout. The message has to be contained in the
    # output, not the other way round: an empty line is a substring of
    # everything and must not count as success.
    if 'Wrote abstract state sequence to' in sout:
        return cs_file
    ost.LogWarning('Creating column state sequence file (%s) failed' %
                   cs_file)
    return None
def CleanupFailed(self): def CleanupFailed(self):
'''In case something went wrong, call to make sure everything is clean. '''In case something went wrong, call to make sure everything is clean.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment