Went through HHblits code

5c60eb76 · Bienchen · a79a6deb · 5c60eb76
Commit 5c60eb76 authored 9 years ago by Bienchen
--- a/modules/bindings/pymod/hhblits.py
+++ b/modules/bindings/pymod/hhblits.py
@@ -17,40 +17,40 @@ from ost import settings, seq
 from ost.bindings import utils
 class HHblitsHit:
-  """
+    """
-  A hit found by HHblits
+    A hit found by HHblits
-  .. attribute:: hit_id
+    .. attribute:: hit_id
-    String identifying the hit
+      String identifying the hit
-  .. attribute:: aln
+    .. attribute:: aln
-    Pairwise alignment containing the aligned part between the query and the
+      Pairwise alignment containing the aligned part between the query and the
-    target. First sequence is the query, the second sequence the target.
+      target. First sequence is the query, the second sequence the target.
-    :type: :class:`ost.seq.AlignmentHandle`
+      :type: :class:`ost.seq.AlignmentHandle`
-  .. attribute:: evalue
+    .. attribute:: evalue
-    The E-value of the alignment
+      The E-value of the alignment
-  .. attribute:: prob
+    .. attribute:: prob
-    The probability of the alignment (between 0 and 100)
+      The probability of the alignment (between 0 and 100)
-  .. attribute:: score
+    .. attribute:: score
-    The alignment score
+      The alignment score
-  """
+    """
-  def __init__(self, hit_id, aln, score, ss_score,evalue, pvalue, prob):
+    def __init__(self, hit_id, aln, score, ss_score,evalue, pvalue, prob):
-      self.hit_id = hit_id
+        self.hit_id = hit_id
-      self.aln = aln
+        self.aln = aln
-      self.score = score
+        self.score = score
-      self.ss_score = ss_score
+        self.ss_score = ss_score
-      self.evalue = evalue
+        self.evalue = evalue
-      self.prob = prob
+        self.prob = prob
-      self.pvalue = pvalue
+        self.pvalue = pvalue
 class HHblitsHeader:
    def __init__(self):
@@ -62,25 +62,29 @@ class HHblitsHeader:
        self.command = ''
 def ParseHeaderLine(line):
-  # First, we seek the start of the identifier, that is, the first whitespace
+    '''Fetch header content.
-  # after the hit number + 1. Since the identifier may contain whitespaces
-  # itself, we cannot split the whole line
+    First, we seek the start of the identifier, that is, the first whitespace
-  for i in range(0, len(line)):
+    after the hit number + 1. Since the identifier may contain whitespaces
-    if line[i].isdigit():
+    itself, we cannot split the whole line
-      break
+    '''
-  for i in range(i, len(line)):
+    for i in range(0, len(line)):
-    if line[i] == ' ':
+        if line[i].isdigit():
-      break
+            break
-  assert len(line)-i >= 31 and line[i+1] != ' '
+    for i in range(i, len(line)):
-  hit_id = line[i+1:i+31].strip()
+        if line[i] == ' ':
-  fields = line[i+32:].split()
+            break
-  prob = float(fields[0])
+    assert len(line)-i >= 31 and line[i+1] != ' '
-  evalue = float(fields[1])
+    hit_id = line[i+1:i+31].strip()
-  pvalue = float(fields[2])
+    fields = line[i+32:].split()
-  score = float(fields[3])
+    prob = float(fields[0])
-  ss_score = float(fields[4])
+    evalue = float(fields[1])
-  offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
+    pvalue = float(fields[2])
-  return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob), offsets)
+    score = float(fields[3])
+    ss_score = float(fields[4])
+    offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
+    return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
+            offsets)
 def ParseHHblitsOutput(output):
    """
@@ -444,45 +448,45 @@ class HHblits:
    def A3MToCS(self, a3m_file, cs_file=None, options={}):
-      """
+        """
-      Converts the A3M alignment file to a column state sequence file. If
+        Converts the A3M alignment file to a column state sequence file. If
-      cs_file is not given, the output file will be set to
+        cs_file is not given, the output file will be set to
-      <file-basename>.seq219.
+        <file-basename>.seq219.
-      :param a3m_file: A3M file to be converted.
+        :param a3m_file: A3M file to be converted.
-      :type a3m_file: :class:`str`
+        :type a3m_file: :class:`str`
-      :param cs_file: Output file name (may be omitted)
+        :param cs_file: Output file name (may be omitted)
-      :type cs_file: :class:`str`
+        :type cs_file: :class:`str`
-      :param options: dictionary of options to *cstranslate*, must come with the
+        :param options: dictionary of options to *cstranslate*, must come with
-                      right amount of '-' in front.
+                        the right amount of '-' in front.
-      :type options: :class:`dict`
+        :type options: :class:`dict`
-      """
+        """
-      cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
+        cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
-      if not cs_file:
+        if not cs_file:
-          cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
+            cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
-      if os.path.exists(cs_file):
+        if os.path.exists(cs_file):
-          return cs_file
+            return cs_file
-      o = list()
+        o = list()
-      for k, v in options.iteritems():
+        for k, v in options.iteritems():
-          if type(v) == type(True):
+            if type(v) == type(True):
-              if v == True:
+                if v == True:
-                  o.append('%s' % str(k))
+                    o.append('%s' % str(k))
-          else:
+            else:
-              o.append('%s %s' % (str(k), str(v)))
+                o.append('%s %s' % (str(k), str(v)))
-      o = ' '.join(o)
+        o = ' '.join(o)
-      cs_cmd = '%s -i %s -o %s %s' % (cstranslate, a3m_file, cs_file, o)
+        cs_cmd = '%s -i %s -o %s %s' % (cstranslate, a3m_file, cs_file, o)
-      ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
+        ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
-      job = subprocess.Popen(cs_cmd, shell=True,
+        job = subprocess.Popen(cs_cmd, shell=True,
-                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-      sout, serr = job.communicate()
+        sout, serr = job.communicate()
-      #lines = serr.splitlines()
+        #lines = serr.splitlines()
-      #for l in lines:
+        #for l in lines:
-      #    print l
+        #    print l
-      lines = sout.splitlines()
+        lines = sout.splitlines()
-      for l in lines:
+        for l in lines:
-          if l in 'Wrote abstract state sequence to %s' % cs_file:
+            if l in 'Wrote abstract state sequence to %s' % cs_file:
-              return cs_file
+                return cs_file
-      ost.LogWarning('Creating column state sequence file (%s) failed' % \
+        ost.LogWarning('Creating column state sequence file (%s) failed' % \
-                     cs_file)
+                       cs_file)
    def CleanupFailed(self):
        '''In case something went wrong, call to make sure everything is clean.