From 1d21459921d855b9120a466fa1daf2adf91c5e6a Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Tue, 21 Jul 2020 08:20:22 +0200
Subject: [PATCH] Separate bindings for HHblits2 and HHblits3

HHblits2 is considered to be deprecated. If the raw hhblits binding
is imported, the module autodetects the hhblits version and imports
the appropriate version of the binding.
---
 modules/bindings/pymod/CMakeLists.txt |   2 +
 modules/bindings/pymod/hhblits.py     | 866 +-------------------------
 modules/bindings/pymod/hhblits2.py    | 792 +++++++++++++++++++++++
 modules/bindings/pymod/hhblits3.py    | 844 +++++++++++++++++++++++++
 4 files changed, 1671 insertions(+), 833 deletions(-)
 create mode 100644 modules/bindings/pymod/hhblits2.py
 create mode 100644 modules/bindings/pymod/hhblits3.py

diff --git a/modules/bindings/pymod/CMakeLists.txt b/modules/bindings/pymod/CMakeLists.txt
index 17bd42b19..65f91db60 100644
--- a/modules/bindings/pymod/CMakeLists.txt
+++ b/modules/bindings/pymod/CMakeLists.txt
@@ -9,6 +9,8 @@ clustalw.py
 utils.py
 naccess.py
 hhblits.py
+hhblits2.py
+hhblits3.py
 blast.py
 cadscore.py
 kclust.py
diff --git a/modules/bindings/pymod/hhblits.py b/modules/bindings/pymod/hhblits.py
index 0e34a97c1..b9b847237 100644
--- a/modules/bindings/pymod/hhblits.py
+++ b/modules/bindings/pymod/hhblits.py
@@ -1,844 +1,44 @@
-'''HHblits wrapper classes and functions.
-'''
-
+from ost import settings
+from ost import bindings
 import subprocess
-import datetime
-import os
-import shutil
-import tempfile
-
-import ost
-from ost import settings, seq
-from ost.bindings import utils
-
-class HHblitsHit:
-    """
-    A hit found by HHblits
-
-    .. attribute:: hit_id
-
-      String identifying the hit
-
-      :type: :class:`str`
-
-    .. attribute:: aln
-
-      Pairwise alignment containing the aligned part between the query and the
-      target. First sequence is the query, the second sequence the target.
-
-      :type: :class:`~ost.seq.AlignmentHandle`
-
-    .. attribute:: score
-
-      The alignment score
-
-      :type: :class:`float`
-
-    .. attribute:: ss_score
-
-      The secondary structure score
-
-      :type: :class:`float`
-
-    .. attribute:: evalue
-
-      The E-value of the alignment
-
-      :type: :class:`float`
-
-    .. attribute:: pvalue
-
-      The P-value of the alignment
-
-      :type: :class:`float`
-
-    .. attribute:: prob
-
-      The probability of the alignment (between 0 and 100)
-
-      :type: :class:`float`
-    """
-    def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
-        self.hit_id = hit_id
-        self.aln = aln
-        self.score = score
-        self.ss_score = ss_score
-        self.evalue = evalue
-        self.prob = prob
-        self.pvalue = pvalue
-
-class HHblitsHeader:
-    """Stats from the beginning of search output.
-
-    .. attribute:: query
-
-      The name of the query sequence
-
-      :type: :class:`str`
-
-    .. attribute:: match_columns
-
-      Total of aligned Match columns
-
-      :type: :class:`int`
-
-    .. attribute:: n_eff
-
-      Value of the ``-neff`` option
-
-      :type: :class:`float`
-
-    .. attribute:: searched_hmms
-
-      Number of profiles searched
-
-      :type: :class:`int`
-
-    .. attribute:: date
-
-      Execution date
-
-      :type: :class:`datetime.datetime`
-
-    .. attribute:: command
-
-      Command used to run
-
-      :type: :class:`str`
-    """
-    def __init__(self):
-        self.query = ''
-        self.match_columns = 0
-        self.n_eff = 0
-        self.searched_hmms = 0
-        self.date = None
-        self.command = ''
-
-def ParseHeaderLine(line):
-    '''Fetch header content.
-
-    First, we seek the start of the identifier, that is, the first whitespace
-    after the hit number + 1. Since the identifier may contain whitespaces
-    itself, we cannot split the whole line
-
-    :param line: Line from the output header.
-    :type line: :class:`str`
-
-    :return: Hit information and query/template offsets
-    :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
-    '''
-    for i in range(0, len(line)):
-        if line[i].isdigit():
-            break
-    for i in range(i, len(line)):
-        if line[i] == ' ':
-            break
-    assert len(line)-i >= 31 and line[i+1] != ' '
-    hit_id = line[i+1:i+31].strip()
-    fields = line[i+32:].split()
-    prob = float(fields[0])
-    evalue = float(fields[1])
-    pvalue = float(fields[2])
-    score = float(fields[3])
-    ss_score = float(fields[4])
-    offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
-    return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
-            offsets)
-
-def ParseHHblitsOutput(output):
-    """
-    Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
-    the header of the search results and a list of hits.
-
-    :param output: Iterable containing the lines of the HHblits output file
-    :type output: iterable (e.g. an open file handle)
-
-    :return: a tuple of the header of the search results and the hits
-    :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
-    """
-    lines = iter(output)
-    def _ParseHeaderSection(lines):
-        value_start_column = 14
-        date_pattern = '%a %b %d %H:%M:%S %Y'
-        header = HHblitsHeader()
-        line = next(lines)
-        assert line.startswith('Query')
-        header.query = line[value_start_column:].strip()
-        line = next(lines)
-        assert line.startswith('Match_columns')
-        header.match_columns = int(line[value_start_column:].strip())
-
-        line = next(lines)
-        assert line.startswith('No_of_seqs')
-
-        line = next(lines)
-        assert line.startswith('Neff')
-        header.n_eff = float(line[value_start_column:].strip())
-
-        line = next(lines)
-        assert line.startswith('Searched_HMMs')
-        header.searched_hmms = int(line[value_start_column:].strip())
-
-        line = next(lines)
-        assert line.startswith('Date')
-        value = line[value_start_column:].strip()
-        header.date = datetime.datetime.strptime(value, date_pattern)
-
-        line = next(lines)
-        assert line.startswith('Command')
-        header.command = line[value_start_column:].strip()
-
-        line = next(lines)
-        assert len(line.strip()) == 0
-        return header
-
-    def _ParseTableOfContents(lines):
-        line = next(lines)
-        assert line.startswith(' No Hit')
-        hits = []
-        while True:
-            line = next(lines)
-            if len(line.strip()) == 0:
-                return hits
-            hits.append(ParseHeaderLine(line))
-        return hits
-
-    def _ParseResultBody(query_id, hits, lines):
-        entry_index = None
-        query_str = ''
-        templ_str = ''
-        def _MakeAln(query_id, hit_id, query_string, templ_string,
-                     q_offset, t_offset):
-            s1 = seq.CreateSequence(query_id, query_string)
-            s1.offset = q_offset-1
-            s2 = seq.CreateSequence(hit_id, templ_string)
-            s2.offset = t_offset-1
-            return seq.CreateAlignment(s1, s2)
-        try:
-            while True:
-                # Lines which we are interested in:
-                # - "Done!" -> end of list
-                # - "No ..." -> next item in list
-                # - "T <hit_id> <start> <data> <end>"
-                # - "Q <query_id> <start> <data> <end>"
-                # -> rest is to be skipped
-                line = next(lines)
-                if len(line.strip()) == 0:
-                    continue
-                if line.startswith('Done!'):
-                    if len(query_str) > 0:
-                        hits[entry_index][0].aln = _MakeAln(\
-                            query_id, hits[entry_index][0].hit_id,
-                            query_str, templ_str, *hits[entry_index][1])
-                    return [h for h, o in hits]
-                if line.startswith('No '):
-                    if len(query_str) > 0:
-                        hits[entry_index][0].aln = _MakeAln(\
-                            query_id, hits[entry_index][0].hit_id,
-                            query_str, templ_str, *hits[entry_index][1])
-                    entry_index = int(line[3:].strip())-1
-                    line = next(lines)
-                    hits[entry_index][0].hit_id = line[1:].strip()
-                    query_str = ''
-                    templ_str = ''
-                    # skip the next line. It doesn't contain information we
-                    # don't already know
-                    next(lines)
-                    continue
-                assert entry_index != None
-                # Skip all "T ..." and "Q ..." lines besides the one we want
-                if line[1:].startswith(' Consensus'):
-                    continue
-                if line[1:].startswith(' ss_pred'):
-                    continue
-                if line[1:].startswith(' ss_conf'):
-                    continue
-                if line[1:].startswith(' ss_dssp'):
-                    continue
-                if line.startswith('T '):
-                    for start_pos in range(22, len(line)):
-                        if line[start_pos].isalpha() or line[start_pos] == '-':
-                            break
-                    end_pos = line.find(' ', start_pos)
-                    # this can fail if we didn't skip all other "T ..." lines
-                    if end_pos == -1:
-                        error_str = "Unparsable line '%s' for entry No %d" \
-                                    % (line.strip(), entry_index + 1)
-                        raise AssertionError(error_str)
-                    templ_str += line[start_pos:end_pos]
-                if line.startswith('Q '):
-                    for start_pos in range(22, len(line)):
-                        if line[start_pos].isalpha() or line[start_pos] == '-':
-                            break
-                    end_pos = line.find(' ', start_pos)
-                    # this can fail if we didn't skip all other "Q ..." lines
-                    if end_pos == -1:
-                        error_str = "Unparsable line '%s' for entry No %d" \
-                                    % (line.strip(), entry_index + 1)
-                        raise AssertionError(error_str)
-                    query_str += line[start_pos:end_pos]
-        except StopIteration:
-            if len(query_str) > 0:
-                hits[entry_index][0].aln = _MakeAln(query_id,
-                                                    hits[entry_index][0].hit_id,
-                                                    query_str, templ_str,
-                                                    *hits[entry_index][1])
-            return [h for h, o in hits]
-    header = _ParseHeaderSection(lines)
-    # parse the table of contents. This is neccessary as some of the properties
-    # (i.e. start of alignment) we need are only given there. From the TOC we
-    # create a list of hits that is then further filled with data when we parse
-    # the actual result body
-    hits = _ParseTableOfContents(lines)
-    return header, _ParseResultBody(header.query, hits, lines)
-
-def ParseA3M(a3m_file):
-    '''
-    Parse secondary structure information and the multiple sequence alignment 
-    out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
-    
-    :param a3m_file: Iterable containing the lines of the A3M file
-    :type a3m_file: iterable (e.g. an open file handle)
-    
-    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
-             (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
-             If not available, "ss_pred" and "ss_conf" entries are set to None.
-    '''
-    profile_dict = dict()
-    state = 'NONE'
-    pred_seq_txt = ''
-    conf_seq_txt = ''
-    msa_seq = list()
-    msa_head = list()
-    for line in a3m_file:
-        if len(line.rstrip()) == 0:
-            continue
-        elif line.startswith('>ss_pred'):
-            state = 'sspred'
-            continue
-        elif line.startswith('>ss_conf'):
-            state = 'ssconf'
-            continue
-        elif line[0] == '>':
-            msa_seq.append('')
-            msa_head.append(line[1:].rstrip())
-            state = 'msa'
-            continue
-
-        if state == 'sspred':
-            pred_seq_txt += line.rstrip()
-        elif state == 'ssconf':
-            conf_seq_txt += line.rstrip()
-        elif state == 'msa':
-            msa_seq[len(msa_seq)-1] += line.rstrip()
-
-    if len(pred_seq_txt) > 0:
-        profile_dict['ss_pred'] = list()
-        profile_dict['ss_conf'] = list()
-        for i in range(0, len(pred_seq_txt)):
-            profile_dict['ss_pred'].append(pred_seq_txt[i])
-            profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
-    else:
-        profile_dict['ss_pred'] = None
-        profile_dict['ss_conf'] = None    
-
-    # post processing
-    # MSA
-    profile_dict['msa'] = None
-    if len(msa_seq) > 1:
-        t = msa_seq[0]
-        al = seq.AlignmentList()
-        for i in range(1, len(msa_seq)):
-            qs = ''
-            ts = ''
-            k = 0
-            for c in msa_seq[i]:
-                if c.islower():
-                    qs += '-'
-                    ts += c.upper()
-                else:
-                    qs += t[k]
-                    ts += c
-                    k += 1
-            nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs), 
-                                     seq.CreateSequence(msa_head[i], ts))
-            al.append(nl)
-        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
-            al, seq.CreateSequence(msa_head[0], t))
-    return profile_dict
-
-
-def ParseHHM(profile):
-    '''
-    Parse secondary structure information and the MSA out of an HHM profile as
-    produced by :meth:`HHblits.A3MToProfile`.
-
-    :param profile: Opened file handle holding the profile.
-    :type profile: :class:`file`
-
-    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
-             (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
-             "consensus" (:class:`~ost.seq.SequenceHandle`).
-             If not available, "ss_pred" and "ss_conf" entries are set to None.
-    '''
-    profile_dict = dict()
-    state = 'NONE'
-    pred_seq_txt = ''
-    conf_seq_txt = ''
-    consensus_txt = ''
-    msa_seq = list()
-    msa_head = list()
-    for line in profile:
-        if len(line.rstrip()) == 0:
-            continue
-        if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
-            state = 'sspred'
-            continue
-        elif line.rstrip() == '>ss_conf PSIPRED confidence values':
-            state = 'ssconf'
-            continue
-        elif line.rstrip() == '>Consensus':
-            state = 'consensus'
-            continue
-        elif line[0] == '>':
-            if state == 'consensus' or state == 'msa':
-                msa_seq.append('')
-                msa_head.append(line[1:].rstrip())
-            else:
-                raise IOError('Profile file "%s" is missing ' % profile.name+
-                              'the "Consensus" section')
-            state = 'msa'
-            continue
-        elif line[0] == '#':
-            state = 'NONE'
-            continue
-
-        if state == 'sspred':
-            pred_seq_txt += line.rstrip()
-        elif state == 'ssconf':
-            conf_seq_txt += line.rstrip()
-        elif state == 'msa':
-            msa_seq[len(msa_seq)-1] += line.rstrip()
-        elif state == 'consensus':
-            consensus_txt += line.rstrip()
-
-    if len(pred_seq_txt) > 0:
-        profile_dict['ss_pred'] = list()
-        profile_dict['ss_conf'] = list()
-        for i in range(0, len(pred_seq_txt)):
-            profile_dict['ss_pred'].append(pred_seq_txt[i])
-            profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
-    else:
-        profile_dict['ss_pred'] = None
-        profile_dict['ss_conf'] = None
-
-    # post processing
-    # MSA
-    profile_dict['msa'] = None
-    if len(msa_seq):
-        t = msa_seq[0]
-        al = seq.AlignmentList()
-        for i in range(1, len(msa_seq)):
-            qs = ''
-            ts = ''
-            k = 0
-            for c in msa_seq[i]:
-                if c.islower():
-                    qs += '-'
-                    ts += c.upper()
-                else:
-                    qs += t[k]
-                    ts += c
-                    k += 1
-            nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
-                                     seq.CreateSequence(msa_head[i], ts))
-            al.append(nl)
-        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
-            al, seq.CreateSequence(msa_head[0], t))
-    # Consensus
-    profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
-
-    return profile_dict
-
-
-class HHblits:
-    """
-    Initialise a new HHblits "search" for the given query. Query may either
-    be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
-    query is the actual query sequence, in the latter case, the query is the
-    filename to the file containing the query.
-
-    :param query: Query sequence as file or sequence.
-    :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
-    :param hhsuite_root: Path to the top-level directory of your hhsuite
-                         installation.
-    :type hhsuite_root: :class:`str`
-    :param hhblits_bin: Name of the hhblits binary. Will only be used if
-                        :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
-    :type hhblits_bin: :class:`str`
-    :param working_dir: Directory for temporary files. Will be created if not
-                        present but **not** automatically deleted.
-    :type working_dir: :class:`str`
-    """
-    OUTPUT_PREFIX = 'query_hhblits'
-    def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
-        self.query = query
-        self.hhsuite_root = hhsuite_root
-        if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
-            self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
-            self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
-        else:
-            self.hhblits_bin = settings.Locate('hhblits',
-                                               explicit_file_name=hhblits_bin)
-            self.bin_dir = os.path.dirname(self.hhblits_bin)
-            # guess root folder (note: this may fail in future)
-            self.hhsuite_root = os.path.dirname(self.bin_dir)
-                                             
-        if working_dir:
-            self.needs_cleanup = False
-            self.working_dir = working_dir
-            if not os.path.exists(working_dir):
-                os.mkdir(working_dir)
-            if isinstance(query, str):
-                self.filename = os.path.abspath(os.path.join(
-                    self.working_dir, os.path.basename(query)))
-                if self.filename != os.path.abspath(query):
-                    shutil.copy(query, self.filename)
-            else:
-                self.filename = os.path.join(self.working_dir,
-                                             '%s.fasta' % HHblits.OUTPUT_PREFIX)
-                ost.io.SaveSequence(query, self.filename)
-        else:
-            self.needs_cleanup = True
-            if isinstance(query, str):
-                self.working_dir = tempfile.mkdtemp()
-                self.filename = os.path.abspath(os.path.join(
-                    self.working_dir, os.path.basename(query)))
-                shutil.copy(query, self.filename)
-            else:
-                tmp_dir = utils.TempDirWithFiles((query,))
-                self.working_dir = tmp_dir.dirname
-                self.filename = tmp_dir.files[0]
-
-    def BuildQueryMSA(self, nrdb, options={}, a3m_file=None, assign_ss=True):
-        """Builds the MSA for the query sequence.
-
-        This function directly uses hhblits of hhtools. While in theory it would
-        be possible to do this by PSI-blasting on our own, hhblits is supposed
-        to be faster. Also it is supposed to prevent alignment corruption. The
-        alignment corruption is caused by low-scoring terminal alignments that
-        draw the sequences found by PSI-blast away from the optimum. By removing
-        these low scoring ends, part of the alignment corruption can be
-        suppressed.
-
-        The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
-        already produced, hhblits is not called again and the existing file path
-        is returned (neglecting the *assign_ss* flag!!!).
-
-        :param nrdb: Database to be align against; has to be an hhblits database
-        :type nrdb: :class:`str`
-
-        :param options: Dictionary of options to *hhblits*, one "-" is added in
-                        front of every key. Boolean True values add flag without
-                        value. Merged with default options 
-                        {'cpu': 1, 'n': 1, 'e': 0.001}, where 'n' defines the 
-                        number of iterations.
-        :type options: :class:`dict`
-
-        :param a3m_file: a path of a3m_file to be used, optional
-        :type a3m_file: :class:`str`
-
-        :param assign_ss: HHblits does not assign predicted secondary structure 
-                          by default. You can optionally assign it with the
-                          addss.pl script provided by the HH-suite. However, 
-                          your HH-suite installation requires you to specify
-                          paths to PSIRED etc. We refer to the HH-suite user
-                          guide for further instructions.
-        :type assign_ss:  :class:`bool`
-
-        :return: The path to the A3M file containing the MSA
-        :rtype: :class:`str`
-        """
-        if a3m_file is None:
-            a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
-        else:
-            a3m_file = os.path.abspath(a3m_file)
-        if os.path.exists(a3m_file):
-            ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
-            return a3m_file
-        ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
-        full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
-                                 os.path.split(nrdb)[1])
-        # create MSA
-        opts = {'cpu' : 1,     # no. of cpus used
-                'n'   : 1,     # no. of iterations
-                'e'   : 0.001} # evalue threshold
-        opts.update(options)
-        opt_cmd, _ = _ParseOptions(opts)
-        hhblits_cmd = '%s -i %s -oa3m %s -d %s %s' % \
-                      (self.hhblits_bin, self.filename, a3m_file, full_nrdb,
-                       opt_cmd)
-
-        p = subprocess.run(hhblits_cmd, shell=True, cwd=self.working_dir,
-                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-        lines = p.stdout.decode().splitlines()
-        for line in lines:
-            ost.LogVerbose(line.strip())
-
-        lines = p.stderr.decode().splitlines()
-        for line in lines:
-            ost.LogError(line.strip())
-
-        if not os.path.exists(a3m_file):
-            raise RuntimeError('Building query profile failed, no output')
-
-        if assign_ss:
-            return self.AssignSSToA3M(a3m_file)
-        else:
-            return a3m_file
-
-    def AssignSSToA3M(self, a3m_file):
-        """
-        HHblits does not assign predicted secondary structure by default. 
-        You can optionally assign it with the addss.pl script provided by the 
-        HH-suite. However, your HH-suite installation requires you to specify
-        paths to PSIRED etc. We refer to the HH-suite user guide for further 
-        instructions.
-
-        :param a3m_file:  Path to file you want to assign secondary structure to
-        :type a3m_file:  :class:`str`
-        """
-
-        a3m_file = os.path.abspath(a3m_file)
-        addss_cmd = "perl %s %s" % (os.path.join(self.hhsuite_root, 
-                                                 'scripts/addss.pl'), 
-                                    a3m_file)
-        env = dict(os.environ)
-        env.update({'PERL5LIB' : os.path.join(self.hhsuite_root, 'scripts'),
-                    'HHLIB' : os.path.join(self.hhsuite_root),
-                    'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
-                                        os.environ['PATH'])})
-
-        p = subprocess.run(addss_cmd, shell=True, cwd=self.working_dir,
-                           env=env, stdout=subprocess.PIPE,
-                           stderr=subprocess.PIPE)
-
-        lines = p.stdout.decode().splitlines()
-        for line in lines:
-            ost.LogVerbose(line.strip())
-            if 'error' in line.lower() or 'bad interpreter' in line.lower():
-                raise RuntimeError('Predicting secondary structure for MSA '+
-                                   '(%s) failed, on command: %s' % (a3m_file, line))
-
-        lines = p.stderr.decode().splitlines()
-        for line in lines:
-            ost.LogError(line.strip())
-            if 'error' in line.lower() or 'bad interpreter' in line.lower():
-                raise RuntimeError('Predicting secondary structure for MSA '+
-                                   '(%s) failed, on command: %s' % (a3m_file, line))
-
-        return a3m_file
-
-    def A3MToProfile(self, a3m_file, hhm_file=None):
-        """
-        Converts the A3M alignment file to a hhm profile. If hhm_file is not
-        given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
-
-        The produced HHM file can be parsed by :func:`ParseHHM`.
-
-        If the file was already produced, the existing file path is returned
-        without recomputing it.
-
-        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
-        :type a3m_file: :class:`str`
-
-        :param hhm_file: Desired output file name 
-        :type hhm_file: :class:`str`
-
-        :return: Path to the profile file
-        :rtype: :class:`str`
-        """
-        hhmake = os.path.join(self.bin_dir, 'hhmake')
-        if not hhm_file:
-            hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
-        if os.path.exists(hhm_file):
-            return hhm_file
-        ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
-        p = subprocess.run('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
-                           shell=True, stdout=subprocess.PIPE,
-                           stderr=subprocess.PIPE)
-        lines = p.stdout.decode().splitlines()
-        for line in lines:
-            ost.LogVerbose(line.strip())
-        lines = p.stderr.decode().splitlines()
-        for line in lines:
-            ost.LogError(line.strip())
-
-        if p.returncode != 0:
-            raise IOError('could not convert a3m to hhm file')
-
-        if not os.path.exists(hhm_file):
-            raise RuntimeError('could not convert a3m to hhm file, no output')
-
-        return hhm_file
-
-    def A3MToCS(self, a3m_file, cs_file=None, options={}):
-        """
-        Converts the A3M alignment file to a column state sequence file. If
-        cs_file is not given, the output file will be set to
-        <:attr:`a3m_file`-basename>.seq219.
-
-        If the file was already produced, the existing file path is returned
-        without recomputing it.
-
-        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
-        :type a3m_file: :class:`str`
-
-        :param cs_file: Output file name (may be omitted)
-        :type cs_file: :class:`str`
-
-        :param options: Dictionary of options to *cstranslate*, one "-" is added
-                        in front of every key. Boolean True values add flag
-                        without value.
-        :type options: :class:`dict`
-
-        :return: Path to the column state sequence file
-        :rtype: :class:`str`
-        """
-        cstranslate = os.path.join(self.bin_dir, 'cstranslate')
-        if not cs_file:
-            cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
-        if os.path.exists(cs_file):
-            return cs_file
-        opt_cmd, _ = _ParseOptions(options)
-        cs_cmd = '%s -i %s -o %s %s' % (
-            cstranslate,
-            os.path.abspath(a3m_file),
-            os.path.abspath(cs_file),
-            opt_cmd)
-        ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
-        p = subprocess.run(cs_cmd, shell=True, cwd=self.working_dir,
-                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-        if not os.path.exists(cs_file):
-            raise RuntimeError('Creating column state sequence file failed, ' +
-                               'no output')
-
-        if b'Wrote abstract state sequence to' in p.stdout:
-            return cs_file
-        else:
-            raise RuntimeError('Creating column state sequence file failed')
-
-    def Cleanup(self):
-        """Delete temporary data.
-
-        Delete temporary data if no working dir was given. Controlled by
-        :attr:`needs_cleanup`.
-        """
-        if self.needs_cleanup and os.path.exists(self.working_dir):
-            shutil.rmtree(self.working_dir)
-
-    def CleanupFailed(self):
-        '''In case something went wrong, call to make sure everything is clean.
-
-        This will delete the working dir independently of :attr:`needs_cleanup`.
-        '''
-        store_needs_cleanup = self.needs_cleanup
-        self.needs_cleanup = True
-        self.Cleanup()
-        self.needs_cleanup = store_needs_cleanup
-
-    def Search(self, a3m_file, database, options={}, prefix=''):
-        """
-        Searches for templates in the given database. Before running the search,
-        the hhm file is copied. This makes it possible to launch several hhblits
-        instances at once. Upon success, the filename of the result file is
-        returned. This file may be parsed with :func:`ParseHHblitsOutput`.
-
-        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
-        :type a3m_file: :class:`str`
-
-        :param database: Search database, needs to be the common prefix of the
-                         database files
-        :type database: :class:`str`
-
-        :param options: Dictionary of options to *hhblits*, one "-" is added in
-                        front of every key. Boolean True values add flag without
-                        value. Merged with default options {'cpu': 1, 'n': 1},
-                        where 'n' defines the number of iterations.
-        :type options: :class:`dict`
+import re
 
-        :param prefix: Prefix to the result file
-        :type prefix: :class:`str`
+def GetHHblitsVersionString():
+    version_string = None
 
-        :return: The path to the result file
-        :rtype: :class:`str`
-        """
-        opts = {'cpu' : 1, # no. of cpus used
-                'n'   : 1}   # no. of iterations
-        opts.update(options)
-        opt_cmd, opt_str = _ParseOptions(opts)
-        base = os.path.basename(os.path.splitext(a3m_file)[0])
-        hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
-        hhr_file = os.path.join(self.working_dir, hhr_file)
-        search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
-            self.hhblits_bin,
-            opt_cmd,
-            os.path.abspath(a3m_file),
-            os.path.abspath(hhr_file),
-            os.path.join(os.path.abspath(os.path.split(database)[0]),
-                         os.path.split(database)[1]))
-        ost.LogInfo('searching %s' % database)
-        ost.LogVerbose(search_cmd)
-        p = subprocess.run(search_cmd, shell=True, cwd=self.working_dir,
-                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        lines = p.stdout.decode().splitlines()
-        for line in lines:
-            ost.LogVerbose(line.strip())
-        lines = p.stderr.decode().splitlines()
-        for line in lines:
-            ost.LogError(line.strip())
+    # raises if hhblits binary is not in path
+    hhblits_bin = settings.Locate('hhblits')
 
-        if p.returncode != 0:
-            raise RuntimeError('Sequence search failed')
+    # run hhblits to determine version
+    proc = subprocess.run([hhblits_bin, '-h'], stdout=subprocess.PIPE)
 
-        if not os.path.exists(hhr_file):
-            raise RuntimeError('Sequence search failed, no output')
+    # regular expression in the form
+    # HHblits, whatever except newline, x.y.z
+    # where x.y.z are the version numerals
+    version_line = re.search('HHblits[^\n]+\d+\.\d+\.\d+', proc.stdout.decode())
+    if version_line is not None:
+        version = re.search('\d+\.\d+\.\d+', version_line.group())
+        if version is not None:
+            version_string = version.group()
 
-        return hhr_file
+    return version_string
 
+hhblits_version_string = GetHHblitsVersionString()
 
-def _ParseOptions(opts):
-    """
-    :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
-             passed to command ("-" added in front of keys, options
-             separated by space) and opt_str (options separated by "_")
-             can be used for filenames.
-    :param opts: Dictionary of options, one "-" is added in front of every
-                 key. Boolean True values add flag without value.
-    """
-    opt_cmd = list()
-    opt_str = list()
-    for k, val in opts.items():
-        if type(val) == type(True):
-            if val == True:
-                opt_cmd.append('-%s' % str(k))
-                opt_str.append(str(k))
-        else:
-            opt_cmd.append('-%s %s' % (str(k), str(val)))
-            opt_str.append('%s%s' % (str(k), str(val)))
-    opt_cmd = ' '.join(opt_cmd)
-    opt_str = '_'.join(opt_str)
-    return opt_cmd, opt_str
+if hhblits_version_string is None:
+    raise RuntimeError('Could not determine HHblits version. Please '\
+                       'import the hhblits2 or hhblits3 binding explicitely.')
 
+hhblits_version = int(hhblits_version_string.split('.')[0])
 
-__all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
-           'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
-           'ParseHeaderLine']
+if hhblits_version == 2:
+    from ost.bindings.hhblits2 import *
+elif hhblits_version == 3:
+    from ost.bindings.hhblits3 import *
+else:
+    raise RuntimeError('Determined HHblits version to be %i. OpenStructure '\
+                       'only supports 2 or 3. If you think the detected '\
+                       'version is wrong and you have version 2 or 3, '\
+                       'import the hhblits2 or hhblits3 binding explicitely.'\
+                       %(hhblits_version))
 
-#  LocalWords:  HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
-#  LocalWords:  cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
-#  LocalWords:  attr basename rtype cstranslate tuple HHblitsHeader meth aln
-#  LocalWords:  HHblitsHit iterable evalue pvalue neff hmms datetime
-#  LocalWords:  whitespace whitespaces
diff --git a/modules/bindings/pymod/hhblits2.py b/modules/bindings/pymod/hhblits2.py
new file mode 100644
index 000000000..ff08319a4
--- /dev/null
+++ b/modules/bindings/pymod/hhblits2.py
@@ -0,0 +1,792 @@
+'''HHblits wrapper classes and functions.
+'''
+
+import subprocess
+import datetime
+import os
+import shutil
+import tempfile
+
+import ost
+from ost import settings, seq
+from ost.bindings import utils
+
+class HHblitsHit:
+    """
+    A hit found by HHblits
+
+    .. attribute:: hit_id
+
+      String identifying the hit
+
+      :type: :class:`str`
+
+    .. attribute:: aln
+
+      Pairwise alignment containing the aligned part between the query and the
+      target. First sequence is the query, the second sequence the target.
+
+      :type: :class:`~ost.seq.AlignmentHandle`
+
+    .. attribute:: score
+
+      The alignment score
+
+      :type: :class:`float`
+
+    .. attribute:: ss_score
+
+      The secondary structure score
+
+      :type: :class:`float`
+
+    .. attribute:: evalue
+
+      The E-value of the alignment
+
+      :type: :class:`float`
+
+    .. attribute:: pvalue
+
+      The P-value of the alignment
+
+      :type: :class:`float`
+
+    .. attribute:: prob
+
+      The probability of the alignment (between 0 and 100)
+
+      :type: :class:`float`
+    """
+    def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
+        self.hit_id = hit_id
+        self.aln = aln
+        self.score = score
+        self.ss_score = ss_score
+        self.evalue = evalue
+        self.prob = prob
+        self.pvalue = pvalue
+
+class HHblitsHeader:
+    """Stats from the beginning of search output.
+
+    .. attribute:: query
+
+      The name of the query sequence
+
+      :type: :class:`str`
+
+    .. attribute:: match_columns
+
+      Total of aligned Match columns
+
+      :type: :class:`int`
+
+    .. attribute:: n_eff
+
+      Value of the ``-neff`` option
+
+      :type: :class:`float`
+
+    .. attribute:: searched_hmms
+
+      Number of profiles searched
+
+      :type: :class:`int`
+
+    .. attribute:: date
+
+      Execution date
+
+      :type: :class:`datetime.datetime`
+
+    .. attribute:: command
+
+      Command used to run
+
+      :type: :class:`str`
+    """
+    def __init__(self):
+        self.query = ''
+        self.match_columns = 0
+        self.n_eff = 0
+        self.searched_hmms = 0
+        self.date = None
+        self.command = ''
+
+def ParseHeaderLine(line):
+    '''Fetch header content.
+
+    First, we seek the start of the identifier, that is, the first whitespace
+    after the hit number + 1. Since the identifier may contain whitespaces
+    itself, we cannot split the whole line
+
+    :param line: Line from the output header.
+    :type line: :class:`str`
+
+    :return: Hit information and query/template offsets
+    :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
+    '''
+    for i in range(0, len(line)):
+        if line[i].isdigit():
+            break
+    for i in range(i, len(line)):
+        if line[i] == ' ':
+            break
+    assert len(line)-i >= 31 and line[i+1] != ' '
+    hit_id = line[i+1:i+31].strip()
+    fields = line[i+32:].split()
+    prob = float(fields[0])
+    evalue = float(fields[1])
+    pvalue = float(fields[2])
+    score = float(fields[3])
+    ss_score = float(fields[4])
+    offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
+    return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
+            offsets)
+
+def ParseHHblitsOutput(output):
+    """
+    Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
+    the header of the search results and a list of hits.
+
+    :param output: Iterable containing the lines of the HHblits output file
+    :type output: iterable (e.g. an open file handle)
+
+    :return: a tuple of the header of the search results and the hits
+    :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
+    """
+    lines = iter(output)
+    def _ParseHeaderSection(lines):
+        value_start_column = 14
+        date_pattern = '%a %b %d %H:%M:%S %Y'
+        header = HHblitsHeader()
+        line = next(lines)
+        assert line.startswith('Query')
+        header.query = line[value_start_column:].strip()
+        line = next(lines)
+        assert line.startswith('Match_columns')
+        header.match_columns = int(line[value_start_column:].strip())
+
+        line = next(lines)
+        assert line.startswith('No_of_seqs')
+
+        line = next(lines)
+        assert line.startswith('Neff')
+        header.n_eff = float(line[value_start_column:].strip())
+
+        line = next(lines)
+        assert line.startswith('Searched_HMMs')
+        header.searched_hmms = int(line[value_start_column:].strip())
+
+        line = next(lines)
+        assert line.startswith('Date')
+        value = line[value_start_column:].strip()
+        header.date = datetime.datetime.strptime(value, date_pattern)
+
+        line = next(lines)
+        assert line.startswith('Command')
+        header.command = line[value_start_column:].strip()
+
+        line = next(lines)
+        assert len(line.strip()) == 0
+        return header
+
+    def _ParseTableOfContents(lines):
+        line = next(lines)
+        assert line.startswith(' No Hit')
+        hits = []
+        while True:
+            line = next(lines)
+            if len(line.strip()) == 0:
+                return hits
+            hits.append(ParseHeaderLine(line))
+        return hits
+
+    def _ParseResultBody(query_id, hits, lines):
+        entry_index = None
+        query_str = ''
+        templ_str = ''
+        def _MakeAln(query_id, hit_id, query_string, templ_string,
+                     q_offset, t_offset):
+            s1 = seq.CreateSequence(query_id, query_string)
+            s1.offset = q_offset-1
+            s2 = seq.CreateSequence(hit_id, templ_string)
+            s2.offset = t_offset-1
+            return seq.CreateAlignment(s1, s2)
+        try:
+            while True:
+                # Lines which we are interested in:
+                # - "Done!" -> end of list
+                # - "No ..." -> next item in list
+                # - "T <hit_id> <start> <data> <end>"
+                # - "Q <query_id> <start> <data> <end>"
+                # -> rest is to be skipped
+                line = next(lines)
+                if len(line.strip()) == 0:
+                    continue
+                if line.startswith('Done!'):
+                    if len(query_str) > 0:
+                        hits[entry_index][0].aln = _MakeAln(\
+                            query_id, hits[entry_index][0].hit_id,
+                            query_str, templ_str, *hits[entry_index][1])
+                    return [h for h, o in hits]
+                if line.startswith('No '):
+                    if len(query_str) > 0:
+                        hits[entry_index][0].aln = _MakeAln(\
+                            query_id, hits[entry_index][0].hit_id,
+                            query_str, templ_str, *hits[entry_index][1])
+                    entry_index = int(line[3:].strip())-1
+                    line = next(lines)
+                    hits[entry_index][0].hit_id = line[1:].strip()
+                    query_str = ''
+                    templ_str = ''
+                    # skip the next line. It doesn't contain information we
+                    # don't already know
+                    next(lines)
+                    continue
+                assert entry_index != None
+                # Skip all "T ..." and "Q ..." lines besides the one we want
+                if line[1:].startswith(' Consensus'):
+                    continue
+                if line[1:].startswith(' ss_pred'):
+                    continue
+                if line[1:].startswith(' ss_conf'):
+                    continue
+                if line[1:].startswith(' ss_dssp'):
+                    continue
+                if line.startswith('T '):
+                    for start_pos in range(22, len(line)):
+                        if line[start_pos].isalpha() or line[start_pos] == '-':
+                            break
+                    end_pos = line.find(' ', start_pos)
+                    # this can fail if we didn't skip all other "T ..." lines
+                    if end_pos == -1:
+                        error_str = "Unparsable line '%s' for entry No %d" \
+                                    % (line.strip(), entry_index + 1)
+                        raise AssertionError(error_str)
+                    templ_str += line[start_pos:end_pos]
+                if line.startswith('Q '):
+                    for start_pos in range(22, len(line)):
+                        if line[start_pos].isalpha() or line[start_pos] == '-':
+                            break
+                    end_pos = line.find(' ', start_pos)
+                    # this can fail if we didn't skip all other "Q ..." lines
+                    if end_pos == -1:
+                        error_str = "Unparsable line '%s' for entry No %d" \
+                                    % (line.strip(), entry_index + 1)
+                        raise AssertionError(error_str)
+                    query_str += line[start_pos:end_pos]
+        except StopIteration:
+            if len(query_str) > 0:
+                hits[entry_index][0].aln = _MakeAln(query_id,
+                                                    hits[entry_index][0].hit_id,
+                                                    query_str, templ_str,
+                                                    *hits[entry_index][1])
+            return [h for h, o in hits]
+    header = _ParseHeaderSection(lines)
+    # parse the table of contents. This is neccessary as some of the properties
+    # (i.e. start of alignment) we need are only given there. From the TOC we
+    # create a list of hits that is then further filled with data when we parse
+    # the actual result body
+    hits = _ParseTableOfContents(lines)
+    return header, _ParseResultBody(header.query, hits, lines)
+
+def ParseA3M(a3m_file):
+    '''
+    Parse secondary structure information and the multiple sequence alignment 
+    out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
+    
+    :param a3m_file: Iterable containing the lines of the A3M file
+    :type a3m_file: iterable (e.g. an open file handle)
+    
+    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
+             (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
+    '''
+    profile_dict = dict()
+    state = 'NONE'
+    pred_seq_txt = ''
+    conf_seq_txt = ''
+    msa_seq = list()
+    msa_head = list()
+    for line in a3m_file:
+        if len(line.rstrip()) == 0:
+            continue
+        elif line.startswith('>ss_pred'):
+            state = 'sspred'
+            continue
+        elif line.startswith('>ss_conf'):
+            state = 'ssconf'
+            continue
+        elif line[0] == '>':
+            if state == 'ssconf' or state == 'msa':
+                msa_seq.append('')
+                msa_head.append(line[1:].rstrip())
+            else:
+                raise IOError('The A3M file is missing the "ss_conf" section')
+            state = 'msa'
+            continue
+
+        if state == 'sspred':
+            pred_seq_txt += line.rstrip()
+        elif state == 'ssconf':
+            conf_seq_txt += line.rstrip()
+        elif state == 'msa':
+            msa_seq[len(msa_seq)-1] += line.rstrip()
+
+    profile_dict['ss_pred'] = list()
+    profile_dict['ss_conf'] = list()
+    for i in range(0, len(pred_seq_txt)):
+        profile_dict['ss_pred'].append(pred_seq_txt[i])
+        profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
+    
+    # post processing
+    # MSA
+    profile_dict['msa'] = None
+    if len(msa_seq) > 1:
+        t = msa_seq[0]
+        al = seq.AlignmentList()
+        for i in range(1, len(msa_seq)):
+            qs = ''
+            ts = ''
+            k = 0
+            for c in msa_seq[i]:
+                if c.islower():
+                    qs += '-'
+                    ts += c.upper()
+                else:
+                    qs += t[k]
+                    ts += c
+                    k += 1
+            nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs), 
+                                     seq.CreateSequence(msa_head[i], ts))
+            al.append(nl)
+        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
+            al, seq.CreateSequence(msa_head[0], t))
+    return profile_dict
+
+
+def ParseHHM(profile):
+    '''
+    Parse secondary structure information and the MSA out of an HHM profile as
+    produced by :meth:`HHblits.A3MToProfile`.
+
+    :param profile: Opened file handle holding the profile.
+    :type profile: :class:`file`
+
+    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
+             (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
+             "consensus" (:class:`~ost.seq.SequenceHandle`).
+    '''
+    profile_dict = dict()
+    state = 'NONE'
+    pred_seq_txt = ''
+    conf_seq_txt = ''
+    consensus_txt = ''
+    msa_seq = list()
+    msa_head = list()
+    for line in profile:
+        if len(line.rstrip()) == 0:
+            continue
+        if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
+            state = 'sspred'
+            continue
+        elif line.rstrip() == '>ss_conf PSIPRED confidence values':
+            state = 'ssconf'
+            continue
+        elif line.rstrip() == '>Consensus':
+            state = 'consensus'
+            continue
+        elif line[0] == '>':
+            if state == 'consensus' or state == 'msa':
+                msa_seq.append('')
+                msa_head.append(line[1:].rstrip())
+            else:
+                raise IOError('Profile file "%s" is missing ' % profile.name+
+                              'the "Consensus" section')
+            state = 'msa'
+            continue
+        elif line[0] == '#':
+            state = 'NONE'
+            continue
+
+        if state == 'sspred':
+            pred_seq_txt += line.rstrip()
+        elif state == 'ssconf':
+            conf_seq_txt += line.rstrip()
+        elif state == 'msa':
+            msa_seq[len(msa_seq)-1] += line.rstrip()
+        elif state == 'consensus':
+            consensus_txt += line.rstrip()
+
+    profile_dict['ss_pred'] = list()
+    profile_dict['ss_conf'] = list()
+    for i in range(0, len(pred_seq_txt)):
+        profile_dict['ss_pred'].append(pred_seq_txt[i])
+        profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
+
+    # post processing
+    # MSA
+    profile_dict['msa'] = None
+    if len(msa_seq):
+        t = msa_seq[0]
+        al = seq.AlignmentList()
+        for i in range(1, len(msa_seq)):
+            qs = ''
+            ts = ''
+            k = 0
+            for c in msa_seq[i]:
+                if c.islower():
+                    qs += '-'
+                    ts += c.upper()
+                else:
+                    qs += t[k]
+                    ts += c
+                    k += 1
+            nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
+                                     seq.CreateSequence(msa_head[i], ts))
+            al.append(nl)
+        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
+            al, seq.CreateSequence(msa_head[0], t))
+        #print profile_dict['msa'].ToString(80)
+    # Consensus
+    profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
+
+    return profile_dict
+
+
+class HHblits:
+    """
+    Initialise a new HHblits "search" for the given query. Query may either
+    be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
+    query is the actual query sequence, in the latter case, the query is the
+    filename to the file containing the query.
+
+    :param query: Query sequence as file or sequence.
+    :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
+    :param hhsuite_root: Path to the top-level directory of your hhsuite
+                         installation.
+    :type hhsuite_root: :class:`str`
+    :param hhblits_bin: Name of the hhblits binary. Will only be used if
+                        :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
+    :type hhblits_bin: :class:`str`
+    :param working_dir: Directory for temporary files. Will be created if not
+                        present but **not** automatically deleted.
+    :type working_dir: :class:`str`
+    """
+    OUTPUT_PREFIX = 'query_hhblits'
+    def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
+        self.query = query
+        self.hhsuite_root = hhsuite_root
+        if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
+            self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
+            self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
+        else:
+            self.hhblits_bin = settings.Locate('hhblits',
+                                               explicit_file_name=hhblits_bin)
+            self.bin_dir = os.path.dirname(self.hhblits_bin)
+            # guess root folder (note: this may fail in future)
+            self.hhsuite_root = os.path.dirname(self.bin_dir)
+        self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh')
+        if working_dir:
+            self.needs_cleanup = False
+            self.working_dir = working_dir
+            if not os.path.exists(working_dir):
+                os.mkdir(working_dir)
+            if isinstance(query, str):
+                self.filename = os.path.abspath(os.path.join(
+                    self.working_dir, os.path.basename(query)))
+                if self.filename != os.path.abspath(query):
+                    shutil.copy(query, self.filename)
+            else:
+                self.filename = os.path.join(self.working_dir,
+                                             '%s.fasta' % HHblits.OUTPUT_PREFIX)
+                ost.io.SaveSequence(query, self.filename)
+        else:
+            self.needs_cleanup = True
+            if isinstance(query, str):
+                self.working_dir = tempfile.mkdtemp()
+                self.filename = os.path.abspath(os.path.join(
+                    self.working_dir, os.path.basename(query)))
+                shutil.copy(query, self.filename)
+            else:
+                tmp_dir = utils.TempDirWithFiles((query,))
+                self.working_dir = tmp_dir.dirname
+                self.filename = tmp_dir.files[0]
+
+    def BuildQueryMSA(self, nrdb, options={}, a3m_file=None):
+        """Builds the MSA for the query sequence.
+
+        This function directly uses hhblits of hhtools. While in theory it would
+        be possible to do this by PSI-blasting on our own, hhblits is supposed
+        to be faster. Also it is supposed to prevent alignment corruption. The
+        alignment corruption is caused by low-scoring terminal alignments that
+        draw the sequences found by PSI-blast away from the optimum. By removing
+        these low scoring ends, part of the alignment corruption can be
+        suppressed.
+
+        hhblits does **not** call PSIPRED on the MSA to predict the secondary
+        structure of the query sequence. This is done by addss.pl of hhtools.
+        The predicted secondary structure is stored together with the sequences
+        identified by hhblits.
+
+        The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
+        already produced, hhblits is not called again and the existing file path
+        is returned.
+
+        :param nrdb: Database to be align against; has to be an hhblits database
+        :type nrdb: :class:`str`
+
+        :param options: Dictionary of options to *hhblits*, one "-" is added in
+                        front of every key. Boolean True values add flag without
+                        value. Merged with default options {'cpu': 1, 'n': 1},
+                        where 'n' defines the number of iterations.
+        :type options: :class:`dict`
+
+        :param a3m_file: a path of a3m_file to be used, optional
+        :type a3m_file: :class:`str`
+
+        :return: The path to the A3M file containing the MSA
+        :rtype: :class:`str`
+        """
+        if a3m_file is None:
+            a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
+        else:
+            a3m_file = os.path.abspath(a3m_file)
+        if os.path.exists(a3m_file):
+            ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
+            return a3m_file
+        ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
+        full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
+                                 os.path.split(nrdb)[1])
+        # create MSA
+        opts = {'cpu' : 1, # no. of cpus used
+                'n'   : 1}   # no. of iterations
+        opts.update(options)
+        opt_cmd, _ = _ParseOptions(opts)
+        hhblits_cmd = '%s -e 0.001 -i %s -oa3m %s -d %s %s' % \
+                      (self.hhblits_bin, self.filename, a3m_file, full_nrdb,
+                       opt_cmd)
+        job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir,
+                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        sout, _ = job.communicate()
+        lines = sout.decode().splitlines()
+        for line in lines:
+            ost.LogVerbose(line.strip())
+        if not os.path.exists(a3m_file):
+            ost.LogWarning('Building query profile failed, no output')
+            return a3m_file
+        # add secondary structure annotation
+        addss_cmd = "%s %s" % (os.path.join(self.hhsuite_root,
+                                            'lib/hh/scripts/addss.pl'),
+                               a3m_file)
+        env = dict(os.environ)
+        env.update({'PERL5LIB' : os.path.join(self.hhsuite_root,
+                                              'lib/hh/scripts'),
+                    'HHLIB' : self.hhlib_dir,
+                    'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
+                                        os.environ['PATH'])})
+        job = subprocess.Popen(addss_cmd, shell=True, cwd=self.working_dir,
+                               env=env, stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+        sout, serr = job.communicate()
+        lines = sout.decode().splitlines()
+        for line in lines:
+            if 'error' in line.lower():
+                ost.LogWarning('Predicting secondary structure for MSA '+
+                               '(%s) failed, on command: %s' % (a3m_file, line))
+                return a3m_file
+        return a3m_file
+
+    def A3MToProfile(self, a3m_file, hhm_file=None):
+        """
+        Converts the A3M alignment file to a hhm profile. If hhm_file is not
+        given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
+
+        The produced HHM file can be parsed by :func:`ParseHHM`.
+
+        If the file was already produced, the existing file path is returned
+        without recomputing it.
+
+        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
+        :type a3m_file: :class:`str`
+
+        :param hhm_file: Desired output file name 
+        :type hhm_file: :class:`str`
+
+        :return: Path to the profile file
+        :rtype: :class:`str`
+        """
+        hhmake = os.path.join(self.bin_dir, 'hhmake')
+        if not hhm_file:
+            hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
+        if os.path.exists(hhm_file):
+            return hhm_file
+        ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
+        os.putenv('HHLIB', self.hhlib_dir)
+        job = subprocess.Popen('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
+                               shell=True, stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+        sout, serr = job.communicate()
+        lines = serr.decode().splitlines()
+        for line in lines:
+            ost.LogWarning(line)
+        lines = sout.decode().splitlines()
+        for line in lines:
+            ost.LogVerbose(line)
+        if job.returncode !=0:
+            raise IOError('could not convert a3m to hhm file')
+        return hhm_file
+
+    def A3MToCS(self, a3m_file, cs_file=None, options={}):
+        """
+        Converts the A3M alignment file to a column state sequence file. If
+        cs_file is not given, the output file will be set to
+        <:attr:`a3m_file`-basename>.seq219.
+
+        If the file was already produced, the existing file path is returned
+        without recomputing it.
+
+        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
+        :type a3m_file: :class:`str`
+
+        :param cs_file: Output file name (may be omitted)
+        :type cs_file: :class:`str`
+
+        :param options: Dictionary of options to *cstranslate*, one "-" is added
+                        in front of every key. Boolean True values add flag
+                        without value.
+        :type options: :class:`dict`
+
+        :return: Path to the column state sequence file
+        :rtype: :class:`str`
+        """
+        cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
+        if not cs_file:
+            cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
+        if os.path.exists(cs_file):
+            return cs_file
+        opt_cmd, _ = _ParseOptions(options)
+        cs_cmd = '%s -i %s -o %s %s' % (
+            cstranslate,
+            os.path.abspath(a3m_file),
+            os.path.abspath(cs_file),
+            opt_cmd)
+        ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
+        job = subprocess.Popen(cs_cmd, shell=True, cwd=self.working_dir,
+                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        sout, _ = job.communicate()
+        if b'Wrote abstract state sequence to' in sout:
+            return cs_file
+
+        ost.LogWarning('Creating column state sequence file (%s) failed' % \
+                       cs_file)
+
+    def Cleanup(self):
+        """Delete temporary data.
+
+        Delete temporary data if no working dir was given. Controlled by
+        :attr:`needs_cleanup`.
+        """
+        if self.needs_cleanup and os.path.exists(self.working_dir):
+            shutil.rmtree(self.working_dir)
+
+    def CleanupFailed(self):
+        '''In case something went wrong, call to make sure everything is clean.
+
+        This will delete the working dir independently of :attr:`needs_cleanup`.
+        '''
+        store_needs_cleanup = self.needs_cleanup
+        self.needs_cleanup = True
+        self.Cleanup()
+        self.needs_cleanup = store_needs_cleanup
+
+    def Search(self, a3m_file, database, options={}, prefix=''):
+        """
+        Searches for templates in the given database. Before running the search,
+        the hhm file is copied. This makes it possible to launch several hhblits
+        instances at once. Upon success, the filename of the result file is
+        returned. This file may be parsed with :func:`ParseHHblitsOutput`.
+
+        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
+        :type a3m_file: :class:`str`
+
+        :param database: Search database, needs to be the common prefix of the
+                         database files
+        :type database: :class:`str`
+
+        :param options: Dictionary of options to *hhblits*, one "-" is added in
+                        front of every key. Boolean True values add flag without
+                        value. Merged with default options {'cpu': 1, 'n': 1},
+                        where 'n' defines the number of iterations.
+        :type options: :class:`dict`
+
+        :param prefix: Prefix to the result file
+        :type prefix: :class:`str`
+
+        :return: The path to the result file
+        :rtype: :class:`str`
+        """
+        opts = {'cpu' : 1, # no. of cpus used
+                'n'   : 1}   # no. of iterations
+        opts.update(options)
+        opt_cmd, opt_str = _ParseOptions(opts)
+        base = os.path.basename(os.path.splitext(a3m_file)[0])
+        hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
+        hhr_file = os.path.join(self.working_dir, hhr_file)
+        search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
+            self.hhblits_bin,
+            opt_cmd,
+            os.path.abspath(a3m_file),
+            os.path.abspath(hhr_file),
+            os.path.join(os.path.abspath(os.path.split(database)[0]),
+                         os.path.split(database)[1]))
+        ost.LogInfo('searching %s' % database)
+        ost.LogVerbose(search_cmd)
+        job = subprocess.Popen(search_cmd, shell=True, cwd=self.working_dir,
+                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        sout, serr = job.communicate()
+        if job.returncode != 0:
+            lines = sout.decode().splitlines()
+            for line in lines:
+                ost.LogError(line.strip())
+            lines = serr.decode().splitlines()
+            for line in lines:
+                ost.LogError(line.strip())
+            return None
+        return hhr_file
+
+
+def _ParseOptions(opts):
+    """
+    :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
+             passed to command ("-" added in front of keys, options
+             separated by space) and opt_str (options separated by "_")
+             can be used for filenames.
+    :param opts: Dictionary of options, one "-" is added in front of every
+                 key. Boolean True values add flag without value.
+    """
+    opt_cmd = list()
+    opt_str = list()
+    for k, val in opts.items():
+        if type(val) == type(True):
+            if val == True:
+                opt_cmd.append('-%s' % str(k))
+                opt_str.append(str(k))
+        else:
+            opt_cmd.append('-%s %s' % (str(k), str(val)))
+            opt_str.append('%s%s' % (str(k), str(val)))
+    opt_cmd = ' '.join(opt_cmd)
+    opt_str = '_'.join(opt_str)
+    return opt_cmd, opt_str
+
+
+__all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
+           'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
+           'ParseHeaderLine']
+
+#  LocalWords:  HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
+#  LocalWords:  cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
+#  LocalWords:  attr basename rtype cstranslate tuple HHblitsHeader meth aln
+#  LocalWords:  HHblitsHit iterable evalue pvalue neff hmms datetime
+#  LocalWords:  whitespace whitespaces
diff --git a/modules/bindings/pymod/hhblits3.py b/modules/bindings/pymod/hhblits3.py
new file mode 100644
index 000000000..0e34a97c1
--- /dev/null
+++ b/modules/bindings/pymod/hhblits3.py
@@ -0,0 +1,844 @@
+'''HHblits wrapper classes and functions.
+'''
+
+import subprocess
+import datetime
+import os
+import shutil
+import tempfile
+
+import ost
+from ost import settings, seq
+from ost.bindings import utils
+
+class HHblitsHit:
+    """
+    A hit found by HHblits
+
+    .. attribute:: hit_id
+
+      String identifying the hit
+
+      :type: :class:`str`
+
+    .. attribute:: aln
+
+      Pairwise alignment containing the aligned part between the query and the
+      target. First sequence is the query, the second sequence the target.
+
+      :type: :class:`~ost.seq.AlignmentHandle`
+
+    .. attribute:: score
+
+      The alignment score
+
+      :type: :class:`float`
+
+    .. attribute:: ss_score
+
+      The secondary structure score
+
+      :type: :class:`float`
+
+    .. attribute:: evalue
+
+      The E-value of the alignment
+
+      :type: :class:`float`
+
+    .. attribute:: pvalue
+
+      The P-value of the alignment
+
+      :type: :class:`float`
+
+    .. attribute:: prob
+
+      The probability of the alignment (between 0 and 100)
+
+      :type: :class:`float`
+    """
+    def __init__(self, hit_id, aln, score, ss_score, evalue, pvalue, prob):
+        self.hit_id = hit_id
+        self.aln = aln
+        self.score = score
+        self.ss_score = ss_score
+        self.evalue = evalue
+        self.prob = prob
+        self.pvalue = pvalue
+
+class HHblitsHeader:
+    """Stats from the beginning of search output.
+
+    .. attribute:: query
+
+      The name of the query sequence
+
+      :type: :class:`str`
+
+    .. attribute:: match_columns
+
+      Total of aligned Match columns
+
+      :type: :class:`int`
+
+    .. attribute:: n_eff
+
+      Value of the ``-neff`` option
+
+      :type: :class:`float`
+
+    .. attribute:: searched_hmms
+
+      Number of profiles searched
+
+      :type: :class:`int`
+
+    .. attribute:: date
+
+      Execution date
+
+      :type: :class:`datetime.datetime`
+
+    .. attribute:: command
+
+      Command used to run
+
+      :type: :class:`str`
+    """
+    def __init__(self):
+        self.query = ''
+        self.match_columns = 0
+        self.n_eff = 0
+        self.searched_hmms = 0
+        self.date = None
+        self.command = ''
+
+def ParseHeaderLine(line):
+    '''Fetch header content.
+
+    First, we seek the start of the identifier, that is, the first whitespace
+    after the hit number + 1. Since the identifier may contain whitespaces
+    itself, we cannot split the whole line
+
+    :param line: Line from the output header.
+    :type line: :class:`str`
+
+    :return: Hit information and query/template offsets
+    :rtype: (:class:`HHblitsHit`, (:class:`int`, :class:`int`))
+    '''
+    for i in range(0, len(line)):
+        if line[i].isdigit():
+            break
+    for i in range(i, len(line)):
+        if line[i] == ' ':
+            break
+    assert len(line)-i >= 31 and line[i+1] != ' '
+    hit_id = line[i+1:i+31].strip()
+    fields = line[i+32:].split()
+    prob = float(fields[0])
+    evalue = float(fields[1])
+    pvalue = float(fields[2])
+    score = float(fields[3])
+    ss_score = float(fields[4])
+    offsets = (int(fields[6].split('-')[0]), int(fields[7].split('-')[0]))
+    return (HHblitsHit(hit_id, None, score, ss_score, evalue, pvalue, prob),
+            offsets)
+
+def ParseHHblitsOutput(output):
+    """
+    Parses the HHblits output as produced by :meth:`HHblits.Search` and returns
+    the header of the search results and a list of hits.
+
+    :param output: Iterable containing the lines of the HHblits output file
+    :type output: iterable (e.g. an open file handle)
+
+    :return: a tuple of the header of the search results and the hits
+    :rtype: (:class:`HHblitsHeader`, :class:`list` of :class:`HHblitsHit`)
+    """
+    lines = iter(output)
+    def _ParseHeaderSection(lines):
+        value_start_column = 14
+        date_pattern = '%a %b %d %H:%M:%S %Y'
+        header = HHblitsHeader()
+        line = next(lines)
+        assert line.startswith('Query')
+        header.query = line[value_start_column:].strip()
+        line = next(lines)
+        assert line.startswith('Match_columns')
+        header.match_columns = int(line[value_start_column:].strip())
+
+        line = next(lines)
+        assert line.startswith('No_of_seqs')
+
+        line = next(lines)
+        assert line.startswith('Neff')
+        header.n_eff = float(line[value_start_column:].strip())
+
+        line = next(lines)
+        assert line.startswith('Searched_HMMs')
+        header.searched_hmms = int(line[value_start_column:].strip())
+
+        line = next(lines)
+        assert line.startswith('Date')
+        value = line[value_start_column:].strip()
+        header.date = datetime.datetime.strptime(value, date_pattern)
+
+        line = next(lines)
+        assert line.startswith('Command')
+        header.command = line[value_start_column:].strip()
+
+        line = next(lines)
+        assert len(line.strip()) == 0
+        return header
+
+    def _ParseTableOfContents(lines):
+        line = next(lines)
+        assert line.startswith(' No Hit')
+        hits = []
+        while True:
+            line = next(lines)
+            if len(line.strip()) == 0:
+                return hits
+            hits.append(ParseHeaderLine(line))
+        return hits
+
+    def _ParseResultBody(query_id, hits, lines):
+        entry_index = None
+        query_str = ''
+        templ_str = ''
+        def _MakeAln(query_id, hit_id, query_string, templ_string,
+                     q_offset, t_offset):
+            s1 = seq.CreateSequence(query_id, query_string)
+            s1.offset = q_offset-1
+            s2 = seq.CreateSequence(hit_id, templ_string)
+            s2.offset = t_offset-1
+            return seq.CreateAlignment(s1, s2)
+        try:
+            while True:
+                # Lines which we are interested in:
+                # - "Done!" -> end of list
+                # - "No ..." -> next item in list
+                # - "T <hit_id> <start> <data> <end>"
+                # - "Q <query_id> <start> <data> <end>"
+                # -> rest is to be skipped
+                line = next(lines)
+                if len(line.strip()) == 0:
+                    continue
+                if line.startswith('Done!'):
+                    if len(query_str) > 0:
+                        hits[entry_index][0].aln = _MakeAln(\
+                            query_id, hits[entry_index][0].hit_id,
+                            query_str, templ_str, *hits[entry_index][1])
+                    return [h for h, o in hits]
+                if line.startswith('No '):
+                    if len(query_str) > 0:
+                        hits[entry_index][0].aln = _MakeAln(\
+                            query_id, hits[entry_index][0].hit_id,
+                            query_str, templ_str, *hits[entry_index][1])
+                    entry_index = int(line[3:].strip())-1
+                    line = next(lines)
+                    hits[entry_index][0].hit_id = line[1:].strip()
+                    query_str = ''
+                    templ_str = ''
+                    # skip the next line. It doesn't contain information we
+                    # don't already know
+                    next(lines)
+                    continue
+                assert entry_index != None
+                # Skip all "T ..." and "Q ..." lines besides the one we want
+                if line[1:].startswith(' Consensus'):
+                    continue
+                if line[1:].startswith(' ss_pred'):
+                    continue
+                if line[1:].startswith(' ss_conf'):
+                    continue
+                if line[1:].startswith(' ss_dssp'):
+                    continue
+                if line.startswith('T '):
+                    for start_pos in range(22, len(line)):
+                        if line[start_pos].isalpha() or line[start_pos] == '-':
+                            break
+                    end_pos = line.find(' ', start_pos)
+                    # this can fail if we didn't skip all other "T ..." lines
+                    if end_pos == -1:
+                        error_str = "Unparsable line '%s' for entry No %d" \
+                                    % (line.strip(), entry_index + 1)
+                        raise AssertionError(error_str)
+                    templ_str += line[start_pos:end_pos]
+                if line.startswith('Q '):
+                    for start_pos in range(22, len(line)):
+                        if line[start_pos].isalpha() or line[start_pos] == '-':
+                            break
+                    end_pos = line.find(' ', start_pos)
+                    # this can fail if we didn't skip all other "Q ..." lines
+                    if end_pos == -1:
+                        error_str = "Unparsable line '%s' for entry No %d" \
+                                    % (line.strip(), entry_index + 1)
+                        raise AssertionError(error_str)
+                    query_str += line[start_pos:end_pos]
+        except StopIteration:
+            if len(query_str) > 0:
+                hits[entry_index][0].aln = _MakeAln(query_id,
+                                                    hits[entry_index][0].hit_id,
+                                                    query_str, templ_str,
+                                                    *hits[entry_index][1])
+            return [h for h, o in hits]
+    header = _ParseHeaderSection(lines)
+    # parse the table of contents. This is neccessary as some of the properties
+    # (i.e. start of alignment) we need are only given there. From the TOC we
+    # create a list of hits that is then further filled with data when we parse
+    # the actual result body
+    hits = _ParseTableOfContents(lines)
+    return header, _ParseResultBody(header.query, hits, lines)
+
+def ParseA3M(a3m_file):
+    '''
+    Parse secondary structure information and the multiple sequence alignment 
+    out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.
+    
+    :param a3m_file: Iterable containing the lines of the A3M file
+    :type a3m_file: iterable (e.g. an open file handle)
+    
+    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
+             (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
+             If not available, "ss_pred" and "ss_conf" entries are set to None.
+    '''
+    profile_dict = dict()
+    state = 'NONE'
+    pred_seq_txt = ''
+    conf_seq_txt = ''
+    msa_seq = list()
+    msa_head = list()
+    for line in a3m_file:
+        if len(line.rstrip()) == 0:
+            continue
+        elif line.startswith('>ss_pred'):
+            state = 'sspred'
+            continue
+        elif line.startswith('>ss_conf'):
+            state = 'ssconf'
+            continue
+        elif line[0] == '>':
+            msa_seq.append('')
+            msa_head.append(line[1:].rstrip())
+            state = 'msa'
+            continue
+
+        if state == 'sspred':
+            pred_seq_txt += line.rstrip()
+        elif state == 'ssconf':
+            conf_seq_txt += line.rstrip()
+        elif state == 'msa':
+            msa_seq[len(msa_seq)-1] += line.rstrip()
+
+    if len(pred_seq_txt) > 0:
+        profile_dict['ss_pred'] = list()
+        profile_dict['ss_conf'] = list()
+        for i in range(0, len(pred_seq_txt)):
+            profile_dict['ss_pred'].append(pred_seq_txt[i])
+            profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
+    else:
+        profile_dict['ss_pred'] = None
+        profile_dict['ss_conf'] = None    
+
+    # post processing
+    # MSA
+    profile_dict['msa'] = None
+    if len(msa_seq) > 1:
+        t = msa_seq[0]
+        al = seq.AlignmentList()
+        for i in range(1, len(msa_seq)):
+            qs = ''
+            ts = ''
+            k = 0
+            for c in msa_seq[i]:
+                if c.islower():
+                    qs += '-'
+                    ts += c.upper()
+                else:
+                    qs += t[k]
+                    ts += c
+                    k += 1
+            nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs), 
+                                     seq.CreateSequence(msa_head[i], ts))
+            al.append(nl)
+        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
+            al, seq.CreateSequence(msa_head[0], t))
+    return profile_dict
+
+
+def ParseHHM(profile):
+    '''
+    Parse secondary structure information and the MSA out of an HHM profile as
+    produced by :meth:`HHblits.A3MToProfile`.
+
+    :param profile: Opened file handle holding the profile.
+    :type profile: :class:`file`
+
+    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
+             (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
+             "consensus" (:class:`~ost.seq.SequenceHandle`).
+             If not available, "ss_pred" and "ss_conf" entries are set to None.
+    '''
+    profile_dict = dict()
+    state = 'NONE'
+    pred_seq_txt = ''
+    conf_seq_txt = ''
+    consensus_txt = ''
+    msa_seq = list()
+    msa_head = list()
+    for line in profile:
+        if len(line.rstrip()) == 0:
+            continue
+        if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
+            state = 'sspred'
+            continue
+        elif line.rstrip() == '>ss_conf PSIPRED confidence values':
+            state = 'ssconf'
+            continue
+        elif line.rstrip() == '>Consensus':
+            state = 'consensus'
+            continue
+        elif line[0] == '>':
+            if state == 'consensus' or state == 'msa':
+                msa_seq.append('')
+                msa_head.append(line[1:].rstrip())
+            else:
+                raise IOError('Profile file "%s" is missing ' % profile.name+
+                              'the "Consensus" section')
+            state = 'msa'
+            continue
+        elif line[0] == '#':
+            state = 'NONE'
+            continue
+
+        if state == 'sspred':
+            pred_seq_txt += line.rstrip()
+        elif state == 'ssconf':
+            conf_seq_txt += line.rstrip()
+        elif state == 'msa':
+            msa_seq[len(msa_seq)-1] += line.rstrip()
+        elif state == 'consensus':
+            consensus_txt += line.rstrip()
+
+    if len(pred_seq_txt) > 0:
+        profile_dict['ss_pred'] = list()
+        profile_dict['ss_conf'] = list()
+        for i in range(0, len(pred_seq_txt)):
+            profile_dict['ss_pred'].append(pred_seq_txt[i])
+            profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
+    else:
+        profile_dict['ss_pred'] = None
+        profile_dict['ss_conf'] = None
+
+    # post processing
+    # MSA
+    profile_dict['msa'] = None
+    if len(msa_seq):
+        t = msa_seq[0]
+        al = seq.AlignmentList()
+        for i in range(1, len(msa_seq)):
+            qs = ''
+            ts = ''
+            k = 0
+            for c in msa_seq[i]:
+                if c.islower():
+                    qs += '-'
+                    ts += c.upper()
+                else:
+                    qs += t[k]
+                    ts += c
+                    k += 1
+            nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
+                                     seq.CreateSequence(msa_head[i], ts))
+            al.append(nl)
+        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
+            al, seq.CreateSequence(msa_head[0], t))
+    # Consensus
+    profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
+
+    return profile_dict
+
+
+class HHblits:
+    """
+    Initialise a new HHblits "search" for the given query. Query may either
+    be a :class:`~ost.seq.SequenceHandle` or a string. In the former case, the
+    query is the actual query sequence, in the latter case, the query is the
+    filename to the file containing the query.
+
+    :param query: Query sequence as file or sequence.
+    :type query: :class:`~ost.seq.SequenceHandle` or :class:`str`
+    :param hhsuite_root: Path to the top-level directory of your hhsuite
+                         installation.
+    :type hhsuite_root: :class:`str`
+    :param hhblits_bin: Name of the hhblits binary. Will only be used if
+                        :attr:`hhsuite_root`:file:`/bin/hhblits` does not exist.
+    :type hhblits_bin: :class:`str`
+    :param working_dir: Directory for temporary files. Will be created if not
+                        present but **not** automatically deleted.
+    :type working_dir: :class:`str`
+    """
+    OUTPUT_PREFIX = 'query_hhblits'
+    def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
+        self.query = query
+        self.hhsuite_root = hhsuite_root
+        if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
+            self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
+            self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
+        else:
+            self.hhblits_bin = settings.Locate('hhblits',
+                                               explicit_file_name=hhblits_bin)
+            self.bin_dir = os.path.dirname(self.hhblits_bin)
+            # guess root folder (note: this may fail in future)
+            self.hhsuite_root = os.path.dirname(self.bin_dir)
+                                             
+        if working_dir:
+            self.needs_cleanup = False
+            self.working_dir = working_dir
+            if not os.path.exists(working_dir):
+                os.mkdir(working_dir)
+            if isinstance(query, str):
+                self.filename = os.path.abspath(os.path.join(
+                    self.working_dir, os.path.basename(query)))
+                if self.filename != os.path.abspath(query):
+                    shutil.copy(query, self.filename)
+            else:
+                self.filename = os.path.join(self.working_dir,
+                                             '%s.fasta' % HHblits.OUTPUT_PREFIX)
+                ost.io.SaveSequence(query, self.filename)
+        else:
+            self.needs_cleanup = True
+            if isinstance(query, str):
+                self.working_dir = tempfile.mkdtemp()
+                self.filename = os.path.abspath(os.path.join(
+                    self.working_dir, os.path.basename(query)))
+                shutil.copy(query, self.filename)
+            else:
+                tmp_dir = utils.TempDirWithFiles((query,))
+                self.working_dir = tmp_dir.dirname
+                self.filename = tmp_dir.files[0]
+
+    def BuildQueryMSA(self, nrdb, options={}, a3m_file=None, assign_ss=True):
+        """Builds the MSA for the query sequence.
+
+        This function directly uses hhblits of hhtools. While in theory it would
+        be possible to do this by PSI-blasting on our own, hhblits is supposed
+        to be faster. Also it is supposed to prevent alignment corruption. The
+        alignment corruption is caused by low-scoring terminal alignments that
+        draw the sequences found by PSI-blast away from the optimum. By removing
+        these low scoring ends, part of the alignment corruption can be
+        suppressed.
+
+        The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
+        already produced, hhblits is not called again and the existing file path
+        is returned (neglecting the *assign_ss* flag!!!).
+
+        :param nrdb: Database to be align against; has to be an hhblits database
+        :type nrdb: :class:`str`
+
+        :param options: Dictionary of options to *hhblits*, one "-" is added in
+                        front of every key. Boolean True values add flag without
+                        value. Merged with default options 
+                        {'cpu': 1, 'n': 1, 'e': 0.001}, where 'n' defines the 
+                        number of iterations.
+        :type options: :class:`dict`
+
+        :param a3m_file: a path of a3m_file to be used, optional
+        :type a3m_file: :class:`str`
+
+        :param assign_ss: HHblits does not assign predicted secondary structure 
+                          by default. You can optionally assign it with the
+                          addss.pl script provided by the HH-suite. However, 
+                          your HH-suite installation requires you to specify
+                          paths to PSIRED etc. We refer to the HH-suite user
+                          guide for further instructions.
+        :type assign_ss:  :class:`bool`
+
+        :return: The path to the A3M file containing the MSA
+        :rtype: :class:`str`
+        """
+        if a3m_file is None:
+            a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
+        else:
+            a3m_file = os.path.abspath(a3m_file)
+        if os.path.exists(a3m_file):
+            ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
+            return a3m_file
+        ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
+        full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
+                                 os.path.split(nrdb)[1])
+        # create MSA
+        opts = {'cpu' : 1,     # no. of cpus used
+                'n'   : 1,     # no. of iterations
+                'e'   : 0.001} # evalue threshold
+        opts.update(options)
+        opt_cmd, _ = _ParseOptions(opts)
+        hhblits_cmd = '%s -i %s -oa3m %s -d %s %s' % \
+                      (self.hhblits_bin, self.filename, a3m_file, full_nrdb,
+                       opt_cmd)
+
+        p = subprocess.run(hhblits_cmd, shell=True, cwd=self.working_dir,
+                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        lines = p.stdout.decode().splitlines()
+        for line in lines:
+            ost.LogVerbose(line.strip())
+
+        lines = p.stderr.decode().splitlines()
+        for line in lines:
+            ost.LogError(line.strip())
+
+        if not os.path.exists(a3m_file):
+            raise RuntimeError('Building query profile failed, no output')
+
+        if assign_ss:
+            return self.AssignSSToA3M(a3m_file)
+        else:
+            return a3m_file
+
+    def AssignSSToA3M(self, a3m_file):
+        """
+        HHblits does not assign predicted secondary structure by default. 
+        You can optionally assign it with the addss.pl script provided by the 
+        HH-suite. However, your HH-suite installation requires you to specify
+        paths to PSIRED etc. We refer to the HH-suite user guide for further 
+        instructions.
+
+        :param a3m_file:  Path to file you want to assign secondary structure to
+        :type a3m_file:  :class:`str`
+        """
+
+        a3m_file = os.path.abspath(a3m_file)
+        addss_cmd = "perl %s %s" % (os.path.join(self.hhsuite_root, 
+                                                 'scripts/addss.pl'), 
+                                    a3m_file)
+        env = dict(os.environ)
+        env.update({'PERL5LIB' : os.path.join(self.hhsuite_root, 'scripts'),
+                    'HHLIB' : os.path.join(self.hhsuite_root),
+                    'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
+                                        os.environ['PATH'])})
+
+        p = subprocess.run(addss_cmd, shell=True, cwd=self.working_dir,
+                           env=env, stdout=subprocess.PIPE,
+                           stderr=subprocess.PIPE)
+
+        lines = p.stdout.decode().splitlines()
+        for line in lines:
+            ost.LogVerbose(line.strip())
+            if 'error' in line.lower() or 'bad interpreter' in line.lower():
+                raise RuntimeError('Predicting secondary structure for MSA '+
+                                   '(%s) failed, on command: %s' % (a3m_file, line))
+
+        lines = p.stderr.decode().splitlines()
+        for line in lines:
+            ost.LogError(line.strip())
+            if 'error' in line.lower() or 'bad interpreter' in line.lower():
+                raise RuntimeError('Predicting secondary structure for MSA '+
+                                   '(%s) failed, on command: %s' % (a3m_file, line))
+
+        return a3m_file
+
+    def A3MToProfile(self, a3m_file, hhm_file=None):
+        """
+        Converts the A3M alignment file to a hhm profile. If hhm_file is not
+        given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
+
+        The produced HHM file can be parsed by :func:`ParseHHM`.
+
+        If the file was already produced, the existing file path is returned
+        without recomputing it.
+
+        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
+        :type a3m_file: :class:`str`
+
+        :param hhm_file: Desired output file name 
+        :type hhm_file: :class:`str`
+
+        :return: Path to the profile file
+        :rtype: :class:`str`
+        """
+        hhmake = os.path.join(self.bin_dir, 'hhmake')
+        if not hhm_file:
+            hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
+        if os.path.exists(hhm_file):
+            return hhm_file
+        ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
+        p = subprocess.run('%s -i %s -o %s' % (hhmake, a3m_file, hhm_file),
+                           shell=True, stdout=subprocess.PIPE,
+                           stderr=subprocess.PIPE)
+        lines = p.stdout.decode().splitlines()
+        for line in lines:
+            ost.LogVerbose(line.strip())
+        lines = p.stderr.decode().splitlines()
+        for line in lines:
+            ost.LogError(line.strip())
+
+        if p.returncode != 0:
+            raise IOError('could not convert a3m to hhm file')
+
+        if not os.path.exists(hhm_file):
+            raise RuntimeError('could not convert a3m to hhm file, no output')
+
+        return hhm_file
+
+    def A3MToCS(self, a3m_file, cs_file=None, options={}):
+        """
+        Converts the A3M alignment file to a column state sequence file. If
+        cs_file is not given, the output file will be set to
+        <:attr:`a3m_file`-basename>.seq219.
+
+        If the file was already produced, the existing file path is returned
+        without recomputing it.
+
+        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
+        :type a3m_file: :class:`str`
+
+        :param cs_file: Output file name (may be omitted)
+        :type cs_file: :class:`str`
+
+        :param options: Dictionary of options to *cstranslate*, one "-" is added
+                        in front of every key. Boolean True values add flag
+                        without value.
+        :type options: :class:`dict`
+
+        :return: Path to the column state sequence file
+        :rtype: :class:`str`
+        """
+        cstranslate = os.path.join(self.bin_dir, 'cstranslate')
+        if not cs_file:
+            cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
+        if os.path.exists(cs_file):
+            return cs_file
+        opt_cmd, _ = _ParseOptions(options)
+        cs_cmd = '%s -i %s -o %s %s' % (
+            cstranslate,
+            os.path.abspath(a3m_file),
+            os.path.abspath(cs_file),
+            opt_cmd)
+        ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
+        p = subprocess.run(cs_cmd, shell=True, cwd=self.working_dir,
+                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        if not os.path.exists(cs_file):
+            raise RuntimeError('Creating column state sequence file failed, ' +
+                               'no output')
+
+        if b'Wrote abstract state sequence to' in p.stdout:
+            return cs_file
+        else:
+            raise RuntimeError('Creating column state sequence file failed')
+
+    def Cleanup(self):
+        """Delete temporary data.
+
+        Delete temporary data if no working dir was given. Controlled by
+        :attr:`needs_cleanup`.
+        """
+        if self.needs_cleanup and os.path.exists(self.working_dir):
+            shutil.rmtree(self.working_dir)
+
+    def CleanupFailed(self):
+        '''In case something went wrong, call to make sure everything is clean.
+
+        This will delete the working dir independently of :attr:`needs_cleanup`.
+        '''
+        store_needs_cleanup = self.needs_cleanup
+        self.needs_cleanup = True
+        self.Cleanup()
+        self.needs_cleanup = store_needs_cleanup
+
+    def Search(self, a3m_file, database, options={}, prefix=''):
+        """
+        Searches for templates in the given database. Before running the search,
+        the hhm file is copied. This makes it possible to launch several hhblits
+        instances at once. Upon success, the filename of the result file is
+        returned. This file may be parsed with :func:`ParseHHblitsOutput`.
+
+        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
+        :type a3m_file: :class:`str`
+
+        :param database: Search database, needs to be the common prefix of the
+                         database files
+        :type database: :class:`str`
+
+        :param options: Dictionary of options to *hhblits*, one "-" is added in
+                        front of every key. Boolean True values add flag without
+                        value. Merged with default options {'cpu': 1, 'n': 1},
+                        where 'n' defines the number of iterations.
+        :type options: :class:`dict`
+
+        :param prefix: Prefix to the result file
+        :type prefix: :class:`str`
+
+        :return: The path to the result file
+        :rtype: :class:`str`
+        """
+        opts = {'cpu' : 1, # no. of cpus used
+                'n'   : 1}   # no. of iterations
+        opts.update(options)
+        opt_cmd, opt_str = _ParseOptions(opts)
+        base = os.path.basename(os.path.splitext(a3m_file)[0])
+        hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
+        hhr_file = os.path.join(self.working_dir, hhr_file)
+        search_cmd = '%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s' % (
+            self.hhblits_bin,
+            opt_cmd,
+            os.path.abspath(a3m_file),
+            os.path.abspath(hhr_file),
+            os.path.join(os.path.abspath(os.path.split(database)[0]),
+                         os.path.split(database)[1]))
+        ost.LogInfo('searching %s' % database)
+        ost.LogVerbose(search_cmd)
+        p = subprocess.run(search_cmd, shell=True, cwd=self.working_dir,
+                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        lines = p.stdout.decode().splitlines()
+        for line in lines:
+            ost.LogVerbose(line.strip())
+        lines = p.stderr.decode().splitlines()
+        for line in lines:
+            ost.LogError(line.strip())
+
+        if p.returncode != 0:
+            raise RuntimeError('Sequence search failed')
+
+        if not os.path.exists(hhr_file):
+            raise RuntimeError('Sequence search failed, no output')
+
+        return hhr_file
+
+
+def _ParseOptions(opts):
+    """
+    :return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
+             passed to command ("-" added in front of keys, options
+             separated by space) and opt_str (options separated by "_")
+             can be used for filenames.
+    :param opts: Dictionary of options, one "-" is added in front of every
+                 key. Boolean True values add flag without value.
+    """
+    opt_cmd = list()
+    opt_str = list()
+    for k, val in opts.items():
+        if type(val) == type(True):
+            if val == True:
+                opt_cmd.append('-%s' % str(k))
+                opt_str.append(str(k))
+        else:
+            opt_cmd.append('-%s %s' % (str(k), str(val)))
+            opt_str.append('%s%s' % (str(k), str(val)))
+    opt_cmd = ' '.join(opt_cmd)
+    opt_str = '_'.join(opt_str)
+    return opt_cmd, opt_str
+
+
+__all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
+           'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
+           'ParseHeaderLine']
+
+#  LocalWords:  HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
+#  LocalWords:  cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
+#  LocalWords:  attr basename rtype cstranslate tuple HHblitsHeader meth aln
+#  LocalWords:  HHblitsHit iterable evalue pvalue neff hmms datetime
+#  LocalWords:  whitespace whitespaces
-- 
GitLab