Skip to content
Snippets Groups Projects
Select Git revision
  • 2eca1d9af782276fb2c1ad177bba66bd88a8d0a9
  • master default protected
  • develop protected
  • cmake_boost_refactor
  • ubuntu_ci
  • mmtf
  • non-orthogonal-maps
  • no_boost_filesystem
  • data_viewer
  • 2.11.1
  • 2.11.0
  • 2.10.0
  • 2.9.3
  • 2.9.2
  • 2.9.1
  • 2.9.0
  • 2.8.0
  • 2.7.0
  • 2.6.1
  • 2.6.0
  • 2.6.0-rc4
  • 2.6.0-rc3
  • 2.6.0-rc2
  • 2.6.0-rc
  • 2.5.0
  • 2.5.0-rc2
  • 2.5.0-rc
  • 2.4.0
  • 2.4.0-rc2
29 results

hhsearch.py

Blame
  • user avatar
    juergen authored
    git-svn-id: https://dng.biozentrum.unibas.ch/svn/openstructure/trunk@1811 5a81b35b-ba03-0410-adc8-b2c5c5119f08
    d8ede101
    History
    hhsearch.py 3.50 KiB
    """
    Author: Marco Biasini
    """
    
    import re
    from ost import seq
    
    class HHSearchHit:
      """
      Container for a single HHSearch hit.

      Attributes:
        summary:   the object passed as ``summary`` (stored unchanged)
        alignment: the object passed as ``alignment`` (stored unchanged)
      """
      def __init__(self, summary, alignment):
        # Pure value holder: no validation or copying is performed.
        self.summary, self.alignment=summary, alignment
    
    class HitSummary:
      """
      Plain record holding the values of one hit summary.

      Every constructor argument is stored, unchanged, in an attribute of the
      same name: pdb_id, chain, prob, e_value, query_start, query_end,
      template_start and template_end.
      """
      def __init__(self, pdb_id, chain, prob, e_value, query_start, query_end,
                   template_start, template_end):
        # Identification of the hit.
        self.pdb_id, self.chain=pdb_id, chain
        # Scores.
        self.prob, self.e_value=prob, e_value
        # Range on the query.
        self.query_start, self.query_end=query_start, query_end
        # Range on the template.
        self.template_start, self.template_end=template_start, template_end
    
    class HHSearchResult:
      """
      Read an HHSearch result file. The result is stored in ``hits``, a list of
      HHSearchHit objects (one per row of the hit summary table).

      Each header line of the file ("Key   value") is additionally exposed as an
      attribute named after the lower-cased key; its value is the remainder of
      the line as a string (empty string when the line holds only a key).

      If ``pipe_separated`` is true, the hit identifier in the summary table is
      split on '|' and the PDB id / chain are taken from the second field.
      Otherwise they are read from fixed columns.

      Usage:

      result=HHSearchResult('output.hhr')
      for hit in result.hits:
        print('%s %s' % (hit.summary.pdb_id, hit.summary.chain))
        print(hit.alignment.ToString(80))
      """
      def __init__(self, filename, pipe_separated=False):
        self.pipe_separated=pipe_separated
        self._Read(filename)

      def _Read(self, filename):
        """
        Parse the file at ``filename`` and populate ``self.hits``. The three
        sections (header, summary table, hit details) are consumed in order
        from the same file object.
        """
        # The context manager guarantees the file is closed, also when one of
        # the parsing steps raises.
        with open(filename) as ifile:
          self._ReadHeader(ifile)
          summaries=self._ReadHitSummaries(ifile)
          self.hits=self._ReadHitDetails(ifile, summaries)

      def _ReadHeader(self, ifile):
        """
        Consume lines up to and including the first blank line. Each non-blank
        line is split at its first run of whitespace into key and value, and
        stored as ``self.<key.lower()>=value``.
        """
        for line in ifile:
          stripped_line=line.strip()
          if stripped_line=='':
            break
          parts=re.split(r'\s+', stripped_line, 1)
          # A line consisting of a key only has no second element; store an
          # empty value instead of failing on tuple unpacking.
          value=''
          if len(parts)==2:
            value=parts[1]
          setattr(self, parts[0].lower(), value)

      def _ParseRange(self, text):
        """
        Convert a 'start-end' field (surrounding blanks allowed) into a tuple
        of two ints.
        """
        bounds=text.split('-')
        return int(bounds[0].strip()), int(bounds[1].strip())

      def _ReadHitSummaries(self, ifile):
        """
        Skip the first line (column titles), then parse one HitSummary per line
        until a blank line or the end of input is reached. Fields are taken
        from fixed character columns of each line.

        Returns the list of HitSummary objects in file order.
        """
        lines=iter(ifile)
        # The first line of the section holds the column titles.
        next(lines, None)
        summaries=[]
        for summary_line in lines:
          if summary_line.strip()=='':
            break
          if self.pipe_separated:
            parts=summary_line[4:37].split('|')
            pdb_id=parts[1][:4]
            chain=parts[1][4]
          else:
            pdb_id=summary_line[4:8]
            chain=summary_line[9]
          prob=float(summary_line[36:40])
          # NOTE: the E-value is not read from the file; every summary carries
          # the constant 0.0.
          e_value=0.0
          query_start, query_end=self._ParseRange(summary_line[76:84])
          template_start, template_end=self._ParseRange(summary_line[86:94])
          summaries.append(HitSummary(pdb_id, chain, prob, e_value, query_start,
                                      query_end, template_start, template_end))
        return summaries

      def _ReadHitDetails(self, ifile, summaries):
        """
        Read one alignment from ``ifile`` for every entry of ``summaries`` and
        return the resulting list of HHSearchHit objects, in the same order.
        """
        return [HHSearchHit(summary, self._ReadHitDetail(ifile))
                for summary in summaries]

      def _ReadHitDetail(self, ifile):
        """
        Read the alignment of the next hit from ``ifile``.

        Everything up to and including the next line starting with '>' is
        skipped. Reading stops at the next line starting with 'No' or at the
        end of input. The fourth whitespace-separated field of every remaining
        'Q'/'T' line is appended to the query/target sequence respectively.

        Returns an alignment holding the sequences 'query' and 'target'.
        """
        # Rows that start with Q/T but carry annotations instead of sequence.
        # ss_conf and ss_dssp have no residue-number columns, so their fourth
        # field is not sequence data and must not be appended.
        annotation_rows=('Q ss_pred', 'Q ss_conf', 'Q ss_dssp', 'Q Consensus',
                         'T ss_pred', 'T ss_conf', 'T ss_dssp', 'T Consensus')
        skip_header=True
        q_chunks, t_chunks=([], [])
        for line in ifile:
          if skip_header:
            if line.startswith('>'):
              skip_header=False
            continue
          if line.startswith('No'):
            # Start of the next hit.
            break
          if line.strip()=='':
            continue
          if line.startswith(annotation_rows):
            continue
          if line.startswith(' '):
            # Rows without a leading Q/T label carry no sequence.
            continue
          if line.startswith('Q'):
            q_chunks.append(re.split(r'\s+', line)[3])
          elif line.startswith('T'):
            t_chunks.append(re.split(r'\s+', line)[3])
        ali=seq.AlignmentHandle()
        ali.AddSequence(seq.Sequence.FromString('query', ''.join(q_chunks)))
        ali.AddSequence(seq.Sequence.FromString('target', ''.join(t_chunks)))
        return ali