Skip to content
Snippets Groups Projects
Select Git revision
  • c3c9fe360c50c5566c647fb3058251de1cd86040
  • master default protected
  • develop protected
  • cmake_boost_refactor
  • ubuntu_ci
  • mmtf
  • non-orthogonal-maps
  • no_boost_filesystem
  • data_viewer
  • 2.11.1
  • 2.11.0
  • 2.10.0
  • 2.9.3
  • 2.9.2
  • 2.9.1
  • 2.9.0
  • 2.8.0
  • 2.7.0
  • 2.6.1
  • 2.6.0
  • 2.6.0-rc4
  • 2.6.0-rc3
  • 2.6.0-rc2
  • 2.6.0-rc
  • 2.5.0
  • 2.5.0-rc2
  • 2.5.0-rc
  • 2.4.0
  • 2.4.0-rc2
29 results

dssp.py

Blame
  • clustalw.py 5.49 KiB
    from ost.bindings import utils
    from ost import settings, io, seq, LogError
    import os
    import subprocess
    
    def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False, 
                 clustalw_option_string=False):
      '''
      Runs a ClustalW multiple sequence alignment. The results are returned as a
      :class:`~ost.seq.AlignmentHandle` instance.
      
      There are two ways to use this function:
      
       - align exactly two sequences:
       
          :param seq1: sequence_one
          :type seq1: :class:`~ost.seq.SequenceHandle` or :class:`str`
          
          :param seq2: sequence_two
          :type seq2: :class:`~ost.seq.SequenceHandle` or :class:`str`
      
          The two sequences can be specified as two separate function parameters 
          (`seq1`, `seq2`). The type of both parameters can be either
          :class:`~ost.seq.SequenceHandle` or :class:`str`, but must be the same for
          both parameters.
          
       - align two or more sequences:
       
          :param seq1: sequence_list
          :type seq1: :class:`~ost.seq.SequenceList`
          
          :param seq2: must be :class:`None`
          
          Two or more sequences can be specified by using a
          :class:`~ost.seq.SequenceList`. It is then passed as the first function 
          parameter (`seq1`). The second parameter (`seq2`) must be :class:`None`.
          
           
      :param clustalw: path to ClustalW executable (used in :func:`~ost.settings.Locate`)
      :type clustalw: :class:`str`
      :param nopgap: turn residue-specific gaps off
      :type nopgap: :class:`bool`
      :param clustalw_option_string: additional ClustalW flags (see http://www.clustal.org/download/clustalw_help.txt)
      :type clustalw_option_string: :class:`str`
      :param keep_files: do not delete temporary files
      :type keep_files: :class:`bool`
    
      .. note ::
       
        - In the passed sequences ClustalW will convert lowercase to uppercase, and
          change all '.' to '-'. OST will convert and '?' to 'X' before aligning
          sequences with ClustalW.
        - If a :attr:`sequence name <ost.seq.SequenceHandle.name>` contains spaces,
          only the part before the space is considered as sequence name. To avoid
          surprises, you should remove spaces from the sequence name.
        - Sequence names must be unique (:class:`ValueError` exception raised
          otherwise).
    
      ClustalW will accept only IUB/IUPAC amino acid and nucleic acid codes:
    
      ======= ======================= ======= ============================ 
      Residue  Name                   Residue  Name 
      ======= ======================= ======= ============================
         A    alanine                    P    proline
         B    aspartate or asparagine    Q    glutamine
         C    cystine                    R    arginine
         D    aspartate                  S    serine
         E    glutamate                  T    threonine
         F    phenylalanine              U    selenocysteine
         G    glycine                    V    valine
         H    histidine                  W    tryptophan
         I    isoleucine                 Y    tyrosine
         K    lysine                     Z    glutamate or glutamine
         L    leucine                    X    any
         M    methionine                 \\*   translation stop
         N    asparagine                 \\-   gap of indeterminate length
      ======= ======================= ======= ============================ 
    
      '''
      clustalw_path=settings.Locate(('clustalw', 'clustalw2'), 
                                    explicit_file_name=clustalw)
      
      if seq2!=None:
        if isinstance(seq1, seq.SequenceHandle) and isinstance(seq2, seq.SequenceHandle):
          seq_list=seq.CreateSequenceList()
          seq_list.AddSequence(seq1)
          seq_list.AddSequence(seq2)
        elif isinstance(seq1, str) and isinstance(seq2, str):
          seqh1=seq.CreateSequence("seq1", seq1)
          seqh2=seq.CreateSequence("seq2", seq2)
          seq_list=seq.CreateSequenceList()
          seq_list.AddSequence(seqh1)
          seq_list.AddSequence(seqh2)
        else:
          LogError("WARNING: Specify at least two Sequences")
          return
      elif isinstance(seq1, seq.SequenceList):
        seq_list=seq1
      else:
        LogError("WARNING: Specify either two SequenceHandles or one SequenceList")
        return
    
      sequence_names = set()
      for s in seq_list:
        # we cut out anything after a space to be consistent with ClustalW behaviour
        sequence_names.add(s.GetName().split(' ')[0])
      if len(sequence_names) < len(seq_list):
        raise ValueError("ClustalW can only process sequences with unique identifiers!")
    
    
      new_list = seq.CreateSequenceList()
      for s in seq_list:
        ss = s.Copy()
        for i,c in enumerate(ss):
          if c=='?':
            ss[i]='X'
        new_list.AddSequence(ss)
    
      seq_list = new_list
    
      
      temp_dir=utils.TempDirWithFiles((seq_list,))
      out=os.path.join(temp_dir.dirname, 'out.fasta')
      command='%s -infile="%s" -output=fasta -outfile="%s"' % (clustalw_path,
                                                               temp_dir.files[0],
                                                               out)
      if nopgap:
        command+=" -nopgap"
      if clustalw_option_string!=False:
        command=command+" "+clustalw_option_string  #see useful flags: http://toolkit.tuebingen.mpg.de/clustalw/help_params
    
      subprocess.run(command, shell=True, stdout=subprocess.DEVNULL) 
    
      aln=io.LoadAlignment(out)
      
    
      for sequence in seq_list:
        for seq_num,aln_seq in enumerate(aln.sequences):
          if aln_seq.GetName()==sequence.GetName():
            break
        aln.SetSequenceOffset(seq_num,sequence.offset)
        if sequence.HasAttachedView():
          aln.AttachView(seq_num,sequence.GetAttachedView().Copy())
    
      if not keep_files:
        temp_dir.Cleanup()
      
      return aln