Skip to content
Snippets Groups Projects
Commit 23a01b0b authored by BIOPZ-Bertoni Martino's avatar BIOPZ-Bertoni Martino
Browse files

added a semiglobal alignment. equal to global alignment but without

starting and ending gaps penalty. good when 1 sequence is much
smaller then the other or you expect the 2 sequences just to overlap
a bit.
parent 7d03a05a
Branches
Tags
No related merge requests found
......@@ -26,6 +26,7 @@
#include <ost/seq/alg/subst_weight_matrix.hh>
#include <ost/seq/alg/local_align.hh>
#include <ost/seq/alg/global_align.hh>
#include <ost/seq/alg/semiglobal_align.hh>
#include <ost/seq/alg/entropy.hh>
using namespace boost::python;
using namespace ost::seq;
......@@ -59,6 +60,8 @@ BOOST_PYTHON_MODULE(_ost_seq_alg)
arg("gap_open")=-5, arg("gap_ext")=-2));
def("GlobalAlign", &GlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"),
arg("gap_open")=-5, arg("gap_ext")=-2));
def("SemiGlobalAlign", &SemiGlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"),
arg("gap_open")=-5, arg("gap_ext")=-2));
def("ShannonEntropy", &ShannonEntropy, (arg("aln"), arg("ignore_gaps")=true));
}
......@@ -9,12 +9,14 @@ merge_pairwise_alignments.hh
conservation.hh
local_align.hh
global_align.hh
semiglobal_align.hh
)
set(OST_SEQ_ALG_SOURCES
merge_pairwise_alignments.cc
local_align.cc
global_align.cc
semiglobal_align.cc
entropy.cc
sequence_identity.cc
ins_del.cc
......
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2011 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#include <ost/log.hh>
#include "impl/align_impl.hh"
#include "semiglobal_align.hh"
namespace ost { namespace seq { namespace alg {
void SemiTraceback(impl::AlnMat& mat, int max_i, int max_j,
const ConstSequenceHandle& s1,
const ConstSequenceHandle& s2,
AlignmentList& alignments)
{
int i = (max_i > 0 ? max_i-1 : 0);
int j = (max_j > 0 ? max_j-1 : 0);
String aln_str1;
String aln_str2;
while (i > 0 || j > 0) {
impl::SetRoute(mat, i, j, s1, s2, aln_str1, aln_str2);
}
impl::StoreStrAsAln(aln_str1, aln_str2, s1, s2, i, j, alignments);
}
AlignmentList SemiGlobalAlign(const ConstSequenceHandle& s1,
const ConstSequenceHandle& s2,
alg::SubstWeightMatrixPtr& subst,
int gap_open,int gap_ext)
{
impl::AlnMat mat(s1.GetLength()+1, s2.GetLength()+1);
// init first column of matrix no start gap penalty
if (mat.GetHeight() > 1) {
mat(0, 1).score = mat(0, 0).score;
mat(0, 1).from = impl::INS1;
}
for (int j = 2; j < mat.GetHeight(); ++j) {
mat(0, j).score = mat(0, j-1).score;
mat(0, j).from = impl::INS1;
}
// fill the alignment matrix
for (int i = 0; i < mat.GetWidth()-1; ++i) {
mat(i+1, 0).score = mat(i, 0).score;
mat(i+1, 0).from = impl::INS2;
for (int j = 0; j < mat.GetHeight()-1; ++j) {
char c1=s1[i];
char c2=s2[j];
short weight=subst->GetWeight(c1, c2);
int diag=weight+mat(i, j).score;
int ins1=mat(i+1, j).score
+(mat(i+1, j).from==impl::INS1 ? gap_ext : gap_open);
int ins2=mat(i, j+1).score
+(mat(i, j+1).from==impl::INS2 ? gap_ext : gap_open);
// ignore end gaps (if one of the two seqs is done)
if (j+1==mat.GetHeight()-1){
ins2=mat(i, j+1).score;
}
if (i+1==mat.GetWidth()-1){
ins1=mat(i+1, j).score;
}
if (diag>=ins1) {
if (diag>=ins2) {
mat(i+1, j+1).score=diag;
mat(i+1, j+1).from=impl::DIAG;
} else {
mat(i+1, j+1).score=ins2;
mat(i+1, j+1).from=impl::INS2;
}
} else if (ins1>ins2) {
mat(i+1, j+1).score=ins1;
mat(i+1, j+1).from=impl::INS1;
} else {
mat(i+1, j+1).score=ins2;
mat(i+1, j+1).from=impl::INS2;
}
}
}
// write traceback matrix in debug mode
#if !defined(NDEBUG)
DbgWriteAlnMatrix(mat,s1, s2);
#endif
AlignmentList alignments;
SemiTraceback(mat, mat.GetWidth(), mat.GetHeight(), s1, s2, alignments);
LOG_DEBUG(alignments.back().ToString(80));
return alignments;
}
}}}
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2011 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#ifndef OST_SEQ_ALG_SEMIGLOBAL_ALIGN_HH
#define OST_SEQ_ALG_SEMIGLOBAL_ALIGN_HH
#include <ost/seq/alignment_handle.hh>
#include <ost/seq/alg/subst_weight_matrix.hh>
#include "module_config.hh"
namespace ost { namespace seq { namespace alg {
AlignmentList DLLEXPORT_OST_SEQ_ALG SemiGlobalAlign(const ConstSequenceHandle& s1,
const ConstSequenceHandle& s2,
SubstWeightMatrixPtr& subst,
int gap_open=-5,int gap_ext=-2);
}}}
#endif
......@@ -5,6 +5,7 @@ set(OST_SEQ_ALG_UNIT_TESTS
test_renumber.py
test_local_align.py
test_global_align.py
test_semiglobal_align.py
test_weight_matrix.py
test_aligntoseqres.py
)
......
import unittest
from ost import *
from ost import settings
from ost import seq
from ost.bindings.clustalw import *
class TestSemiGlobalAlign(unittest.TestCase):
def testSemiGlobalAlignment(self):
seq_a=seq.CreateSequence('A', 'abcdefghijklmnok')
seq_b=seq.CreateSequence('B', 'cdehijk')
alns=seq.alg.SemiGlobalAlign(seq_a, seq_b, seq.alg.BLOSUM62)
self.assertEqual(len(alns), 1)
self.assertEqual(alns[0].sequences[0].name, 'A')
self.assertEqual(alns[0].sequences[1].name, 'B')
self.assertEqual(str(alns[0].sequences[0]), 'abcdefghijklmnok')
self.assertEqual(str(alns[0].sequences[1]), '--cde--hijk-----')
self.assertEqual(alns[0].sequences[0].offset, 0)
self.assertEqual(alns[0].sequences[1].offset, 0)
if __name__ == "__main__":
from ost import testutils
testutils.RunTests()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment