Skip to content
Snippets Groups Projects
Commit 4caf1cc3 authored by marco's avatar marco
Browse files

added MergePairwiseAlignments

git-svn-id: https://dng.biozentrum.unibas.ch/svn/openstructure/trunk@2137 5a81b35b-ba03-0410-adc8-b2c5c5119f08
parent 61c1f3ff
No related branches found
No related tags found
No related merge requests found
add_subdirectory(src)
add_subdirectory(pymod)
\ No newline at end of file
add_subdirectory(pymod)
add_subdirectory(tests)
\ No newline at end of file
......@@ -18,9 +18,12 @@
//------------------------------------------------------------------------------
#include <boost/python.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
using namespace boost::python;
#include <ost/seq/alg/merge_pairwise_alignments.hh>
#include <ost/seq/alg/sequence_identity.hh>
#include <ost/seq/alg/ins_del.hh>
using namespace boost::python;
using namespace ost::seq;
using namespace ost::seq::alg;
......@@ -43,5 +46,6 @@ BOOST_PYTHON_MODULE(_seq_alg)
.def("GetDeletions", &InsDel::GetDeletions)
.def("GetInsertions", &InsDel::GetInsertions)
;
def("MergePairwiseAlignments", &MergePairwiseAlignments);
export_Align();
}
......@@ -6,9 +6,11 @@ subst_weight_matrix.hh
local_align.hh
alignment_opts.hh
global_align.hh
merge_pairwise_alignments.hh
)
set(OST_SEQ_ALG_SOURCES
merge_pairwise_alignments.cc
global_align.cc
sequence_identity.cc
ins_del.cc
......
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2010 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#include <sstream>
#include <map>
#include <ost/integrity_error.hh>
#include "merge_pairwise_alignments.hh"
/*
Author: Marco Biasini
*/
namespace ost { namespace seq { namespace alg {
typedef std::map<int, int> ShiftMap;
namespace {
void update_shifts(const AlignmentHandle& aln,
const SequenceHandle& ref_seq,
ShiftMap& shifts)
{
ConstSequenceHandle s1=aln.GetSequence(0);
if (s1.GetGaplessString()!=ref_seq.GetString()) {
throw IntegrityError("The gapless version of '"+s1.GetString()+
"' is not identical to the reference sequence.");
}
for (int i=0; i<s1.GetLength(); ++i) {
int shift=0;
int j=i;
while (j<s1.GetLength() && s1.GetOneLetterCode(j)=='-') {
shift++;
j++;
}
if (shift>0) {
assert(i>0);
int res_index=s1.GetResidueIndex(i-1);
ShiftMap::iterator p=shifts.find(res_index);
if (p!=shifts.end()) {
p->second=std::max(p->second, shift);
} else {
shifts.insert(std::make_pair(res_index, shift));
}
}
i=j;
}
}
SequenceHandle shift_reference(const SequenceHandle& ref_seq,
const ShiftMap& shifts)
{
std::ostringstream new_sequence;
String ref_str=ref_seq.GetString();
int last=0;
for (ShiftMap::const_iterator i=shifts.begin(),e=shifts.end();i!=e; ++i) {
new_sequence << ref_str.substr(last, i->first-last+1)
<< String(i->second, '-');
last=i->first+1;
}
new_sequence << ref_str.substr(last);
SequenceHandle s=CreateSequence(ref_seq.GetName(),
new_sequence.str());
if (ref_seq.HasAttachedView())
s.AttachView(ref_seq.GetAttachedView());
s.SetSequenceOffset(s.GetSequenceOffset());
return s;
}
SequenceHandle realign_sequence(const AlignmentHandle& aln,
const ShiftMap& shifts)
{
std::ostringstream new_sequence;
ConstSequenceHandle s1=aln.GetSequence(0);
ConstSequenceHandle s2=aln.GetSequence(1);
for (int i=0; i<s1.GetLength(); ++i) {
int shift=0;
int j=i;
while (j<s1.GetLength() && s1.GetOneLetterCode(j)=='-') {
shift++;
++j;
}
if (i>0 && s1.GetOneLetterCode(i-1)!='-') {
ShiftMap::const_iterator p=shifts.find(s1.GetResidueIndex(i-1));
if (p!=shifts.end()) {
int d=p->second-shift;
assert(d>=0);
new_sequence << String(d, '-');
}
}
new_sequence << s2.GetOneLetterCode(i);
}
SequenceHandle s=CreateSequence(s2.GetName(), new_sequence.str());
if (s2.HasAttachedView())
s.AttachView(s2.GetAttachedView());
s.SetSequenceOffset(s2.GetSequenceOffset());
return s;
}
}
AlignmentHandle MergePairwiseAlignments(const AlignmentList& pairwise_alns,
const SequenceHandle& ref_seq)
{
ShiftMap shifts;
for (AlignmentList::const_iterator i=pairwise_alns.begin(),
e=pairwise_alns.end(); i!=e; ++i) {
update_shifts(*i, ref_seq, shifts);
}
AlignmentHandle merged=CreateAlignment();
merged.AddSequence(shift_reference(ref_seq, shifts));
for (AlignmentList::const_iterator i=pairwise_alns.begin(),
e=pairwise_alns.end(); i!=e; ++i) {
SequenceHandle new_seq=realign_sequence(*i, shifts);
merged.AddSequence(new_seq);
}
return merged;
}
}}}
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2010 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#ifndef OST_SEQ_ALG_MERGE_PAIRWISE_ALIGNMENTS_HH
#define OST_SEQ_ALG_MERGE_PAIRWISE_ALIGNMENTS_HH
#include <ost/seq/alignment_handle.hh>
#include <ost/seq/alg/module_config.hh>
/*
Author: Marco Biasini
*/
namespace ost { namespace seq { namespace alg {
/// \brief merge a list of pairwise alignments into one multiple sequence
/// alignment
///
/// All sequences in the pairwise sequence alignments are a realigned to the
/// reference sequence. This is useful to merge the results of a BLAST or HMM
/// database search into one multiple sequence alignment.
///
/// The method does not produce the optimal multiple sequence alignemnt for all
/// the sequences.
///
/// \param pairwise_alignments is a list of AlignmentHandles, each containing
/// two sequences
/// \param ref_seq is the reference sequence. The reference sequence must not
/// contain any gaps.
AlignmentHandle DLLEXPORT_OST_SEQ_ALG
MergePairwiseAlignments(const AlignmentList& pairwise_alns,
const SequenceHandle& ref_seq);
}}}
#endif
set(OST_SEQ_ALG_UNIT_TESTS
test_merge_pairwise_alignments.cc
tests.cc
)
ost_unittest(seq_alg "${OST_SEQ_ALG_UNIT_TESTS}")
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2010 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
/*
* Authors: Marco Biasini, Juergen Haas
*/
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include <ost/seq/alg/merge_pairwise_alignments.hh>
using namespace ost;
using namespace ost::seq;
BOOST_AUTO_TEST_SUITE(ost_seq_alg)
BOOST_AUTO_TEST_CASE(merge_pairwise_alignments_one)
{
SequenceHandle ref=CreateSequence("REF", "abcdefghijklmn");
SequenceHandle s1=CreateSequence("S1", "abcd---efghijklmn");
SequenceHandle s2=CreateSequence("S2", "abcdxyzefghijklmn");
AlignmentHandle aln1=CreateAlignment();
aln1.AddSequence(s1);
aln1.AddSequence(s2);
SequenceHandle s3=CreateSequence("S1", "abcdefghij---klmn");
SequenceHandle s4=CreateSequence("S2", "abcdefghijxyzklmn");
AlignmentHandle aln2=CreateAlignment();
aln2.AddSequence(s3);
aln2.AddSequence(s4);
AlignmentList l;
l.push_back(aln1);
l.push_back(aln2);
AlignmentHandle m=alg::MergePairwiseAlignments(l, ref);
ConstSequenceList seqs=m.GetSequences();
BOOST_CHECK_EQUAL(seqs[0].GetString(), "abcd---efghij---klmn");
BOOST_CHECK_EQUAL(seqs[1].GetString(), "abcdxyzefghij---klmn");
BOOST_CHECK_EQUAL(seqs[2].GetString(), "abcd---efghijxyzklmn");
}
BOOST_AUTO_TEST_CASE(merge_pairwise_alignments_two)
{
SequenceHandle ref=CreateSequence("REF", "abcdefghijklmn");
SequenceHandle s1=CreateSequence("S1", "abcd---efghijklmn");
SequenceHandle s2=CreateSequence("S2", "abcdxyzefghijklmn");
AlignmentHandle aln1=CreateAlignment();
aln1.AddSequence(s1);
aln1.AddSequence(s2);
SequenceHandle s3=CreateSequence("S1", "abcd-efghijklmn");
SequenceHandle s4=CreateSequence("S2", "abcdyefghijklmn");
AlignmentHandle aln2=CreateAlignment();
aln2.AddSequence(s3);
aln2.AddSequence(s4);
AlignmentList l;
l.push_back(aln1);
l.push_back(aln2);
AlignmentHandle m=alg::MergePairwiseAlignments(l, ref);
ConstSequenceList seqs=m.GetSequences();
BOOST_CHECK_EQUAL(seqs[0].GetString(), "abcd---efghijklmn");
BOOST_CHECK_EQUAL(seqs[1].GetString(), "abcdxyzefghijklmn");
BOOST_CHECK_EQUAL(seqs[2].GetString(), "abcd--yefghijklmn");
}
BOOST_AUTO_TEST_SUITE_END()
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2010 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#define BOOST_TEST_DYN_LINK
#define BOOST_TEST_MODULE ost_seq_alg
#include <boost/test/unit_test.hpp>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment