Skip to content
Snippets Groups Projects
Commit caea23dd authored by Valerio Mariani's avatar Valerio Mariani
Browse files

More fixes, docs and unitests for MergePairwiseAlignment

parent bb4c106b
No related branches found
No related tags found
No related merge requests found
:mod:`seq.alg <ost.seq.alg>` -- Algorithms for Sequences
================================================================================
.. module:: ost.seq.alg
:synopsis: Algorithms for sequences
.. function:: MergePairwiseAlignments(pairwise_alns, ref_seq)
Merge a list of pairwise alignments into a multiple sequence alignments. This
function uses the reference sequence as the anchor and inserts gaps where
needed. This is also known as the *star method*.
The resulting multiple sequence alignment provides a simple way to map between
residues of pairwise alignments, e.g. to compare distances in two structural
templates.
There are a few things to keep in mind when using this function:
- The reference sequence mustn't contain any gaps
- The first sequence of each pairwise alignments corresponds to the reference
sequence. Apart from the presence of gaps, these two sequences must be
completely identical.
- If the reference sequence has an offset, the first sequence of each pairwise alignment
must have the same offset. This offset is inherited by the first sequence of the final
output alignment.
- The resulting multiple sequence alignment is by no means optimal. For
better results, consider using a multiple-sequence alignment program such
as MUSCLE or ClustalW.
- Residues in columns where the reference sequence has gaps should not be
considered as aligned. There is no information in the pairwise alignment to
guide the merging, the result is undefined.
.. autofunction:: AlignToSEQRES
.. autofunction:: AlignmentFromChainView
.. function:: LocalAlign(seq1, seq2, subst_weight, gap_open=-5, gap_ext=-2)
Performs a Smith/Waterman local alignment of *seq1* and *seq2* and returns
the best-scoring alignments as a list of pairwise alignments.
**Example:**
.. code-block:: python
seq_a=seq.CreateSequence('A', 'acdefghiklmn')
seq_b=seq.CreateSequence('B', 'acdhiklmn')
alns=seq.alg.LocalAlign(seq_a, seq_b, seq.alg.BLOSUM62)
print alns[0].ToString(80)
# >>> A acdefghiklmn
# >>> B acd---hiklmn
:param seq1: A valid sequence
:type seq1: :class:`~ost.seq.ConstSequenceHandle`
:param seq2: A valid sequence
:type seq2: :class:`~ost.seq.ConstSequenceHandle`
:param subst_weigth: The substitution weights matrix
:type subst_weight: :class:`SubstWeightMatrix`
:param gap_open: The gap opening penalty. Must be a negative number
:param gap_ext: The gap extension penalty. Must be a negative number
:returns: list of best-scoring, non-overlapping alignments of *seq1* and
*seq2*. Since alignments always start with a replacement, the start is
stored in the sequence offset of the two sequences.
.. function:: GlobalAlign(seq1, seq2, subst_weight, gap_open=-5, gap_ext=-2)
Performs a Needleman/Wunsch global alignment of *seq1* and *seq2* and returns
the best-scoring alignment.
**Example:**
.. code-block:: python
seq_a=seq.CreateSequence('A', 'acdefghiklmn')
seq_b=seq.CreateSequence('B', 'acdhiklmn')
alns=seq.alg.GlobalAlign(seq_a, seq_b, seq.alg.BLOSUM62)
print alns[0].ToString(80)
# >>> A acdefghiklmn
# >>> B acd---hiklmn
:param seq1: A valid sequence
:type seq1: :class:`~ost.seq.ConstSequenceHandle`
:param seq2: A valid sequence
:type seq2: :class:`~ost.seq.ConstSequenceHandle`
:param subst_weigth: The substitution weights matrix
:type subst_weight: :class:`SubstWeightMatrix`
:param gap_open: The gap opening penalty. Must be a negative number
:param gap_ext: The gap extension penalty. Must be a negative number
:returns: best-scoring alignment of *seq1* and *seq2*.
...@@ -90,7 +90,7 @@ SequenceHandle shift_reference(const ConstSequenceHandle& ref_seq, ...@@ -90,7 +90,7 @@ SequenceHandle shift_reference(const ConstSequenceHandle& ref_seq,
new_sequence.str()); new_sequence.str());
if (ref_seq.HasAttachedView()) if (ref_seq.HasAttachedView())
s.AttachView(ref_seq.GetAttachedView()); s.AttachView(ref_seq.GetAttachedView());
s.SetOffset(s.GetOffset()); s.SetOffset(ref_seq.GetOffset());
return s; return s;
} }
...@@ -134,7 +134,23 @@ SequenceHandle realign_sequence(const AlignmentHandle& aln, ...@@ -134,7 +134,23 @@ SequenceHandle realign_sequence(const AlignmentHandle& aln,
AlignmentHandle MergePairwiseAlignments(const AlignmentList& pairwise_alns, AlignmentHandle MergePairwiseAlignments(const AlignmentList& pairwise_alns,
const ConstSequenceHandle& ref_seq) const ConstSequenceHandle& ref_seq)
{ {
int ref_offset=ref_seq.GetOffset();
int alignment_counter=0;
for (AlignmentList::const_iterator i=pairwise_alns.begin(), e=pairwise_alns.end(); i!=e; ++i) {
AlignmentHandle ali_handle=*i;
ConstSequenceHandle seq_handle=ali_handle.GetSequence(0);
if (seq_handle.GetOffset()!=ref_offset) {
String error_string;
std::ostringstream error_ss(error_string);
error_ss << "The offset of the first sequence in alignment " << alignment_counter <<
"is not identical to the offset of the reference sequence." << std::endl;
throw IntegrityError(error_string);
}
alignment_counter+=1;
}
ShiftMap shifts; ShiftMap shifts;
for (AlignmentList::const_iterator i=pairwise_alns.begin(), for (AlignmentList::const_iterator i=pairwise_alns.begin(),
e=pairwise_alns.end(); i!=e; ++i) { e=pairwise_alns.end(); i!=e; ++i) {
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
#include <ost/seq/alg/merge_pairwise_alignments.hh> #include <ost/seq/alg/merge_pairwise_alignments.hh>
#include <ost/integrity_error.hh>
using namespace ost; using namespace ost;
...@@ -107,4 +107,86 @@ BOOST_AUTO_TEST_CASE(merge_pairwise_alignments_three) ...@@ -107,4 +107,86 @@ BOOST_AUTO_TEST_CASE(merge_pairwise_alignments_three)
BOOST_CHECK_EQUAL(seqs[1].GetString(), "xyabcdefghijk"); BOOST_CHECK_EQUAL(seqs[1].GetString(), "xyabcdefghijk");
BOOST_CHECK_EQUAL(seqs[2].GetString(), "-zabcdefghijk"); BOOST_CHECK_EQUAL(seqs[2].GetString(), "-zabcdefghijk");
} }
BOOST_AUTO_TEST_CASE(merge_pairwise_alignments_four)
{
SequenceHandle ref=CreateSequence("REF", "abcdefghijk");
SequenceHandle s1=CreateSequence("S1", "abcdefghijk--");
SequenceHandle s2=CreateSequence("S2", "abcdefghijkxy");
AlignmentHandle aln1=CreateAlignment();
aln1.AddSequence(s1);
aln1.AddSequence(s2);
SequenceHandle s3=CreateSequence("S1", "-abcdefghijk");
SequenceHandle s4=CreateSequence("S2", "zabcdefghijk");
AlignmentHandle aln2=CreateAlignment();
aln2.AddSequence(s3);
aln2.AddSequence(s4);
AlignmentList l;
l.push_back(aln1);
l.push_back(aln2);
AlignmentHandle m=alg::MergePairwiseAlignments(l, ref);
ConstSequenceList seqs=m.GetSequences();
BOOST_CHECK_EQUAL(seqs[0].GetString(), "-abcdefghijk--");
BOOST_CHECK_EQUAL(seqs[1].GetString(), "-abcdefghijkxy");
BOOST_CHECK_EQUAL(seqs[2].GetString(), "zabcdefghijk--");
}
BOOST_AUTO_TEST_CASE(merge_pairwise_alignments_five)
{
SequenceHandle ref=CreateSequence("REF", "abcdefghijk");
ref.SetOffset(4);
SequenceHandle s1=CreateSequence("S1", "abcdefghijk--");
SequenceHandle s2=CreateSequence("S2", "abcdefghijkxy");
s1.SetOffset(4);
AlignmentHandle aln1=CreateAlignment();
aln1.AddSequence(s1);
aln1.AddSequence(s2);
SequenceHandle s3=CreateSequence("S1", "-abcdefghijk");
SequenceHandle s4=CreateSequence("S2", "zabcdefghijk");
s3.SetOffset(4);
AlignmentHandle aln2=CreateAlignment();
aln2.AddSequence(s3);
aln2.AddSequence(s4);
AlignmentList l;
l.push_back(aln1);
l.push_back(aln2);
AlignmentHandle m=alg::MergePairwiseAlignments(l, ref);
ConstSequenceList seqs=m.GetSequences();
BOOST_CHECK_EQUAL(seqs[0].GetString(), "-abcdefghijk--");
BOOST_CHECK_EQUAL(seqs[0].GetOffset(), 4);
BOOST_CHECK_EQUAL(seqs[1].GetString(), "-abcdefghijkxy");
BOOST_CHECK_EQUAL(seqs[2].GetString(), "zabcdefghijk--");
}
BOOST_AUTO_TEST_CASE(merge_pairwise_alignments_six)
{
SequenceHandle ref=CreateSequence("REF", "abcdefghijk");
ref.SetOffset(4);
SequenceHandle s1=CreateSequence("S1", "abcdefghijk--");
SequenceHandle s2=CreateSequence("S2", "abcdefghijkxy");
s1.SetOffset(5);
AlignmentHandle aln1=CreateAlignment();
aln1.AddSequence(s1);
aln1.AddSequence(s2);
SequenceHandle s3=CreateSequence("S1", "-abcdefghijk");
SequenceHandle s4=CreateSequence("S2", "zabcdefghijk");
s3.SetOffset(4);
AlignmentHandle aln2=CreateAlignment();
aln2.AddSequence(s3);
aln2.AddSequence(s4);
AlignmentList l;
l.push_back(aln1);
l.push_back(aln2);
BOOST_CHECK_THROW (alg::MergePairwiseAlignments(l, ref),IntegrityError);
}
BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment