From 1c702b94be5e394a844455c5ab05d633da08f8dc Mon Sep 17 00:00:00 2001
From: Marco Biasini <marco.biasini@unibas.ch>
Date: Fri, 8 Apr 2011 13:28:00 +0200
Subject: [PATCH] add seq.Match and use it in MergePairwiseAlignments

---
 .../seq/alg/src/merge_pairwise_alignments.cc  |  2 +-
 modules/seq/base/doc/seq.rst                  | 12 ++++++++++-
 modules/seq/base/pymod/export_sequence.cc     |  4 ++++
 modules/seq/base/src/sequence_handle.cc       | 21 +++++++++++++++++++
 modules/seq/base/src/sequence_handle.hh       |  6 ++++++
 modules/seq/base/tests/test_sequence.cc       |  9 ++++++++
 6 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/modules/seq/alg/src/merge_pairwise_alignments.cc b/modules/seq/alg/src/merge_pairwise_alignments.cc
index 6bba7edd0..5636f32c9 100644
--- a/modules/seq/alg/src/merge_pairwise_alignments.cc
+++ b/modules/seq/alg/src/merge_pairwise_alignments.cc
@@ -38,7 +38,7 @@ void update_shifts(const AlignmentHandle& aln,
                    ShiftMap& shifts)
 {
   ConstSequenceHandle s1=aln.GetSequence(0);  
-  if (s1.GetGaplessString()!=ref_seq.GetString()) {
+  if (!Match(s1.GetGaplessString(), ref_seq.GetString())) {
     throw IntegrityError("The gapless version of '"+s1.GetString()+
                          "' is not identical to the reference sequence.");
   }
diff --git a/modules/seq/base/doc/seq.rst b/modules/seq/base/doc/seq.rst
index f42a2f574..64ac6d3a7 100644
--- a/modules/seq/base/doc/seq.rst
+++ b/modules/seq/base/doc/seq.rst
@@ -184,7 +184,7 @@ The SequenceHandle
 
   .. attribute:: offset
   
-    Shorthand for :meth:`GetSequenceOffset`/:meth:`SetSequenceOffset`
+    Shorthand for :meth:`GetOffset`/:meth:`SetOffset`
 
   .. method:: __len__()
     
@@ -194,6 +194,16 @@ The SequenceHandle
 
     Returns the sequence as a string.
 
+.. function:: Match(s1, s2)
+
+  :param s1: The first sequence
+  :param s2: The second sequence
+  :type s1: :class:`SequenceHandle`, or :class:`str`
+  :type s2: :class:`SequenceHandle`, or :class:`str`
+
+  Check whether the two sequences s1 and s2 match. This function performs a
+  case-insensitive comparison of the two sequences. The character 'X' is
+  interpreted as a wildcard that matches any character in the other sequence.
 
 The SequenceList    
 --------------------------------------------------------------------------------
diff --git a/modules/seq/base/pymod/export_sequence.cc b/modules/seq/base/pymod/export_sequence.cc
index c72fe3ae8..6db4d304e 100644
--- a/modules/seq/base/pymod/export_sequence.cc
+++ b/modules/seq/base/pymod/export_sequence.cc
@@ -50,6 +50,8 @@ void (AlignmentHandle::*attach_view_b)(int, const mol::EntityView&,
 SequenceHandle (*seq_from_chain_a)(const String&,const mol::ChainView&)=&SequenceFromChain;
 SequenceHandle (*seq_from_chain_b)(const String&,const mol::ChainHandle&)=&SequenceFromChain;
 
+bool (*m1)(const String&, const String&)=&Match; // selects the String overload of Match
+bool (*m2)(const ConstSequenceHandle&, const ConstSequenceHandle&)=&Match; // selects the handle overload
 template <typename T>
 T do_slice(const T& t, slice& sl) {
   int start=0, end=t.GetCount();
@@ -270,6 +272,8 @@ void export_sequence()
     .add_property("offset", &SequenceHandle::GetOffset,
                   &SequenceHandle::SetOffset)
   ;
+  def("Match", m1);
+  def("Match", m2);
 
   implicitly_convertible<SequenceHandle, ConstSequenceHandle>();
   
diff --git a/modules/seq/base/src/sequence_handle.cc b/modules/seq/base/src/sequence_handle.cc
index caeccfaec..62539237a 100644
--- a/modules/seq/base/src/sequence_handle.cc
+++ b/modules/seq/base/src/sequence_handle.cc
@@ -407,4 +407,25 @@ char SequenceHandle::operator[](size_t index) const
   return this->GetString()[index];
 }
 
+
+bool Match(const ConstSequenceHandle& s1, const ConstSequenceHandle& s2)
+{
+  return Match(s1.GetString(), s2.GetString()); // delegate to the String overload
+}
+
+// Case-insensitive comparison; 'X' is a wildcard, unequal lengths never match.
+bool Match(const String& s1, const String& s2)
+{
+  if (s1.size()!=s2.size()) {
+    return false;
+  }
+  for (size_t i=0; i<s1.size(); ++i) {
+    // go through unsigned char: toupper() on a negative char value is undefined
+    const int c1=toupper(static_cast<unsigned char>(s1[i]));
+    const int c2=toupper(static_cast<unsigned char>(s2[i]));
+    if (c1!=c2 && c1!='X' && c2!='X') return false;
+  }
+  return true;
+}
+
 }}
diff --git a/modules/seq/base/src/sequence_handle.hh b/modules/seq/base/src/sequence_handle.hh
index a84e1b18d..3b3f1b1ab 100644
--- a/modules/seq/base/src/sequence_handle.hh
+++ b/modules/seq/base/src/sequence_handle.hh
@@ -304,6 +304,12 @@ SequenceHandle DLLEXPORT_OST_SEQ SequenceFromInfo(info::InfoGroup& group);
 
 DLLEXPORT_OST_SEQ std::ostream& operator<<(std::ostream& os, 
                                            const ConstSequenceHandle& sequence);
+
+bool DLLEXPORT_OST_SEQ Match(const ConstSequenceHandle& s1,
+                             const ConstSequenceHandle& s2);
+
+bool DLLEXPORT_OST_SEQ Match(const String& s1,
+                             const String& s2);
 }}
 
 #endif
diff --git a/modules/seq/base/tests/test_sequence.cc b/modules/seq/base/tests/test_sequence.cc
index 87669a644..3c4184435 100644
--- a/modules/seq/base/tests/test_sequence.cc
+++ b/modules/seq/base/tests/test_sequence.cc
@@ -65,6 +65,15 @@ BOOST_AUTO_TEST_CASE(seq_triv)
   BOOST_CHECK_THROW(s.SetString("1"), InvalidSequence);
 }
 
+BOOST_AUTO_TEST_CASE(match)
+{
+  BOOST_CHECK(Match("abcdefghijkl", "ABcDeFgHiJkL")); // differs only in case
+  BOOST_CHECK(Match("abcxXxxxxjXl", "ABcDeFgHiJkL")); // x/X wildcards in first argument
+  BOOST_CHECK(Match("ABcDeFgHiJkL", "ABcDeFXxiJxL")); // x/X wildcards in second argument
+  BOOST_CHECK(!Match("abc", "abcd")); // length mismatch
+  BOOST_CHECK(!Match("abc", "aby")); // genuine mismatch, no wildcard involved
+}
+
 BOOST_AUTO_TEST_CASE(seq_throw_invalid)
 {
   SequenceHandle s;
-- 
GitLab