diff --git a/modules/seq/alg/src/merge_pairwise_alignments.cc b/modules/seq/alg/src/merge_pairwise_alignments.cc index 6bba7edd06e48c707e815b0eded56b1226a32fe5..5636f32c97af7b96297663f75d6456abf954a4d4 100644 --- a/modules/seq/alg/src/merge_pairwise_alignments.cc +++ b/modules/seq/alg/src/merge_pairwise_alignments.cc @@ -38,7 +38,7 @@ void update_shifts(const AlignmentHandle& aln, ShiftMap& shifts) { ConstSequenceHandle s1=aln.GetSequence(0); - if (s1.GetGaplessString()!=ref_seq.GetString()) { + if (!Match(s1.GetGaplessString(), ref_seq.GetString())) { throw IntegrityError("The gapless version of '"+s1.GetString()+ "' is not identical to the reference sequence."); } diff --git a/modules/seq/base/doc/seq.rst b/modules/seq/base/doc/seq.rst index f42a2f574c7afff92265932553bbe3c6d85f96b9..64ac6d3a7d974446f95657f30748d3ddc508f325 100644 --- a/modules/seq/base/doc/seq.rst +++ b/modules/seq/base/doc/seq.rst @@ -184,7 +184,7 @@ The SequenceHandle .. attribute:: offset - Shorthand for :meth:`GetSequenceOffset`/:meth:`SetSequenceOffset` + Shorthand for :meth:`GetOffset`/:meth:`SetOffset` .. method:: __len__() @@ -194,6 +194,16 @@ The SequenceHandle Returns the sequence as a string. +.. function:: Match(s1, s2) + + :param s1: The first sequence + :param s2: The second sequence + :type s1: :class:`SequenceHandle`, or :class:`str` + :type s2: :class:`SequenceHandle`, or :class:`str` + + Check whether the two sequences s1 and s2 match. This function performs a + case-insensitive comparison of the two sequences. The character 'X' is + interpreted as a wildcard that matches any character at the same position. 
The SequenceList -------------------------------------------------------------------------------- diff --git a/modules/seq/base/pymod/export_sequence.cc b/modules/seq/base/pymod/export_sequence.cc index c72fe3ae8e65f0f65d78e6751d57655b61711eb4..6db4d304e88f968afd3d51ef1f63f79c940dd1a6 100644 --- a/modules/seq/base/pymod/export_sequence.cc +++ b/modules/seq/base/pymod/export_sequence.cc @@ -50,6 +50,8 @@ void (AlignmentHandle::*attach_view_b)(int, const mol::EntityView&, SequenceHandle (*seq_from_chain_a)(const String&,const mol::ChainView&)=&SequenceFromChain; SequenceHandle (*seq_from_chain_b)(const String&,const mol::ChainHandle&)=&SequenceFromChain; +bool (*m1)(const String&, const String&)=&Match; +bool (*m2)(const ConstSequenceHandle&, const ConstSequenceHandle&)=&Match; template <typename T> T do_slice(const T& t, slice& sl) { int start=0, end=t.GetCount(); @@ -270,6 +272,8 @@ void export_sequence() .add_property("offset", &SequenceHandle::GetOffset, &SequenceHandle::SetOffset) ; + def("Match", m1); + def("Match", m2); implicitly_convertible<SequenceHandle, ConstSequenceHandle>(); diff --git a/modules/seq/base/src/sequence_handle.cc b/modules/seq/base/src/sequence_handle.cc index caeccfaececda45b3f61efc37f44201385607523..62539237a9a7dfe0de1105bb647f1dd5b08f2a40 100644 --- a/modules/seq/base/src/sequence_handle.cc +++ b/modules/seq/base/src/sequence_handle.cc @@ -407,4 +407,28 @@ char SequenceHandle::operator[](size_t index) const return this->GetString()[index]; } + +/* Compares the string contents of two sequence handles; forwards to the String overload of Match. */ +bool Match(const ConstSequenceHandle& s1, const ConstSequenceHandle& s2) +{ + return Match(s1.GetString(), s2.GetString()); +} + +/* Position-wise, case-insensitive comparison of s1 and s2. Returns false when the lengths differ; an 'X' (upper or lower case) on either side matches any character at that position. */ +bool Match(const String& s1, const String& s2) +{ + if (s1.size()!=s2.size()) { + return false; + } + for (size_t i=0; i<s1.size(); ++i) { + /* toupper() is only defined for EOF and values representable as unsigned char, so convert before calling it; plain char may be signed. */ + int c1=toupper(static_cast<unsigned char>(s1[i])); + int c2=toupper(static_cast<unsigned char>(s2[i])); + if (c1!=c2 && c1!='X' && c2!='X') { + return false; + } + } + return true; +} + }} diff --git a/modules/seq/base/src/sequence_handle.hh 
b/modules/seq/base/src/sequence_handle.hh index a84e1b18db19cef0b35a4e2cdf1d8f954d458fec..3b3f1b1abf471f151cf79179248150960514fcc3 100644 --- a/modules/seq/base/src/sequence_handle.hh +++ b/modules/seq/base/src/sequence_handle.hh @@ -304,6 +304,12 @@ SequenceHandle DLLEXPORT_OST_SEQ SequenceFromInfo(info::InfoGroup& group); DLLEXPORT_OST_SEQ std::ostream& operator<<(std::ostream& os, const ConstSequenceHandle& sequence); +/* Case-insensitive match of the strings held by s1 and s2; 'X' acts as a wildcard. */ +bool DLLEXPORT_OST_SEQ Match(const ConstSequenceHandle& s1, + const ConstSequenceHandle& s2); +/* Same comparison on plain strings; strings of different length never match. */ +bool DLLEXPORT_OST_SEQ Match(const String& s1, + const String& s2); }} #endif diff --git a/modules/seq/base/tests/test_sequence.cc b/modules/seq/base/tests/test_sequence.cc index 87669a644335e9e101c4c638d32b51e51344708d..3c418443503c2d94552e2571f85c9037d2821df7 100644 --- a/modules/seq/base/tests/test_sequence.cc +++ b/modules/seq/base/tests/test_sequence.cc @@ -65,6 +65,15 @@ BOOST_AUTO_TEST_CASE(seq_triv) BOOST_CHECK_THROW(s.SetString("1"), InvalidSequence); } +BOOST_AUTO_TEST_CASE(match) +{ /* exercises Match() on string arguments */ + BOOST_CHECK(Match("abcdefghijkl", "ABcDeFgHiJkL")); /* same letters, different case */ + BOOST_CHECK(Match("abcxXxxxxjXl", "ABcDeFgHiJkL")); /* 'x'/'X' wildcards in the first argument */ + BOOST_CHECK(Match("ABcDeFgHiJkL", "ABcDeFXxiJxL")); /* 'x'/'X' wildcards in the second argument */ + BOOST_CHECK(!Match("abc", "abcd")); /* different lengths */ + BOOST_CHECK(!Match("abc", "aby")); /* same length, last character differs */ +} + BOOST_AUTO_TEST_CASE(seq_throw_invalid) { SequenceHandle s;