diff --git a/modules/seq/base/doc/seq.rst b/modules/seq/base/doc/seq.rst index 042f1d09c9811d0b7f42650576ec604cfa9d8fce..e5a89a5f1ac2099fc0789905107e501681a7d007 100644 --- a/modules/seq/base/doc/seq.rst +++ b/modules/seq/base/doc/seq.rst @@ -30,7 +30,7 @@ Sequence Offset When using sequences and structures together, often the start of the structure and the beginning of the sequence do not fall together. In the following case, -the alignment of sequences B and C only covers a subpart of structure A:: +the alignment of sequences B and C only covers a subsequence of structure A:: A acefghiklmnpqrstuvwy B ghiklm @@ -167,6 +167,10 @@ The SequenceHandle Returns a string version of this sequence with all hyphens removed. Also available as the property :attr:`gapless_string`. + .. method:: Normalise() + + Remove '-' and '.' as gaps from the sequence and make it all upper case. + Works in place. .. method:: SetName() @@ -205,7 +209,7 @@ The SequenceHandle Check whether the two sequences s1 and s2 match. This function performs are case-insensitive comparison of the two sequences. The character 'X' is - interpreted as a wildcard character that always matches the other sequence. + interpreted as a wild card character that always matches the other sequence. The SequenceList -------------------------------------------------------------------------------- @@ -220,7 +224,7 @@ The AlignmentHandle -------------------------------------------------------------------------------- The :class:`AlignmentHandle` represents a list of aligned sequences. In -constrast to :class:`SequenceList`, an alignment requires all sequences to be of +contrast to :class:`SequenceList`, an alignment requires all sequences to be of the same length. New instances of alignments are created with :func:`CreateAlignment` and :func:`AlignmentFromSequenceList`. @@ -431,4 +435,3 @@ an alignment: .. method:: RemoveSequence(index) Remove sequence at *index* from the alignment. 
- diff --git a/modules/seq/base/pymod/export_sequence.cc b/modules/seq/base/pymod/export_sequence.cc index 6cdf081cd35740fa1f4a543ef341e37f3e0974f6..1919290e7f90f9cee97b19fcdcecccc007090c64 100644 --- a/modules/seq/base/pymod/export_sequence.cc +++ b/modules/seq/base/pymod/export_sequence.cc @@ -300,6 +300,7 @@ void export_sequence() .def("AttachView", attach_two) .def("Append", &SequenceHandle::Append) .def("SetString", &SequenceHandle::SetString) + .def("Normalise", &SequenceHandle::Normalise) .add_property("string", make_function(&SequenceHandle::GetString, return_value_policy<copy_const_reference>()), diff --git a/modules/seq/base/src/impl/sequence_impl.cc b/modules/seq/base/src/impl/sequence_impl.cc index 733eaf1b8e17f765581a4cb1e527beb2bf3483ef..9864fe8e5f375c3434b7050e5eca5d3ebd99da2d 100644 --- a/modules/seq/base/src/impl/sequence_impl.cc +++ b/modules/seq/base/src/impl/sequence_impl.cc @@ -331,6 +331,27 @@ void SequenceImpl::Replace(const String& str,int start, int end) this->ShiftsFromSequence(); } +/// Strip gap characters and upper-case the sequence in place. +/// +/// '.' and '-' are dropped, and so are embedded NUL bytes; every other +/// character is passed through toupper(). ShiftsFromSequence() is called +/// afterwards, as in Replace(). +void SequenceImpl::Normalise() { + String new_seq_string; + // Reserve the upper bound once so appending does not copy the buffer again. + new_seq_string.reserve(seq_string_.length()); + for (size_t i=0; i<seq_string_.length(); ++i) { + const char c = seq_string_[i]; + if (c=='.' || c=='-' || c=='\0') { + continue; + } + // toupper() is undefined for negative char values, hence unsigned char. + new_seq_string += char(toupper(static_cast<unsigned char>(c))); + } + seq_string_ = new_seq_string; + this->ShiftsFromSequence(); +} + void SequenceImpl::ShiftRegion(int start, int end, int amount) { if(start > end || start + amount < 0 || end + amount > this->GetLength()){ diff --git a/modules/seq/base/src/impl/sequence_impl.hh b/modules/seq/base/src/impl/sequence_impl.hh index fecad2ca921599ecd85c58ee33e6c184bdbd5a52..ccb64b7754e9e20e2cd6683f612dbe533f86423e 100644 --- a/modules/seq/base/src/impl/sequence_impl.hh +++ b/modules/seq/base/src/impl/sequence_impl.hh @@ -81,6 +81,9 @@ public: /// \brief Set 
sequence String void SetString(const String& seq); + /// \brief Remove gaps ('.', '-') and make the sequence upper case, in place + void Normalise(); + /// \brief replace substring starting from start to end void Replace(const String& str,int start, int end); diff --git a/modules/seq/base/src/sequence_handle.cc b/modules/seq/base/src/sequence_handle.cc index 627ebdc36ee0501a7ff5fcf2926af7c199dae592..f57a624e7634410910b4b479debcd5db6b25a4cd 100644 --- a/modules/seq/base/src/sequence_handle.cc +++ b/modules/seq/base/src/sequence_handle.cc @@ -295,6 +295,11 @@ String SequenceHandle::GetGaplessString() const return Impl()->GetGaplessString(); } +void SequenceHandle::Normalise() { + this->CheckValidity(); + Impl()->Normalise(); +} + int SequenceHandle::GetOffset() const { this->CheckValidity(); diff --git a/modules/seq/base/src/sequence_handle.hh b/modules/seq/base/src/sequence_handle.hh index d7247cbd10abca25bdc9fc3c00b60dac7c6e9915..cf9aa732d1d2a85c154e0dd774790a724ac0e2b2 100644 --- a/modules/seq/base/src/sequence_handle.hh +++ b/modules/seq/base/src/sequence_handle.hh @@ -123,7 +123,7 @@ public: bool HasAttachedView() const; const String& GetRole() const; - + bool operator==(const ConstSequenceHandle& rhs) const; bool operator!=(const ConstSequenceHandle& rhs) const; @@ -231,7 +231,11 @@ public: /// \brief get attached view. may be an invalid entity view /// \sa SequenceHandle::AttachView(const mol::EntityView&, const String&) mol::EntityView GetAttachedView() const; - + + /// \brief remove '.', '-' as gaps and make sequence all-uppercase + /// Changes happen in place. + void Normalise(); + /// \brief create copy sequence /// The newly created sequence has the same attached view. 
SequenceHandle Copy() const; diff --git a/modules/seq/base/tests/test_seq.py b/modules/seq/base/tests/test_seq.py index bfd80d7bf70461211962f6b36c2c603584b4fe4c..f202fe522bf19367af6f9ca70c86837d41869cf1 100644 --- a/modules/seq/base/tests/test_seq.py +++ b/modules/seq/base/tests/test_seq.py @@ -175,6 +175,16 @@ class TestSeq(unittest.TestCase): self.assertEqual(string_a, 'BDFH') self.assertEqual(string_b, 'BDFH') + def testNormalise(self): + seq_a=seq.CreateSequence("A", "B-D-FGH") + self.assertEqual("B-D-FGH", seq_a.GetString()) + seq_a.Normalise() + self.assertEqual("BDFGH", seq_a.GetString()) + seq_a=seq.CreateSequence("A", "b.d-fgh") + self.assertEqual("b.d-fgh", seq_a.GetString()) + seq_a.Normalise() + self.assertEqual("BDFGH", seq_a.GetString()) + if __name__== '__main__': from ost import testutils testutils.RunTests() diff --git a/modules/seq/base/tests/test_sequence.cc b/modules/seq/base/tests/test_sequence.cc index 462747784933dcc25dd36689c3a0d0df5a5f2de2..309939db0d715aa4b8dce5256bac8176117bc07a 100644 --- a/modules/seq/base/tests/test_sequence.cc +++ b/modules/seq/base/tests/test_sequence.cc @@ -106,6 +106,26 @@ BOOST_AUTO_TEST_CASE(seq_string) BOOST_CHECK_EQUAL(s.GetGaplessString(),""); } +BOOST_AUTO_TEST_CASE(seq_normalise) +{ + SequenceHandle s=CreateSequence("S1", "abfcdadeaf"); + BOOST_CHECK_EQUAL(s.GetString(),"abfcdadeaf"); + s.Normalise(); + BOOST_CHECK_EQUAL(s.GetString(),"ABFCDADEAF"); + s=CreateSequence("S1", ".afc..de.f"); + BOOST_CHECK_EQUAL(s.GetString(),".afc..de.f"); + s.Normalise(); + BOOST_CHECK_EQUAL(s.GetString(),"AFCDEF"); + s=CreateSequence("S1", "-afc--de-f"); + BOOST_CHECK_EQUAL(s.GetString(),"-afc--de-f"); + s.Normalise(); + BOOST_CHECK_EQUAL(s.GetString(),"AFCDEF"); + s=CreateSequence("S1", ".afc-.de-f"); + BOOST_CHECK_EQUAL(s.GetString(),".afc-.de-f"); + s.Normalise(); + BOOST_CHECK_EQUAL(s.GetString(),"AFCDEF"); +} + BOOST_AUTO_TEST_CASE(seq_onelettercode) { SequenceHandle s=CreateSequence("S1", "abfcdadeaf");