diff --git a/modules/seq/base/doc/seq.rst b/modules/seq/base/doc/seq.rst index 4c3e2bf17517818a8de3fac10fe02f8088191429..3d112dfa92418658a73b9cf5b45eb3a4273954e5 100644 --- a/modules/seq/base/doc/seq.rst +++ b/modules/seq/base/doc/seq.rst @@ -448,11 +448,11 @@ residue. It mainly contains: .. method:: GetFreq(aa) - :param aa: One letter code of standard amino acid + :return: Frequency of *aa* + :rtype: :class:`float` + :param aa: One letter code of standard amino acid :type aa: :class:`str` - :returns: Frequency of aa - .. method:: SetFreq(aa,freq) :param aa: One letter code of standard amino acid @@ -460,10 +460,21 @@ residue. It mainly contains: :type aa: :class:`str` :type freq: :class:`float` + .. method:: GetScore(other, null_model) + + :return: Column score as in Soeding-2005 paper. + :rtype: :class:`float` + :param other: Other column to compute score with. + :type other: :class:`ProfileColumn` + :param null_model: Null model to use for weighting. + :type null_model: :class:`ProfileColumn` + .. attribute:: entropy Shannon entropy based on the columns amino acid frequencies + :type: :class:`float` + .. class:: ProfileHandle @@ -492,9 +503,22 @@ residue. It mainly contains: (:attr:`null_model` is copied) :rtype: :class:`ProfileHandle` - :raises: :exc:`~exceptions.Error` if if *to* <= *from* or + :raises: :exc:`~exceptions.Error` if *to* <= *from* or *to* > :meth:`__len__`. + .. method:: GetAverageScore(other, offset=0) + + :return: Average column score between *other.columns[i]* and this object's + *columns[i+offset]* for *i* in [*0, len(other)-1*] using this + object's :attr:`null_model`. See :meth:`ProfileColumn.GetScore`. + :rtype: :class:`float` + :param other: Other profile to compare with. + :type other: :class:`ProfileHandle` + :param offset: Start comparison at column *offset* of this object. + :type offset: :class:`int` + + :raises: :exc:`~exceptions.Error` if any *columns[i+offset]* out of bounds. + .. method:: SetSequence(sequence) Sets :attr:`sequence`. @@ -509,18 +533,27 @@ residue. It mainly contains: Note: user must enforce consistency between sequence length and number of profile columns. + :type: :class:`str` + .. attribute:: columns Iterable columns of the profile + :type: :class:`ProfileColumnList` + .. attribute:: null_model Null model of the profile + :type: :class:`ProfileColumn` + .. attribute:: avg_entropy Average entropy of all the columns + :type: :class:`float` + + .. class:: ProfileDB A simple database to gather :class:`ProfileHandle` objects. It is possible diff --git a/modules/seq/base/pymod/export_profile_handle.cc b/modules/seq/base/pymod/export_profile_handle.cc index e563960f424e448863bf17f7d52d10db8b2875e5..111442f6c1a072f0a3f625cdcb986bd8c5086322 100644 --- a/modules/seq/base/pymod/export_profile_handle.cc +++ b/modules/seq/base/pymod/export_profile_handle.cc @@ -48,6 +48,8 @@ void export_profile_handle() .add_property("entropy", &ProfileColumn::GetEntropy) .def("GetFreq", &ProfileColumn::GetFreq) .def("SetFreq", &ProfileColumn::SetFreq) + .def("GetScore", &ProfileColumn::GetScore, + (arg("other"), arg("null_model"))) .def("BLOSUMNullModel", &ProfileColumn::BLOSUMNullModel).staticmethod("BLOSUMNullModel") ; @@ -62,6 +64,8 @@ void export_profile_handle() .def("Extract", &ProfileHandle::Extract) .def("SetNullModel", &ProfileHandle::SetNullModel) .def("SetSequence", &ProfileHandle::SetSequence) + .def("GetAverageScore", &ProfileHandle::GetAverageScore, + (arg("other"), arg("offset")=0)) .add_property("null_model", make_function(&ProfileHandle::GetNullModel, return_value_policy<copy_const_reference>())) .add_property("columns", diff --git a/modules/seq/base/src/profile_handle.cc b/modules/seq/base/src/profile_handle.cc index 0feebe958ba8aacaa9bdb690cc6c17d76a4e41c9..5e9cf536f66a6b9ec2c7711bacaa589fc3b4b48c 100644 --- a/modules/seq/base/src/profile_handle.cc +++ b/modules/seq/base/src/profile_handle.cc @@ -97,6 +97,15 @@ Real ProfileColumn::GetEntropy() const { return entropy; } +Real ProfileColumn::GetScore(const ProfileColumn& other, + const ProfileColumn& null_model) const { + Real summed_col_score = 0.001; // to avoid zero score + for (uint i = 0; i < 20; ++i) { + summed_col_score += freq_[i] * other.freq_[i] / null_model.freq_[i]; + } + return std::log(summed_col_score); +} + ////////////////////////////////////////////////////////////////////////////// // ProfileHandle ////////////////////////////////////////////////////////////////////////////// @@ -132,6 +141,22 @@ Real ProfileHandle::GetAverageEntropy() const { return (n > 0) ? n_eff/n : 0.0; } +Real ProfileHandle::GetAverageScore(const ProfileHandle& other, + uint offset) const { + // check offsets + const uint other_size = other.size(); + if (offset > this->size() || offset + other_size > this->size()) { + throw Error("Invalid profile size / offset when computing score!"); + } + // sum them up + Real sum = 0; + for (uint i = 0; i < other_size; ++i) { + sum += columns_[offset + i].GetScore(other[i], null_model_); + } + if (other_size > 0) return sum / other_size; + else return sum; +} + ////////////////////////////////////////////////////////////////////////////// // ProfileDB ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/seq/base/src/profile_handle.hh b/modules/seq/base/src/profile_handle.hh index 0d67f8c696a0995133473c2da7a979d8b84bd6bd..e1ce34b80d523581fe9c3b407ec093bf7e31bc8c 100644 --- a/modules/seq/base/src/profile_handle.hh +++ b/modules/seq/base/src/profile_handle.hh @@ -86,6 +86,10 @@ public: /// \brief Get entropy for this column. Real GetEntropy() const; + /// \brief Get column score as in Soeding-2005 + Real GetScore(const ProfileColumn& other, + const ProfileColumn& null_model) const; + // functions to feed streams with limited accuracy of internal data // not intended for python export @@ -145,6 +149,11 @@ public: /// \brief Compute average entropy over all columns. Real GetAverageEntropy() const; + /// \brief Compute score comparing columns other[i] and this->at(i+offset) + /// Column score as in Soeding-2005, null model of this object used, + /// result normalized by other.size() + Real GetAverageScore(const ProfileHandle& other, uint offset = 0) const; + // some functions to make it behave like a vector void clear() { seq_ = ""; columns_.clear(); } diff --git a/modules/seq/base/tests/test_profile.cc b/modules/seq/base/tests/test_profile.cc index ef2292bc74f0d981a0cf658a93b6c8ecf129bbec..fb5ef983a5dbf433139b0004764c253581ccc86e 100644 --- a/modules/seq/base/tests/test_profile.cc +++ b/modules/seq/base/tests/test_profile.cc @@ -121,8 +121,15 @@ BOOST_AUTO_TEST_CASE(profile_handle) BOOST_CHECK_CLOSE(prof[3].GetEntropy(), entropy3, Real(1e-3)); BOOST_CHECK_CLOSE(prof.GetAverageEntropy(), avg_entropy, Real(1e-3)); - // extract parts + // check scoring (scores precomputed to make sure they don't randomly change!) ProfileHandlePtr pp = prof.Extract(1, 4); + BOOST_CHECK_CLOSE(prof.GetAverageScore(*pp, 0), -1.81746, Real(1e-3)); + BOOST_CHECK_CLOSE(prof.GetAverageScore(*pp, 1), 2.8012, Real(1e-3)); + BOOST_CHECK_CLOSE(prof.GetAverageScore(*pp, 2), -4.12038, Real(1e-3)); + BOOST_CHECK_THROW(prof.GetAverageScore(*pp, -1), ost::Error); + BOOST_CHECK_THROW(prof.GetAverageScore(*pp, 3), ost::Error); + + // extract parts avg_entropy = (entropy1 + entropy2 + entropy3)/3; BOOST_CHECK_EQUAL(pp->size(), size_t(3)); BOOST_CHECK_EQUAL(pp->GetSequence(), "RSP"); @@ -138,6 +145,10 @@ BOOST_AUTO_TEST_CASE(profile_handle) BOOST_CHECK_EQUAL(pp->size(), size_t(2)); BOOST_CHECK_EQUAL(pp->GetSequence(), "PP"); BOOST_CHECK_CLOSE(pp->GetAverageEntropy(), entropy3, Real(1e-3)); + + BOOST_CHECK_THROW(prof.Extract(3, 3), ost::Error); + BOOST_CHECK_THROW(prof.Extract(-1, 3), ost::Error); + BOOST_CHECK_THROW(prof.Extract(3, 6), ost::Error); }