Skip to content
Snippets Groups Projects
Commit 565746ed authored by Gerardo Tauriello's avatar Gerardo Tauriello
Browse files

SCHWED-1549: Implemented profile-profile scoring.

parent 3d719b4b
No related branches found
No related tags found
No related merge requests found
...@@ -448,11 +448,11 @@ residue. It mainly contains: ...@@ -448,11 +448,11 @@ residue. It mainly contains:
.. method:: GetFreq(aa) .. method:: GetFreq(aa)
:param aa: One letter code of standard amino acid :return: Frequency of *aa*
:rtype: :class:`float`
:param aa: One letter code of standard amino acid
:type aa: :class:`str` :type aa: :class:`str`
:returns: Frequency of aa
.. method:: SetFreq(aa,freq) .. method:: SetFreq(aa,freq)
:param aa: One letter code of standard amino acid :param aa: One letter code of standard amino acid
...@@ -460,10 +460,21 @@ residue. It mainly contains: ...@@ -460,10 +460,21 @@ residue. It mainly contains:
:type aa: :class:`str` :type aa: :class:`str`
:type freq: :class:`float` :type freq: :class:`float`
.. method:: GetScore(other, null_model)
:return: Column score as in Soeding-2005 paper.
:rtype: :class:`float`
:param other: Other column to compute score with.
:type other: :class:`ProfileColumn`
:param null_model: Null model to use for weighting.
:type null_model: :class:`ProfileColumn`
.. attribute:: entropy .. attribute:: entropy
Shannon entropy based on the columns amino acid frequencies Shannon entropy based on the columns amino acid frequencies
:type: :class:`float`
.. class:: ProfileHandle .. class:: ProfileHandle
...@@ -492,9 +503,22 @@ residue. It mainly contains: ...@@ -492,9 +503,22 @@ residue. It mainly contains:
(:attr:`null_model` is copied) (:attr:`null_model` is copied)
:rtype: :class:`ProfileHandle` :rtype: :class:`ProfileHandle`
:raises: :exc:`~exceptions.Error` if if *to* <= *from* or :raises: :exc:`~exceptions.Error` if *to* <= *from* or
*to* > :meth:`__len__`. *to* > :meth:`__len__`.
.. method:: GetAverageScore(other, offset=0)
:return: Average column score between *other.columns[i]* and this object's
*columns[i+offset]* for *i* in [*0, len(other)-1*] using this
object's :attr:`null_model`. See :meth:`ProfileColumn.GetScore`.
:rtype: :class:`float`
:param other: Other profile to compare with.
:type other: :class:`ProfileHandle`
:param offset: Start comparison at column *offset* of this object.
:type offset: :class:`int`
:raises: :exc:`~exceptions.Error` if any *columns[i+offset]* out of bounds.
.. method:: SetSequence(sequence) .. method:: SetSequence(sequence)
Sets :attr:`sequence`. Sets :attr:`sequence`.
...@@ -509,18 +533,27 @@ residue. It mainly contains: ...@@ -509,18 +533,27 @@ residue. It mainly contains:
Note: user must enforce consistency between sequence length and number of Note: user must enforce consistency between sequence length and number of
profile columns. profile columns.
:type: :class:`str`
.. attribute:: columns .. attribute:: columns
Iterable columns of the profile Iterable columns of the profile
:type: :class:`ProfileColumnList`
.. attribute:: null_model .. attribute:: null_model
Null model of the profile Null model of the profile
:type: :class:`ProfileColumn`
.. attribute:: avg_entropy .. attribute:: avg_entropy
Average entropy of all the columns Average entropy of all the columns
:type: :class:`float`
.. class:: ProfileDB .. class:: ProfileDB
A simple database to gather :class:`ProfileHandle` objects. It is possible A simple database to gather :class:`ProfileHandle` objects. It is possible
......
...@@ -48,6 +48,8 @@ void export_profile_handle() ...@@ -48,6 +48,8 @@ void export_profile_handle()
.add_property("entropy", &ProfileColumn::GetEntropy) .add_property("entropy", &ProfileColumn::GetEntropy)
.def("GetFreq", &ProfileColumn::GetFreq) .def("GetFreq", &ProfileColumn::GetFreq)
.def("SetFreq", &ProfileColumn::SetFreq) .def("SetFreq", &ProfileColumn::SetFreq)
.def("GetScore", &ProfileColumn::GetScore,
(arg("other"), arg("null_model")))
.def("BLOSUMNullModel", .def("BLOSUMNullModel",
&ProfileColumn::BLOSUMNullModel).staticmethod("BLOSUMNullModel") &ProfileColumn::BLOSUMNullModel).staticmethod("BLOSUMNullModel")
; ;
...@@ -62,6 +64,8 @@ void export_profile_handle() ...@@ -62,6 +64,8 @@ void export_profile_handle()
.def("Extract", &ProfileHandle::Extract) .def("Extract", &ProfileHandle::Extract)
.def("SetNullModel", &ProfileHandle::SetNullModel) .def("SetNullModel", &ProfileHandle::SetNullModel)
.def("SetSequence", &ProfileHandle::SetSequence) .def("SetSequence", &ProfileHandle::SetSequence)
.def("GetAverageScore", &ProfileHandle::GetAverageScore,
(arg("other"), arg("offset")=0))
.add_property("null_model", make_function(&ProfileHandle::GetNullModel, .add_property("null_model", make_function(&ProfileHandle::GetNullModel,
return_value_policy<copy_const_reference>())) return_value_policy<copy_const_reference>()))
.add_property("columns", .add_property("columns",
......
...@@ -97,6 +97,15 @@ Real ProfileColumn::GetEntropy() const { ...@@ -97,6 +97,15 @@ Real ProfileColumn::GetEntropy() const {
return entropy; return entropy;
} }
Real ProfileColumn::GetScore(const ProfileColumn& other,
const ProfileColumn& null_model) const {
Real summed_col_score = 0.001; // to avoid zero score
for (uint i = 0; i < 20; ++i) {
summed_col_score += freq_[i] * other.freq_[i] / null_model.freq_[i];
}
return std::log(summed_col_score);
}
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// ProfileHandle // ProfileHandle
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -132,6 +141,22 @@ Real ProfileHandle::GetAverageEntropy() const { ...@@ -132,6 +141,22 @@ Real ProfileHandle::GetAverageEntropy() const {
return (n > 0) ? n_eff/n : 0.0; return (n > 0) ? n_eff/n : 0.0;
} }
Real ProfileHandle::GetAverageScore(const ProfileHandle& other,
uint offset) const {
// check offsets
const uint other_size = other.size();
if (offset > this->size() || offset + other_size > this->size()) {
throw Error("Invalid profile size / offset when computing score!");
}
// sum them up
Real sum = 0;
for (uint i = 0; i < other_size; ++i) {
sum += columns_[offset + i].GetScore(other[i], null_model_);
}
if (other_size > 0) return sum / other_size;
else return sum;
}
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// ProfileDB // ProfileDB
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
......
...@@ -86,6 +86,10 @@ public: ...@@ -86,6 +86,10 @@ public:
/// \brief Get entropy for this column. /// \brief Get entropy for this column.
Real GetEntropy() const; Real GetEntropy() const;
/// \brief Get column score as in Soeding-2005
Real GetScore(const ProfileColumn& other,
const ProfileColumn& null_model) const;
// functions to feed streams with limited accuracy of internal data // functions to feed streams with limited accuracy of internal data
// not intended for python export // not intended for python export
...@@ -145,6 +149,11 @@ public: ...@@ -145,6 +149,11 @@ public:
/// \brief Compute average entropy over all columns. /// \brief Compute average entropy over all columns.
Real GetAverageEntropy() const; Real GetAverageEntropy() const;
/// \brief Compute score comparing columns other[i] and this->at(i+offset)
/// Column score as in Soeding-2005, null model of this object used,
/// result normalized by other.size()
Real GetAverageScore(const ProfileHandle& other, uint offset = 0) const;
// some functions to make it behave like a vector // some functions to make it behave like a vector
void clear() { seq_ = ""; columns_.clear(); } void clear() { seq_ = ""; columns_.clear(); }
......
...@@ -121,8 +121,15 @@ BOOST_AUTO_TEST_CASE(profile_handle) ...@@ -121,8 +121,15 @@ BOOST_AUTO_TEST_CASE(profile_handle)
BOOST_CHECK_CLOSE(prof[3].GetEntropy(), entropy3, Real(1e-3)); BOOST_CHECK_CLOSE(prof[3].GetEntropy(), entropy3, Real(1e-3));
BOOST_CHECK_CLOSE(prof.GetAverageEntropy(), avg_entropy, Real(1e-3)); BOOST_CHECK_CLOSE(prof.GetAverageEntropy(), avg_entropy, Real(1e-3));
// extract parts // check scoring (scores precomputed to make sure they don't randomly change!)
ProfileHandlePtr pp = prof.Extract(1, 4); ProfileHandlePtr pp = prof.Extract(1, 4);
BOOST_CHECK_CLOSE(prof.GetAverageScore(*pp, 0), -1.81746, Real(1e-3));
BOOST_CHECK_CLOSE(prof.GetAverageScore(*pp, 1), 2.8012, Real(1e-3));
BOOST_CHECK_CLOSE(prof.GetAverageScore(*pp, 2), -4.12038, Real(1e-3));
BOOST_CHECK_THROW(prof.GetAverageScore(*pp, -1), ost::Error);
BOOST_CHECK_THROW(prof.GetAverageScore(*pp, 3), ost::Error);
// extract parts
avg_entropy = (entropy1 + entropy2 + entropy3)/3; avg_entropy = (entropy1 + entropy2 + entropy3)/3;
BOOST_CHECK_EQUAL(pp->size(), size_t(3)); BOOST_CHECK_EQUAL(pp->size(), size_t(3));
BOOST_CHECK_EQUAL(pp->GetSequence(), "RSP"); BOOST_CHECK_EQUAL(pp->GetSequence(), "RSP");
...@@ -138,6 +145,10 @@ BOOST_AUTO_TEST_CASE(profile_handle) ...@@ -138,6 +145,10 @@ BOOST_AUTO_TEST_CASE(profile_handle)
BOOST_CHECK_EQUAL(pp->size(), size_t(2)); BOOST_CHECK_EQUAL(pp->size(), size_t(2));
BOOST_CHECK_EQUAL(pp->GetSequence(), "PP"); BOOST_CHECK_EQUAL(pp->GetSequence(), "PP");
BOOST_CHECK_CLOSE(pp->GetAverageEntropy(), entropy3, Real(1e-3)); BOOST_CHECK_CLOSE(pp->GetAverageEntropy(), entropy3, Real(1e-3));
BOOST_CHECK_THROW(prof.Extract(3, 3), ost::Error);
BOOST_CHECK_THROW(prof.Extract(-1, 3), ost::Error);
BOOST_CHECK_THROW(prof.Extract(3, 6), ost::Error);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment