Skip to content
Snippets Groups Projects
Commit b1207558 authored by Marco Biasini's avatar Marco Biasini
Browse files

Shannon entropy calculation for sequence alignments

parent ae7f70c6
Branches
Tags
No related merge requests found
......@@ -118,3 +118,18 @@
:param gap_open: The gap opening penalty. Must be a negative number
:param gap_ext: The gap extension penalty. Must be a negative number
:returns: best-scoring alignment of *seq1* and *seq2*.
.. function:: ShannonEntropy(aln, ignore_gaps=True)
Returns the per-column Shannon entropies of the alignment. The entropy
describes how conserved a certain column in the alignment is . The higher
the entropy is, the less conserved the column. For column with no amino
aids, the entropy value is set to NAN
:param aln: multiple sequence alignment
:type aln: :class:`~ost.seq.AlignmentHandle`
:param ignore_gaps: whether to ignore gaps in the column.
:type ignore_gaps: bool
:returns: list of column entropies
......@@ -26,6 +26,7 @@
#include <ost/seq/alg/subst_weight_matrix.hh>
#include <ost/seq/alg/local_align.hh>
#include <ost/seq/alg/global_align.hh>
#include <ost/seq/alg/entropy.hh>
using namespace boost::python;
using namespace ost::seq;
using namespace ost::seq::alg;
......@@ -58,5 +59,6 @@ BOOST_PYTHON_MODULE(_ost_seq_alg)
arg("gap_open")=-5, arg("gap_ext")=-2));
def("GlobalAlign", &GlobalAlign,(arg("seq1"),arg("seq2"),arg("subst_weight"),
arg("gap_open")=-5, arg("gap_ext")=-2));
def("ShannonEntropy", &ShannonEntropy, (arg("aln"), arg("ignore_gaps")=true));
}
......@@ -4,6 +4,7 @@ module_config.hh
ins_del.hh
subst_weight_matrix.hh
alignment_opts.hh
entropy.hh
merge_pairwise_alignments.hh
conservation.hh
local_align.hh
......@@ -14,6 +15,7 @@ set(OST_SEQ_ALG_SOURCES
merge_pairwise_alignments.cc
local_align.cc
global_align.cc
entropy.cc
sequence_identity.cc
ins_del.cc
conservation.cc
......
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2011 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#include <limits>
#include "entropy.hh"
#include <ost/seq/aligned_column.hh>
#include <ost/seq/aligned_column_iterator.hh>
namespace ost { namespace seq { namespace alg {
int OneLetterCodeToIndex(char olc, bool ignore_gaps)
{
if (olc >= 'A' && olc <= 'Z') return (olc-'A');
if (olc >= 'a' && olc <= 'z') return (olc-'a');
if (olc == '-' && !ignore_gaps) return 26;
return -1;
}
std::vector<Real> ShannonEntropy(const AlignmentHandle& aln, bool ignore_gaps) {
std::vector<Real> entropies(aln.GetLength(), 0);
int aa_counts[27];
int k = 0;
for (AlignmentHandle::iterator
i = aln.begin(), e = aln.end(); i != e; ++i, ++k) {
const AlignedColumn& col = *i;
memset(aa_counts, 0, sizeof(int)*27);
int counts = 0;
for (int j=0; j<aln.GetCount(); ++j) {
int index = OneLetterCodeToIndex(col[j], ignore_gaps);
if (index==-1) {
continue;
}
counts += 1;
aa_counts[index] += 1;
}
Real entropy = 0.0;
for (int j = 0; j < 27; ++j) {
if (aa_counts[j]) {
Real freq = static_cast<Real>(aa_counts[j])/counts;
entropy-= log(freq)*freq;
}
}
if (counts)
entropies[k] = entropy;
else
entropies[k] = std::numeric_limits<Real>::quiet_NaN();
}
return entropies;
}
}}}
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2011 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#ifndef OST_SEQ_ALG_ENTROPY_HH
#define OST_SEQ_ALG_ENTROPY_HH
#include <vector>
#include <ost/seq/alignment_handle.hh>
#include "module_config.hh"
namespace ost { namespace seq { namespace alg {
/// \brief calculates the Shannon entropy for each column in the alignment
std::vector<Real> DLLEXPORT_OST_SEQ_ALG ShannonEntropy(const AlignmentHandle& aln,
bool ignore_gaps=true);
}}}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment