Skip to content
Snippets Groups Projects
Commit 422bd6ea authored by Studer Gabriel's avatar Studer Gabriel
Browse files

sequence similarity algorithm

parent 3410aec8
No related branches found
No related tags found
No related merge requests found
......@@ -21,6 +21,7 @@
#include <ost/seq/alg/merge_pairwise_alignments.hh>
#include <ost/seq/alg/sequence_identity.hh>
#include <ost/seq/alg/sequence_similarity.hh>
#include <ost/seq/alg/ins_del.hh>
#include <ost/seq/alg/conservation.hh>
#include <ost/seq/alg/subst_weight_matrix.hh>
......@@ -39,11 +40,18 @@ BOOST_PYTHON_MODULE(_ost_seq_alg)
.value("LONGER_SEQUENCE", RefMode::LONGER_SEQUENCE)
.export_values()
;
def("SequenceIdentity", &SequenceIdentity,
(arg("ref_mode")=RefMode::ALIGNMENT, arg("seq_a")=0, arg("seq_b")=1));
def("SequenceSimilarity", &SequenceSimilarity,
(arg("aln"),arg("subst_weight"),arg("normalize")=false,
arg("seq_a")=0,arg("seq_b")=1));
class_<AlignedRegionList>("AlignedRegionList", init<>())
.def(vector_indexing_suite<AlignedRegionList>())
;
class_<InsDel>("InsDel", init<const AlignmentHandle&, int, int>())
.def(init<const AlignmentHandle&>())
.def("GetDeletions", &InsDel::GetDeletions)
......@@ -54,6 +62,7 @@ BOOST_PYTHON_MODULE(_ost_seq_alg)
.def("GetWeight", &SubstWeightMatrix::GetWeight)
.def("SetWeight", &SubstWeightMatrix::SetWeight)
;
def("MergePairwiseAlignments", &MergePairwiseAlignments);
def("Conservation", &Conservation, (arg("assign")=true, arg("prop_name")="cons", arg("ignore_gap")=false));
def("LocalAlign", &LocalAlign, (arg("seq1"), arg("seq2"),arg("subst_weight"),
......
set(OST_SEQ_ALG_HEADERS
sequence_identity.hh
sequence_similarity.hh
module_config.hh
ins_del.hh
subst_weight_matrix.hh
......@@ -19,6 +20,7 @@ global_align.cc
semiglobal_align.cc
entropy.cc
sequence_identity.cc
sequence_similarity.cc
ins_del.cc
conservation.cc
)
......
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2015 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#include <ost/seq/aligned_column_iterator.hh>
#include <ost/seq/alg/sequence_similarity.hh>
namespace ost { namespace seq { namespace alg {
Real SequenceSimilarity(const AlignmentHandle& aln, SubstWeightMatrixPtr subst,
bool normalize, int seq_a, int seq_b){
int non_gap_count=0;
int similarity_score = 0;
const String& sa=aln.GetSequence(seq_a).GetString();
const String& sb=aln.GetSequence(seq_b).GetString();
for (String::const_iterator
i=sa.begin(), e=sa.end(), j=sb.begin(); i!=e; ++i, ++j) {
if (!((*i)== '-' || (*j)=='-')) {
++non_gap_count;
similarity_score += subst->GetWeight(*i,*j);
}
}
Real seq_sim = subst->GetMinWeight();
if(non_gap_count > 0){
seq_sim = static_cast<Real>(similarity_score)/non_gap_count;
}
if(normalize){
seq_sim -= subst->GetMinWeight();
Real weight_range = subst->GetMaxWeight() - subst->GetMinWeight();
if(weight_range > 0) seq_sim /= weight_range;
}
return seq_sim;
}
}}}
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2015 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#ifndef OST_SEQ_SEQUENCE_SIMILARITY_HH
#define OST_SEQ_SEQUENCE_SIMILARITY_HH
#include <ost/seq/alg/module_config.hh>
#include <ost/seq/alignment_handle.hh>
#include <ost/seq/alg/subst_weight_matrix.hh>
namespace ost { namespace seq { namespace alg {
/// \brief calculate sequence similarity for two sequences in an alignment
/// \param seq_a is the index of the first sequence
/// \param seq_b is the index of the second sequence
/// \param aln is the sequence alignment
/// \return sequence similarity
Real DLLEXPORT_OST_SEQ_ALG
SequenceSimilarity(const AlignmentHandle& aln, SubstWeightMatrixPtr subst,
bool normalize=false, int seq_a=0, int seq_b=1);
}}}
#endif
......@@ -47,7 +47,7 @@ public:
///
/// In order to get a useful substitution weight matrix, use SetWeight().
/// Alternatively you may want to load the substitution from an info group.
SubstWeightMatrix() {
SubstWeightMatrix():max_weight_(0),min_weight_(0) {
::memset(weights_, 0, sizeof(WeightType)*ALPHABET_SIZE*ALPHABET_SIZE);
}
......@@ -64,6 +64,12 @@ public:
return (i>=0 && i<ALPHABET_SIZE*ALPHABET_SIZE) ? weights_[i] : 0;
}
/// \brief Get the minimal substitution weight of the matrix
WeightType GetMinWeight() { return min_weight_; }
/// \brief Get the maximal substitution weight of the matrix
WeightType GetMaxWeight() { return max_weight_; }
/// \brief Set the substitution weight between two amino acids
///
/// The weight is only set if the amino acid single letter code
......@@ -74,6 +80,8 @@ public:
int i=Index(aa_one, aa_two);
if (i>=0 && i<ALPHABET_SIZE*ALPHABET_SIZE) {
weights_[i]=weight;
min_weight_ = std::min(min_weight_,weight);
max_weight_ = std::max(max_weight_,weight);
}
}
}
......@@ -88,6 +96,8 @@ private:
return (toupper(aa)>='A' && toupper(aa)<='Z');
}
WeightType weights_[ALPHABET_SIZE*ALPHABET_SIZE];
WeightType max_weight_;
WeightType min_weight_;
};
#if(OST_INFO_ENABLED)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment