Skip to content
Snippets Groups Projects
Commit 422bd6ea authored by Studer Gabriel's avatar Studer Gabriel
Browse files

sequence similarity algorithm

parent 3410aec8
Branches
Tags
No related merge requests found
......@@ -21,6 +21,7 @@
#include <ost/seq/alg/merge_pairwise_alignments.hh>
#include <ost/seq/alg/sequence_identity.hh>
#include <ost/seq/alg/sequence_similarity.hh>
#include <ost/seq/alg/ins_del.hh>
#include <ost/seq/alg/conservation.hh>
#include <ost/seq/alg/subst_weight_matrix.hh>
......@@ -39,11 +40,18 @@ BOOST_PYTHON_MODULE(_ost_seq_alg)
.value("LONGER_SEQUENCE", RefMode::LONGER_SEQUENCE)
.export_values()
;
def("SequenceIdentity", &SequenceIdentity,
(arg("ref_mode")=RefMode::ALIGNMENT, arg("seq_a")=0, arg("seq_b")=1));
def("SequenceSimilarity", &SequenceSimilarity,
(arg("aln"),arg("subst_weight"),arg("normalize")=false,
arg("seq_a")=0,arg("seq_b")=1));
class_<AlignedRegionList>("AlignedRegionList", init<>())
.def(vector_indexing_suite<AlignedRegionList>())
;
class_<InsDel>("InsDel", init<const AlignmentHandle&, int, int>())
.def(init<const AlignmentHandle&>())
.def("GetDeletions", &InsDel::GetDeletions)
......@@ -54,6 +62,7 @@ BOOST_PYTHON_MODULE(_ost_seq_alg)
.def("GetWeight", &SubstWeightMatrix::GetWeight)
.def("SetWeight", &SubstWeightMatrix::SetWeight)
;
def("MergePairwiseAlignments", &MergePairwiseAlignments);
def("Conservation", &Conservation, (arg("assign")=true, arg("prop_name")="cons", arg("ignore_gap")=false));
def("LocalAlign", &LocalAlign, (arg("seq1"), arg("seq2"),arg("subst_weight"),
......
set(OST_SEQ_ALG_HEADERS
sequence_identity.hh
sequence_similarity.hh
module_config.hh
ins_del.hh
subst_weight_matrix.hh
......@@ -19,6 +20,7 @@ global_align.cc
semiglobal_align.cc
entropy.cc
sequence_identity.cc
sequence_similarity.cc
ins_del.cc
conservation.cc
)
......
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2015 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#include <ost/seq/aligned_column_iterator.hh>
#include <ost/seq/alg/sequence_similarity.hh>
namespace ost { namespace seq { namespace alg {
Real SequenceSimilarity(const AlignmentHandle& aln, SubstWeightMatrixPtr subst,
bool normalize, int seq_a, int seq_b){
int non_gap_count=0;
int similarity_score = 0;
const String& sa=aln.GetSequence(seq_a).GetString();
const String& sb=aln.GetSequence(seq_b).GetString();
for (String::const_iterator
i=sa.begin(), e=sa.end(), j=sb.begin(); i!=e; ++i, ++j) {
if (!((*i)== '-' || (*j)=='-')) {
++non_gap_count;
similarity_score += subst->GetWeight(*i,*j);
}
}
Real seq_sim = subst->GetMinWeight();
if(non_gap_count > 0){
seq_sim = static_cast<Real>(similarity_score)/non_gap_count;
}
if(normalize){
seq_sim -= subst->GetMinWeight();
Real weight_range = subst->GetMaxWeight() - subst->GetMinWeight();
if(weight_range > 0) seq_sim /= weight_range;
}
return seq_sim;
}
}}}
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2015 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#ifndef OST_SEQ_SEQUENCE_SIMILARITY_HH
#define OST_SEQ_SEQUENCE_SIMILARITY_HH
#include <ost/seq/alg/module_config.hh>
#include <ost/seq/alignment_handle.hh>
#include <ost/seq/alg/subst_weight_matrix.hh>
namespace ost { namespace seq { namespace alg {
/// \brief calculate sequence similarity for two sequences in an alignment
/// \param seq_a is the index of the first sequence
/// \param seq_b is the index of the second sequence
/// \param aln is the sequence alignment
/// \return sequence similarity
Real DLLEXPORT_OST_SEQ_ALG
SequenceSimilarity(const AlignmentHandle& aln, SubstWeightMatrixPtr subst,
bool normalize=false, int seq_a=0, int seq_b=1);
}}}
#endif
......@@ -47,7 +47,7 @@ public:
///
/// In order to get a useful substitution weight matrix, use SetWeight().
/// Alternatively you may want to load the substitution from an info group.
SubstWeightMatrix() {
SubstWeightMatrix():max_weight_(0),min_weight_(0) {
::memset(weights_, 0, sizeof(WeightType)*ALPHABET_SIZE*ALPHABET_SIZE);
}
......@@ -64,6 +64,12 @@ public:
return (i>=0 && i<ALPHABET_SIZE*ALPHABET_SIZE) ? weights_[i] : 0;
}
/// \brief Get the minimal substitution weight of the matrix
WeightType GetMinWeight() { return min_weight_; }
/// \brief Get the maximal substitution weight of the matrix
WeightType GetMaxWeight() { return max_weight_; }
/// \brief Set the substitution weight between two amino acids
///
/// The weight is only set if the amino acid single letter code
......@@ -74,6 +80,8 @@ public:
int i=Index(aa_one, aa_two);
if (i>=0 && i<ALPHABET_SIZE*ALPHABET_SIZE) {
weights_[i]=weight;
min_weight_ = std::min(min_weight_,weight);
max_weight_ = std::max(max_weight_,weight);
}
}
}
......@@ -88,6 +96,8 @@ private:
return (toupper(aa)>='A' && toupper(aa)<='Z');
}
WeightType weights_[ALPHABET_SIZE*ALPHABET_SIZE];
WeightType max_weight_;
WeightType min_weight_;
};
#if(OST_INFO_ENABLED)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment