diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc index 418defe259b025cb06b1535576fa4e4a9d585ff0..6597e459e522c23046ad636870a60a4fc966ce69 100644 --- a/modules/seq/alg/pymod/wrap_seq_alg.cc +++ b/modules/seq/alg/pymod/wrap_seq_alg.cc @@ -21,6 +21,7 @@ #include <ost/seq/alg/merge_pairwise_alignments.hh> #include <ost/seq/alg/sequence_identity.hh> +#include <ost/seq/alg/sequence_similarity.hh> #include <ost/seq/alg/ins_del.hh> #include <ost/seq/alg/conservation.hh> #include <ost/seq/alg/subst_weight_matrix.hh> @@ -39,11 +40,18 @@ BOOST_PYTHON_MODULE(_ost_seq_alg) .value("LONGER_SEQUENCE", RefMode::LONGER_SEQUENCE) .export_values() ; + def("SequenceIdentity", &SequenceIdentity, (arg("ref_mode")=RefMode::ALIGNMENT, arg("seq_a")=0, arg("seq_b")=1)); + + def("SequenceSimilarity", &SequenceSimilarity, + (arg("aln"),arg("subst_weight"),arg("normalize")=false, + arg("seq_a")=0,arg("seq_b")=1)); + class_<AlignedRegionList>("AlignedRegionList", init<>()) .def(vector_indexing_suite<AlignedRegionList>()) ; + class_<InsDel>("InsDel", init<const AlignmentHandle&, int, int>()) .def(init<const AlignmentHandle&>()) .def("GetDeletions", &InsDel::GetDeletions) @@ -54,6 +62,7 @@ BOOST_PYTHON_MODULE(_ost_seq_alg) .def("GetWeight", &SubstWeightMatrix::GetWeight) .def("SetWeight", &SubstWeightMatrix::SetWeight) ; + def("MergePairwiseAlignments", &MergePairwiseAlignments); def("Conservation", &Conservation, (arg("assign")=true, arg("prop_name")="cons", arg("ignore_gap")=false)); def("LocalAlign", &LocalAlign, (arg("seq1"), arg("seq2"),arg("subst_weight"), diff --git a/modules/seq/alg/src/CMakeLists.txt b/modules/seq/alg/src/CMakeLists.txt index 76549ab0d14034ecac80fce5e1528958c5306e45..2124aa283530a09d78ac9bf0f9fe1b436527f023 100644 --- a/modules/seq/alg/src/CMakeLists.txt +++ b/modules/seq/alg/src/CMakeLists.txt @@ -1,5 +1,6 @@ set(OST_SEQ_ALG_HEADERS sequence_identity.hh +sequence_similarity.hh module_config.hh ins_del.hh subst_weight_matrix.hh @@ -19,6 +20,7 @@ global_align.cc semiglobal_align.cc entropy.cc sequence_identity.cc +sequence_similarity.cc ins_del.cc conservation.cc ) diff --git a/modules/seq/alg/src/sequence_similarity.cc b/modules/seq/alg/src/sequence_similarity.cc new file mode 100644 index 0000000000000000000000000000000000000000..67f2e063710830f508375a3dff3fe4245f7664b7 --- /dev/null +++ b/modules/seq/alg/src/sequence_similarity.cc @@ -0,0 +1,57 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2015 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <ost/seq/aligned_column_iterator.hh> +#include <ost/seq/alg/sequence_similarity.hh> + + +namespace ost { namespace seq { namespace alg { + +Real SequenceSimilarity(const AlignmentHandle& aln, SubstWeightMatrixPtr subst, + bool normalize, int seq_a, int seq_b){ + int non_gap_count=0; + int similarity_score = 0; + + const String& sa=aln.GetSequence(seq_a).GetString(); + const String& sb=aln.GetSequence(seq_b).GetString(); + + for (String::const_iterator + i=sa.begin(), e=sa.end(), j=sb.begin(); i!=e; ++i, ++j) { + if (!((*i)== '-' || (*j)=='-')) { + ++non_gap_count; + similarity_score += subst->GetWeight(*i,*j); + } + } + + Real seq_sim = subst->GetMinWeight(); + + if(non_gap_count > 0){ + seq_sim = static_cast<Real>(similarity_score)/non_gap_count; + } + + if(normalize){ + seq_sim -= subst->GetMinWeight(); + Real weight_range = subst->GetMaxWeight() - subst->GetMinWeight(); + if(weight_range > 0) seq_sim /= weight_range; + } + + return seq_sim; +} + +}}} diff --git a/modules/seq/alg/src/sequence_similarity.hh b/modules/seq/alg/src/sequence_similarity.hh new file mode 100644 index 0000000000000000000000000000000000000000..daaa09a7b54e819679b51113edc42c353e6ffe9a --- /dev/null +++ b/modules/seq/alg/src/sequence_similarity.hh @@ -0,0 +1,39 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2015 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_SEQ_SEQUENCE_SIMILARITY_HH +#define OST_SEQ_SEQUENCE_SIMILARITY_HH + +#include <ost/seq/alg/module_config.hh> +#include <ost/seq/alignment_handle.hh> +#include <ost/seq/alg/subst_weight_matrix.hh> + +namespace ost { namespace seq { namespace alg { + +/// \brief calculate sequence similarity for two sequences in an alignment +/// \param seq_a is the index of the first sequence +/// \param seq_b is the index of the second sequence +/// \param aln is the sequence alignment +/// \return sequence similarity +Real DLLEXPORT_OST_SEQ_ALG +SequenceSimilarity(const AlignmentHandle& aln, SubstWeightMatrixPtr subst, + bool normalize=false, int seq_a=0, int seq_b=1); + +}}} + +#endif diff --git a/modules/seq/alg/src/subst_weight_matrix.hh b/modules/seq/alg/src/subst_weight_matrix.hh index 97f0ef825cb45dda7e2572558b2917498e87a353..6cede33bd9b380c599aafb6c82f7f6582738ec16 100644 --- a/modules/seq/alg/src/subst_weight_matrix.hh +++ b/modules/seq/alg/src/subst_weight_matrix.hh @@ -47,7 +47,7 @@ public: /// /// In order to get a useful substitution weight matrix, use SetWeight(). /// Alternatively you may want to load the substitution from an info group. - SubstWeightMatrix() { + SubstWeightMatrix():max_weight_(0),min_weight_(0) { ::memset(weights_, 0, sizeof(WeightType)*ALPHABET_SIZE*ALPHABET_SIZE); } @@ -64,6 +64,12 @@ public: return (i>=0 && i<ALPHABET_SIZE*ALPHABET_SIZE) ? weights_[i] : 0; } + /// \brief Get the minimal substitution weight of the matrix + WeightType GetMinWeight() { return min_weight_; } + + /// \brief Get the maximal substitution weight of the matrix + WeightType GetMaxWeight() { return max_weight_; } + /// \brief Set the substitution weight between two amino acids /// /// The weight is only set if the amino acid single letter code @@ -74,6 +80,8 @@ public: int i=Index(aa_one, aa_two); if (i>=0 && i<ALPHABET_SIZE*ALPHABET_SIZE) { weights_[i]=weight; + min_weight_ = std::min(min_weight_,weight); + max_weight_ = std::max(max_weight_,weight); } } } @@ -88,6 +96,8 @@ private: return (toupper(aa)>='A' && toupper(aa)<='Z'); } WeightType weights_[ALPHABET_SIZE*ALPHABET_SIZE]; + WeightType max_weight_; + WeightType min_weight_; }; #if(OST_INFO_ENABLED)