diff --git a/modules/seq/alg/doc/seqalg.rst b/modules/seq/alg/doc/seqalg.rst index c0f9b885b87534ca78f74ee31635b7b1257757b2..31274af12bbbbe32bfb0ca17083f0acf9738b458 100644 --- a/modules/seq/alg/doc/seqalg.rst +++ b/modules/seq/alg/doc/seqalg.rst @@ -261,24 +261,59 @@ Algorithms for Alignments Substitution Weight Matrices and BLOSUM Matrices -------------------------------------------------------------------------------- -.. autoclass:: SubstWeightMatrix - :members: +.. class:: SubstWeightMatrix + + Substitution weights for alignment algorithms + + .. method:: GetWeight(olc_one, olc_two) + + Get the :class:`int` weight for a pair of characters + + :param olc_one: first character + :type olc_one: :class:`str` + :param olc_two: second character + :type olc_two: :class:`str` + + .. method:: SetWeight(olc_one, olc_two, weight) + + Set the :class:`int` weight for a pair of characters + + :param olc_one: first character + :type olc_one: :class:`str` + :param olc_two: second character + :type olc_two: :class:`str` + :param weight: the weight + :type weight: :class:`int` + + .. method:: GetMinWeight() + + Returns the minimum weight of the matrix + + .. method:: GetMaxWeight() + + Returns the maximum weight of the matrix + + .. method:: GetName() + + Getter for name (empty string if not set) + + .. method:: SetName(name) + + Setter for name + + :param name: Name to be set + :type name: :class:`str` .. 
_blosum: -Four preset BLOSUM (BLOcks SUbstitution Matrix) matrices are available -different levels of sequence identity: +Four preset BLOSUM (BLOcks SUbstitution Matrix) matrices are available +at different levels of sequence identity: - BLOSUM45 - BLOSUM62 - BLOSUM80 - BLOSUM100 -Two naive substitution matrices: - -- IDENTITY: Matches have score of 1, all other are 0 -- MATCH: Matches have score of 1, all other are -1 - Nucleotide substitution matrices: - NUC44: Nucleotide substitution matrix used in blastn that can deal with IUPAC @@ -286,6 +321,14 @@ Nucleotide substitution matrices: equivalence, i.e. you can just do `m.GetWeight('G', 'U')` instead of first translating 'U' to 'T'. +They can be accessed directly after importing the sequence module: + +.. code-block:: python + + from ost import seq + mat = seq.alg.BLOSUM62 + print(mat.GetWeight('A', 'A')) + .. _contact-prediction: diff --git a/modules/seq/alg/pymod/mat.py b/modules/seq/alg/pymod/mat.py index f00ec2237076edc72287478f6057309a0dfcbe60..873d2b42ac58aa6ec2ab14b7e0b3fb38fef5facc 100644 --- a/modules/seq/alg/pymod/mat.py +++ b/modules/seq/alg/pymod/mat.py @@ -9,9 +9,6 @@ BLOSUM45 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM45) BLOSUM62 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM62) BLOSUM80 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM80) BLOSUM100 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM100) -IDENTITY = _InitMatrix(SubstWeightMatrix.Preset.IDENTITY) -MATCH = _InitMatrix(SubstWeightMatrix.Preset.MATCH) NUC44 = _InitMatrix(SubstWeightMatrix.Preset.NUC44) -__all__=['BLOSUM45','BLOSUM62','BLOSUM80','BLOSUM100', 'IDENTITY', 'MATCH', - 'NUC44'] +__all__=['BLOSUM45','BLOSUM62','BLOSUM80','BLOSUM100','NUC44'] diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc index 508a8cc5a72064a8498ce0665286ab9d4ddc11e4..cc57693e8899ce11547a0cec8366133408670b29 100644 --- a/modules/seq/alg/pymod/wrap_seq_alg.cc +++ b/modules/seq/alg/pymod/wrap_seq_alg.cc @@ -215,6 
+215,9 @@ void export_contact_prediction() .def("GetMinWeight", &SubstWeightMatrix::GetMinWeight) .def("GetMaxWeight", &SubstWeightMatrix::GetMaxWeight) .def("AssignPreset", &SubstWeightMatrix::AssignPreset) + .def("SetName", &SubstWeightMatrix::SetName) + .def("GetName", &SubstWeightMatrix::GetName, + return_value_policy<copy_const_reference>()) ; enum_<SubstWeightMatrix::Preset>("Preset") @@ -222,8 +225,6 @@ void export_contact_prediction() .value("BLOSUM62", SubstWeightMatrix::BLOSUM62) .value("BLOSUM80", SubstWeightMatrix::BLOSUM80) .value("BLOSUM100", SubstWeightMatrix::BLOSUM100) - .value("IDENTITY", SubstWeightMatrix::IDENTITY) - .value("MATCH", SubstWeightMatrix::MATCH) .value("NUC44", SubstWeightMatrix::NUC44) ; } diff --git a/modules/seq/alg/src/subst_weight_matrix.cc b/modules/seq/alg/src/subst_weight_matrix.cc index 6d22a0b9cbd65b8fd104055fefa6a81567511936..3c7ee7225b33c99d54925ce94fa3685ed082c89d 100644 --- a/modules/seq/alg/src/subst_weight_matrix.cc +++ b/modules/seq/alg/src/subst_weight_matrix.cc @@ -170,28 +170,6 @@ void FillNucData(ost::seq::alg::SubstWeightMatrix* subst, } } -void FillIdentity(ost::seq::alg::SubstWeightMatrix* subst) { - char chars[26] = {'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', - 'P','Q','R','S','T','U','V','W','X','Y','Z'}; - for(uint i = 0; i < 26; ++i) { - subst->SetWeight(chars[i], chars[i], 1.0); - } -} - -void FillMatch(ost::seq::alg::SubstWeightMatrix* subst) { - char chars[26] = {'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', - 'P','Q','R','S','T','U','V','W','X','Y','Z'}; - for(uint i = 0; i < 26; ++i) { - for(uint j = 0; j < 26; ++j) { - if(i == j){ - subst->SetWeight(chars[i], chars[j], 1.0); - } else { - subst->SetWeight(chars[i], chars[j], -1.0); - } - } - } -} - } namespace ost { namespace seq { namespace alg { @@ -205,34 +183,30 @@ void SubstWeightMatrix::AssignPreset(SubstWeightMatrix::Preset p) switch(p){ case BLOSUM45:{ FillData(this,RAW_BLOSUM45_DATA); + 
this->SetName("blosum45"); break; } case BLOSUM62:{ FillData(this,RAW_BLOSUM62_DATA); + this->SetName("blosum62"); break; } case BLOSUM80:{ FillData(this,RAW_BLOSUM80_DATA); + this->SetName("blosum80"); break; } case BLOSUM100:{ FillData(this,RAW_BLOSUM100_DATA); - break; - } - case IDENTITY:{ - FillIdentity(this); - break; - } - case MATCH:{ - FillMatch(this); + this->SetName("blosum100"); break; } case NUC44:{ FillNucData(this,RAW_NUC44_DATA); + this->SetName("nuc44"); break; } } } - }}} diff --git a/modules/seq/alg/src/subst_weight_matrix.hh b/modules/seq/alg/src/subst_weight_matrix.hh index 50636399e659fd037971f2a48d89f8c09453652f..a0e1e5862f4bd5cb748e935e647a5468424aa098 100644 --- a/modules/seq/alg/src/subst_weight_matrix.hh +++ b/modules/seq/alg/src/subst_weight_matrix.hh @@ -22,6 +22,7 @@ #include <ctype.h> #include <string.h> #include <boost/shared_ptr.hpp> +#include <ost/base.hh> #include <ost/config.hh> #include <ost/seq/alg/module_config.hh> @@ -44,9 +45,7 @@ public: BLOSUM62 = 1, BLOSUM80 = 2, BLOSUM100 = 3, - IDENTITY = 4, - MATCH = 5, - NUC44 = 6}; + NUC44 = 4}; /// \brief Initialize substitution matrix with zero. /// /// In order to get a useful substitution weight matrix, use SetWeight(). @@ -92,6 +91,10 @@ public: } } + void SetName(const String& name) { name_ = name; } + + const String& GetName() const { return name_; } + private: int Index(char aa_one, char aa_two) const { return (toupper(aa_one)-'A')*ALPHABET_SIZE+(toupper(aa_two)-'A'); @@ -104,6 +107,7 @@ private: WeightType weights_[ALPHABET_SIZE*ALPHABET_SIZE]; WeightType max_weight_; WeightType min_weight_; + String name_; }; }}}