From 2330eadd680796542198c910c0d223969dba99e2 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Mon, 9 May 2022 22:33:02 +0200 Subject: [PATCH] introduce IDENTITY substitution matrix Probably performs terribly on anything but very close homologues, but the use case here is to align very similar DNA/RNA sequences --- modules/seq/alg/pymod/mat.py | 3 ++- modules/seq/alg/pymod/wrap_seq_alg.cc | 1 + modules/seq/alg/src/subst_weight_matrix.cc | 12 ++++++++++++ modules/seq/alg/src/subst_weight_matrix.hh | 3 ++- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/modules/seq/alg/pymod/mat.py b/modules/seq/alg/pymod/mat.py index f79bf8326..57fa10c17 100644 --- a/modules/seq/alg/pymod/mat.py +++ b/modules/seq/alg/pymod/mat.py @@ -9,5 +9,6 @@ BLOSUM45 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM45) BLOSUM62 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM62) BLOSUM80 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM80) BLOSUM100 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM100) +IDENTITY = _InitMatrix(SubstWeightMatrix.Preset.IDENTITY) -__all__=['BLOSUM45','BLOSUM62','BLOSUM80','BLOSUM100'] +__all__=['BLOSUM45','BLOSUM62','BLOSUM80','BLOSUM100', 'IDENTITY'] diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc index ebb3adb97..d0577fa5c 100644 --- a/modules/seq/alg/pymod/wrap_seq_alg.cc +++ b/modules/seq/alg/pymod/wrap_seq_alg.cc @@ -222,6 +222,7 @@ void export_contact_prediction() .value("BLOSUM62", SubstWeightMatrix::BLOSUM62) .value("BLOSUM80", SubstWeightMatrix::BLOSUM80) .value("BLOSUM100", SubstWeightMatrix::BLOSUM100) + .value("IDENTITY", SubstWeightMatrix::IDENTITY) ; } diff --git a/modules/seq/alg/src/subst_weight_matrix.cc b/modules/seq/alg/src/subst_weight_matrix.cc index f382e4f8e..454dbeddb 100644 --- a/modules/seq/alg/src/subst_weight_matrix.cc +++ b/modules/seq/alg/src/subst_weight_matrix.cc @@ -140,6 +140,14 @@ void FillData(ost::seq::alg::SubstWeightMatrix* subst, short (&data)[23][23]){ } } +void 
FillIdentity(ost::seq::alg::SubstWeightMatrix* subst) { + char chars[26] = {'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', + 'P','Q','R','S','T','U','V','W','X','Y','Z'}; + for(uint i = 0; i < 26; ++i) { + subst->SetWeight(chars[i], chars[i], 1.0); + } +} + } namespace ost { namespace seq { namespace alg { @@ -167,6 +175,10 @@ void SubstWeightMatrix::AssignPreset(SubstWeightMatrix::Preset p) FillData(this,RAW_BLOSUM100_DATA); break; } + case IDENTITY:{ + FillIdentity(this); + break; + } } } diff --git a/modules/seq/alg/src/subst_weight_matrix.hh b/modules/seq/alg/src/subst_weight_matrix.hh index 25f74dc1a..fe057be1b 100644 --- a/modules/seq/alg/src/subst_weight_matrix.hh +++ b/modules/seq/alg/src/subst_weight_matrix.hh @@ -43,7 +43,8 @@ public: enum Preset{BLOSUM45 = 0, BLOSUM62 = 1, BLOSUM80 = 2, - BLOSUM100 = 3}; + BLOSUM100 = 3, + IDENTITY = 4}; /// \brief Initialize substitution matrix with zero. /// /// In order to get a useful substitution weight matrix, use SetWeight(). -- GitLab