From 2330eadd680796542198c910c0d223969dba99e2 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Mon, 9 May 2022 22:33:02 +0200
Subject: [PATCH] introduce IDENTITY substitution matrix

Probably performs terribly on anything but very close homologues, but the use
case here is to align very similar DNA/RNA sequences.
---
 modules/seq/alg/pymod/mat.py               |  3 ++-
 modules/seq/alg/pymod/wrap_seq_alg.cc      |  1 +
 modules/seq/alg/src/subst_weight_matrix.cc | 12 ++++++++++++
 modules/seq/alg/src/subst_weight_matrix.hh |  3 ++-
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/modules/seq/alg/pymod/mat.py b/modules/seq/alg/pymod/mat.py
index f79bf8326..57fa10c17 100644
--- a/modules/seq/alg/pymod/mat.py
+++ b/modules/seq/alg/pymod/mat.py
@@ -9,5 +9,6 @@ BLOSUM45 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM45)
 BLOSUM62 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM62)
 BLOSUM80 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM80)
 BLOSUM100 = _InitMatrix(SubstWeightMatrix.Preset.BLOSUM100)
+IDENTITY = _InitMatrix(SubstWeightMatrix.Preset.IDENTITY)
 
-__all__=['BLOSUM45','BLOSUM62','BLOSUM80','BLOSUM100']
+__all__=['BLOSUM45','BLOSUM62','BLOSUM80','BLOSUM100', 'IDENTITY']
diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc
index ebb3adb97..d0577fa5c 100644
--- a/modules/seq/alg/pymod/wrap_seq_alg.cc
+++ b/modules/seq/alg/pymod/wrap_seq_alg.cc
@@ -222,6 +222,7 @@ void export_contact_prediction()
     .value("BLOSUM62", SubstWeightMatrix::BLOSUM62)
     .value("BLOSUM80", SubstWeightMatrix::BLOSUM80)
     .value("BLOSUM100", SubstWeightMatrix::BLOSUM100)
+    .value("IDENTITY", SubstWeightMatrix::IDENTITY)
   ;
 }
 
diff --git a/modules/seq/alg/src/subst_weight_matrix.cc b/modules/seq/alg/src/subst_weight_matrix.cc
index f382e4f8e..454dbeddb 100644
--- a/modules/seq/alg/src/subst_weight_matrix.cc
+++ b/modules/seq/alg/src/subst_weight_matrix.cc
@@ -140,6 +140,14 @@ void FillData(ost::seq::alg::SubstWeightMatrix* subst, short (&data)[23][23]){
   }
 }
 
+// Sets a weight of 1.0 for each uppercase letter 'A'-'Z' paired with itself.
+void FillIdentity(ost::seq::alg::SubstWeightMatrix* subst) {
+  const char letters[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+  for(const char* olc = letters; *olc != '\0'; ++olc) {
+    subst->SetWeight(*olc, *olc, 1.0);
+  }
+}
+
 }
 
 namespace ost { namespace seq { namespace alg {
@@ -167,6 +175,10 @@ void SubstWeightMatrix::AssignPreset(SubstWeightMatrix::Preset p)
       FillData(this,RAW_BLOSUM100_DATA);
       break;
     }
+    case IDENTITY:{
+      FillIdentity(this);
+      break;
+    }
   }
 }
 
diff --git a/modules/seq/alg/src/subst_weight_matrix.hh b/modules/seq/alg/src/subst_weight_matrix.hh
index 25f74dc1a..fe057be1b 100644
--- a/modules/seq/alg/src/subst_weight_matrix.hh
+++ b/modules/seq/alg/src/subst_weight_matrix.hh
@@ -43,7 +43,8 @@ public:
   enum Preset{BLOSUM45 = 0,
               BLOSUM62 = 1,
               BLOSUM80 = 2,
-              BLOSUM100 = 3};
+              BLOSUM100 = 3,
+              IDENTITY = 4};
   /// \brief Initialize substitution matrix with zero.
   /// 
   /// In order to get a useful  substitution weight matrix, use SetWeight(). 
-- 
GitLab