From 23b813f4a001e9a3b7da9242c7558d9ad3da8db6 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Tue, 1 Mar 2022 17:26:40 +0100
Subject: [PATCH] Encode confidence in ConSurf values

Requires a container with latest ConSurf deps
---
 var3d/seq_anno.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/var3d/seq_anno.py b/var3d/seq_anno.py
index 3c11bcc..a67e548 100644
--- a/var3d/seq_anno.py
+++ b/var3d/seq_anno.py
@@ -25,8 +25,11 @@ class ConsurfSeqAnno(SeqAnno):
     The code is not opensource but kindly provided by the Ben-Tal group
     and wrapped into a Singularity container by us.
 
-    The returned annotations correspond to the Consurf conservation scores in
-    range [1-9]
+    ConSurf conservation scores are in range [1-9]. However, the returned values
+    additionally encode confidence. A score is considered non-confident if less
+    than 6 non-gaped homologue sequences have been found at that location or if
+    the ConSurf internal confidence interval is >= 4. Non-confident scores are
+    multiplied by -1. So a non-confident score of 8 would be represented as -8.
 
     :param seq_db: Path to sequence db that can be read by Jackhmmer, i.e. a big 
                    fat fasta file. Typically uniref90
@@ -108,7 +111,10 @@ class ConsurfSeqAnno(SeqAnno):
         with open(result_json, "r") as fh:
             result = json.load(fh)
 
-        return self.MapAnno(sequence, result["COLOR"], seq_range)
+        color = result["COLOR"]
+        confidence = result["CONFIDENCE"]
+        scores = [a if b else -a for a,b in zip(color, confidence)]
+        return self.MapAnno(sequence, scores, seq_range)
 
 
 class EntropySeqAnno(SeqAnno):
-- 
GitLab