From 23b813f4a001e9a3b7da9242c7558d9ad3da8db6 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Tue, 1 Mar 2022 17:26:40 +0100 Subject: [PATCH] Encode confidence in ConSurf values Requires a container with latest ConSurf deps --- var3d/seq_anno.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/var3d/seq_anno.py b/var3d/seq_anno.py index 3c11bcc..a67e548 100644 --- a/var3d/seq_anno.py +++ b/var3d/seq_anno.py @@ -25,8 +25,11 @@ class ConsurfSeqAnno(SeqAnno): The code is not opensource but kindly provided by the Ben-Tal group and wrapped into a Singularity container by us. - The returned annotations correspond to the Consurf conservation scores in - range [1-9] + ConSurf conservation scores are in range [1-9]. However, the returned values + additionally encode confidence. A score is considered non-confident if less + than 6 non-gaped homologue sequences have been found at that location or if + the ConSurf internal confidence interval is >= 4. Non-confident scores are + multiplied by -1. So a non-confident score of 8 would be represented as -8. :param seq_db: Path to sequence db that can be read by Jackhmmer, i.e. a big fat fasta file. Typically uniref90 @@ -108,7 +111,10 @@ class ConsurfSeqAnno(SeqAnno): with open(result_json, "r") as fh: result = json.load(fh) - return self.MapAnno(sequence, result["COLOR"], seq_range) + color = result["COLOR"] + confidence = result["CONFIDENCE"] + scores = [a if b else -a for a,b in zip(color, confidence)] + return self.MapAnno(sequence, scores, seq_range) class EntropySeqAnno(SeqAnno): -- GitLab