diff --git a/var3d/seq_anno.py b/var3d/seq_anno.py index 3c11bccb3fa8b7d5bc67fa5b155b448d940df578..a67e54885549780630de1aea4d08e8968abaca75 100644 --- a/var3d/seq_anno.py +++ b/var3d/seq_anno.py @@ -25,8 +25,11 @@ class ConsurfSeqAnno(SeqAnno): The code is not opensource but kindly provided by the Ben-Tal group and wrapped into a Singularity container by us. - The returned annotations correspond to the Consurf conservation scores in - range [1-9] + ConSurf conservation scores are in range [1-9]. However, the returned values + additionally encode confidence. A score is considered non-confident if fewer + than 6 non-gapped homologous sequences have been found at that location or if + the ConSurf internal confidence interval is >= 4. Non-confident scores are + multiplied by -1. So a non-confident score of 8 would be represented as -8. :param seq_db: Path to sequence db that can be read by Jackhmmer, i.e. a big fat fasta file. Typically uniref90 @@ -108,7 +111,10 @@ class ConsurfSeqAnno(SeqAnno): with open(result_json, "r") as fh: result = json.load(fh) - return self.MapAnno(sequence, result["COLOR"], seq_range) + color = result["COLOR"] + confidence = result["CONFIDENCE"] + scores = [a if b else -a for a,b in zip(color, confidence)] + return self.MapAnno(sequence, scores, seq_range) class EntropySeqAnno(SeqAnno):