Skip to content
Snippets Groups Projects
Commit 3017ccdc authored by Tobias Schmidt's avatar Tobias Schmidt Committed by Valerio Mariani
Browse files

Table class: add function to compute the Matthews correlation coefficient (MCC)

parent d50583e8
Branches
Tags
No related merge requests found
......@@ -1613,6 +1613,74 @@ class Table(object):
except ImportError:
LogError("Function needs matplotlib, but I could not import it.")
raise
def ComputeMCC(self, score_col, class_col, score_dir='-',
               class_dir='-', score_cutoff=2.0, class_cutoff=2.0):
    '''
    Compute the Matthews correlation coefficient (MCC) for one column
    (*score_col*), with the points classified into true positives, false
    positives, true negatives and false negatives according to a specified
    classification column (*class_col*).

    The datapoints in *score_col* and *class_col* are classified into
    positive and negative points. This can be done in two ways:

     - by using 'bool' columns which contain True for positives and False
       for negatives
     - by using 'float' or 'int' columns and specifying a cutoff value and
       the column's direction. This generates the classification on the fly:

       * if *class_dir*/*score_dir*=='-': values that are less than or equal
         to *class_cutoff*/*score_cutoff* are counted as positives
       * if *class_dir*/*score_dir*=='+': values that are larger than or
         equal to *class_cutoff*/*score_cutoff* are counted as positives

    The two possibilities can be used together, i.e. 'bool' type for one
    column and 'float'/'int' type and cutoff/direction for the other column.

    Rows in which the score or the classification value is None are ignored.

    :param score_col: name of the column holding the predictions
    :param class_col: name of the column holding the reference classification
    :param score_dir: '+' or '-', direction of *score_col* (ignored for
                      'bool' columns)
    :param class_dir: '+' or '-', direction of *class_col* (ignored for
                      'bool' columns)
    :param score_cutoff: cutoff applied to numeric *score_col* values
    :param class_cutoff: cutoff applied to numeric *class_col* values

    :returns: the MCC as float, or None if the MCC is not defined because one
              of the sums (tp+fn), (tp+fp), (tn+fn) or (tn+fp) is zero (this
              includes the case of a table without usable rows)

    :raises: TypeError if one of the columns is not of type 'int', 'float'
             or 'bool'; ValueError if a direction is neither '+' nor '-'
    '''
    ALLOWED_DIR = ['+','-']
    ALLOWED_TYPES = ('int', 'float', 'bool')

    score_idx = self.GetColIndex(score_col)
    score_type = self.col_types[score_idx]
    if score_type not in ALLOWED_TYPES:
        raise TypeError("Score column must be numeric or bool type")

    class_idx = self.GetColIndex(class_col)
    class_type = self.col_types[class_idx]
    if class_type not in ALLOWED_TYPES:
        raise TypeError("Classifier column must be numeric or bool type")

    if (score_dir not in ALLOWED_DIR) or (class_dir not in ALLOWED_DIR):
        raise ValueError("Direction must be one of %s"%str(ALLOWED_DIR))

    def _IsPositive(val, col_type, direction, cutoff):
        # True if val lies on the "positive" side of the classification.
        if col_type == 'bool':
            return val == True
        if direction == '-':
            return val <= cutoff
        return val >= cutoff

    def _IsNegative(val, col_type, direction, cutoff):
        # True if val lies on the "negative" side of the classification.
        # Kept separate from _IsPositive on purpose: a NaN is neither
        # positive nor negative, so a NaN score is always counted as a
        # wrong prediction (fn or fp).
        if col_type == 'bool':
            return val == False
        if direction == '-':
            return val > cutoff
        return val < cutoff

    tp = 0
    fp = 0
    fn = 0
    tn = 0

    for row in self.rows:
        class_val = row[class_idx]
        score_val = row[score_idx]
        # A missing value on either side cannot be classified; comparing
        # None against the cutoff would raise (Python 3) or silently count
        # the row as a positive (Python 2).
        if class_val is None or score_val is None:
            continue
        if _IsPositive(class_val, class_type, class_dir, class_cutoff):
            if _IsPositive(score_val, score_type, score_dir, score_cutoff):
                tp += 1
            else:
                fn += 1
        else:
            if _IsNegative(score_val, score_type, score_dir, score_cutoff):
                tn += 1
            else:
                fp += 1

    denominator = (tp+fn)*(tp+fp)*(tn+fn)*(tn+fp)
    if denominator == 0:
        # MCC is undefined if any marginal sum of the confusion matrix is 0.
        return None

    # math.sqrt returns a float, so this is a true division on Python 2 too.
    mcc = ((tp*tn)-(fp*fn)) / math.sqrt(denominator)
    return mcc
def IsEmpty(self, col_name=None, ignore_nan=True):
'''
......
......@@ -1142,6 +1142,34 @@ class TestTable(unittest.TestCase):
auc = tab.ComputeROCAUC(score_col='score', score_dir='+', class_col='classific')
self.assertAlmostEquals(auc, auc_ref)
def testCalcMCC(self):
    '''
    Check ComputeMCC on numeric columns with cutoffs and on the equivalent
    pre-classified bool columns.

    'class_rmsd' and 'class_score' hold the classification of 'rmsd'
    (cutoff 2.0) and 'score' (cutoff 1.0) in '-' direction, so both ways of
    calling ComputeMCC are expected to yield the same value.
    '''
    tab = Table(['score', 'rmsd', 'class_rmsd', 'class_score'], 'ffbb',
                score= [2.64, 1.11, 2.17, 0.45,0.15,0.85, 1.13, 2.90, 0.50, 1.03, 1.46, 2.83, 1.15, 2.04, 0.67, 1.27, 2.22, 1.90, 0.68, 0.36,1.04, 2.46, 0.91,0.60],
                rmsd=[9.58,1.61,7.48,0.29,1.68,3.52,3.34,8.17,4.31,2.85,6.28,8.78,0.41,6.29,4.89,7.30,4.26,3.51,3.38,0.04,2.21,0.24,7.58,8.40],
                class_rmsd= [False,True, False,True,True,False,False,False,False,False,False,False,True, False,False,False,False,False,False,True,False,True,False,False],
                class_score=[False,False,False,True,True,True, False,False,True, False,False,False,False,False,True, False,False,False,True, True,False,False,True,True])

    # assertAlmostEqual is used instead of the deprecated alias
    # assertAlmostEquals, which was removed in Python 3.12.

    # numeric columns, both in '-' direction
    mcc = tab.ComputeMCC(score_col='score', score_dir='-', class_col='rmsd', class_dir='-', score_cutoff=1.0, class_cutoff=2.0)
    self.assertAlmostEqual(mcc, 0.1490711984)

    # pre-classified bool columns give the same result
    mcc = tab.ComputeMCC(score_col='class_score', class_col='class_rmsd')
    self.assertAlmostEqual(mcc, 0.1490711984)

    # inverting both directions swaps positives and negatives in both
    # columns: same MCC expected
    mcc = tab.ComputeMCC(score_col='score', score_dir='+', class_col='rmsd', class_dir='+', score_cutoff=1.0, class_cutoff=2.0)
    self.assertAlmostEqual(mcc, 0.1490711984)

    # inverting only one direction is expected to flip the sign
    mcc = tab.ComputeMCC(score_col='score', score_dir='-', class_col='rmsd', class_dir='+', score_cutoff=1.0, class_cutoff=2.0)
    self.assertAlmostEqual(mcc, -0.1490711984)
    mcc = tab.ComputeMCC(score_col='score', score_dir='+', class_col='rmsd', class_dir='-', score_cutoff=1.0, class_cutoff=2.0)
    self.assertAlmostEqual(mcc, -0.1490711984)
def testCalcMCCPreclassified(self):
tab = Table(['reference', 'prediction1', 'prediction2'],'bbb',
reference= [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, True, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, True, False, False, True, False, False, False, False, False, False, False, False],
prediction1=[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, True, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, True, False, True, False, False, False, False, False, False],
prediction2=[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, True, False, False, True, False, True, True, False, False, False, False, False, False, False, False, False, False, True, False, False, True, False, False, False, True, False, False, False, False])
mcc = tab.ComputeMCC(score_col='prediction1', class_col='reference')
self.assertAlmostEquals(mcc, 0.538389277)
mcc = tab.ComputeMCC(score_col='prediction2', class_col='reference')
self.assertAlmostEquals(mcc, 0.882089673321)
def testTableAsNumpyMatrix(self):
'''
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment