def ComputeLogROCAUC(self, score_col, class_col, score_dir='-',
                     class_dir='-', class_cutoff=2.0):
    '''
    Computes the area under the curve of the log receiver operating
    characteristics (logROC), i.e. the ROC curve with a semilogarithmic
    x-axis. The curve is interpolated linearly between the ROC points and
    every segment is integrated analytically over log10(x).

    The logROC is computed with a lambda of 0.001 according to
    Rapid Context-Dependent Ligand Desolvation in Molecular Docking
    Mysinger M. and Shoichet B., Journal of Chemical Information and Modeling
    2010 50 (9), 1561-1573

    False positive rates smaller than lambda (including 0) are clamped to
    lambda, so the integration window is always [lambda, 1]. The area is
    divided by log10(1/lambda), the width of that window.

    For more information about parameters of the ROC, see
    :meth:`ComputeROC`.

    :returns: The normalised logROC AUC, or None if :meth:`ComputeROC`
              returns an empty result.

    :warning: The function depends on *numpy*
    '''
    try:
        # numpy is not used directly below; the import is kept so that a
        # missing numpy is logged here (presumably ComputeROC needs it).
        import numpy as np  # noqa: F401

        roc = self.ComputeROC(score_col, class_col, score_dir,
                              class_dir, class_cutoff)
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise

    if not roc:
        return None

    # lower bound of the semilogarithmic x-axis
    lam = 0.001

    raw_x, raw_y = roc

    # Clamp everything left of lambda onto lambda. Mapping only x == 0 would
    # leave values in (0, lambda) outside the window and make the x-values
    # non-monotonic, which lets the normalised area exceed 1.0.
    clamped_x = [max(x, lam) for x in raw_x]

    # Remove duplicate x-values: of each run of equal x only the last point
    # is kept. The final ROC point is replaced by the explicit end (1, 1).
    xs = []
    ys = []
    for x_cur, x_next, y_cur in zip(clamped_x, clamped_x[1:], raw_y):
        if x_cur == x_next:
            continue
        xs.append(x_cur)
        ys.append(y_cur)
    xs.append(1.0)
    ys.append(1.0)

    # For a segment y = slope*x + intercept the integral over log10(x) is
    # (y1 - y0)/ln(10) + intercept*(log10(x1) - log10(x0)).
    area = 0.0
    for x0, y0, x1, y1 in zip(xs, ys, xs[1:], ys[1:]):
        if x0 == x1:
            # vertical segment: no area, and the slope would divide by zero
            continue
        slope = (y1 - y0) / (x1 - x0)
        intercept = y1 - x1 * slope
        area += ((y1 - y0) / math.log(10)
                 + intercept * (math.log10(x1) - math.log10(x0)))

    return area / math.log10(1.0 / lam)
def PlotLogROC(self, score_col, class_col, score_dir='-',
               class_dir='-', class_cutoff=2.0,
               style='-', title=None, x_title=None, y_title=None,
               clear=True, save=None):
    '''
    Plot a logROC curve, i.e. a ROC curve with a semilogarithmic x-axis,
    using matplotlib. The x-axis is limited to [0.001, 1.0]; false positive
    rates of 0 are drawn at 0.001 because 0 cannot be shown on a log axis.

    For more information about parameters of the ROC, see
    :meth:`ComputeROC`, and for plotting see :meth:`Plot`.

    :returns: The *matplotlib.pyplot* module, or None if :meth:`ComputeROC`
              returns an empty result.

    :warning: The function depends on *matplotlib*
    '''
    try:
        import matplotlib.pyplot as plt

        roc = self.ComputeROC(score_col, class_col, score_dir,
                              class_dir, class_cutoff)

        if not roc:
            return None

        # lower bound of the logarithmic x-axis
        x_min = 0.001

        rocx, rocy = roc

        if not title:
            title = 'logROC of %s' % score_col

        if not x_title:
            x_title = 'false positive rate'

        if not y_title:
            y_title = 'true positive rate'

        if clear:
            plt.clf()

        rocx = [x if x > 0 else x_min for x in rocx]

        plt.plot(rocx, rocy, style)

        plt.title(title, size='x-large', fontweight='bold')
        plt.ylabel(y_title, size='x-large')
        plt.xlabel(x_title, size='x-large')

        # Base 10 is matplotlib's default for log scales. The explicit
        # keyword was renamed from 'basex' to 'base', so omitting it is the
        # only spelling that works with both old and new versions.
        plt.xscale('log')
        plt.xlim(x_min, 1.0)

        if save:
            plt.savefig(save)

        return plt
    except ImportError:
        LogError("Function needs matplotlib, but I could not import it.")
        raise
def testPlotLogROC(self):
    '''
    PlotLogROC writes an image file that can be opened for a mixed
    classification and returns None when there are no positives at all.
    '''
    if not HAS_MPL or not HAS_PIL:
        return
    scores = [0.9, 0.8, 0.7, 0.6, 0.55, 0.54, 0.53, 0.52, 0.51, 0.505,
              0.4, 0.39, 0.38, 0.37, 0.36, 0.35, 0.34, 0.33, 0.30, 0.1]
    out_path = os.path.join("testfiles", "logroc-out.png")

    tab = Table(['classific', 'score'], 'bf',
                classific=[True, True, False, True, True, True, False,
                           False, True, False, True, False, True, False,
                           False, False, True, False, True, False],
                score=scores)
    pl = tab.PlotLogROC(score_col='score', score_dir='+',
                        class_col='classific',
                        save=out_path)
    # Only checks that the written file can be opened as an image; it is
    # not compared against a reference image.
    img1 = Image.open(out_path)

    # no true positives
    tab = Table(['classific', 'score'], 'bf',
                classific=[False] * len(scores),
                score=scores)
    pl = tab.PlotLogROC(score_col='score', score_dir='+',
                        class_col='classific',
                        save=out_path)
    self.assertEqual(pl, None)


def testLogROCAUCforPerfectCurve(self):
    '''
    A classifier ranking all positives before all negatives must give an
    AUC of 1.0 for both the logROC and the linear ROC.
    '''
    if not HAS_NUMPY:
        return
    auc_ref = 1.0
    tab = Table(['classific', 'score'], 'bf',
                classific=[True, True, True, True, True, True,
                           False, False, False, False, False, False],
                score=[0.9, 0.8, 0.7, 0.6, 0.55, 0.54,
                       0.4, 0.39, 0.38, 0.37, 0.36, 0.35])

    # test logAUC
    auc = tab.ComputeLogROCAUC(score_col='score', score_dir='+',
                               class_col='classific')
    self.assertAlmostEqual(auc, auc_ref)

    # test linear AUC
    auc = tab.ComputeROCAUC(score_col='score', score_dir='+',
                            class_col='classific')
    self.assertAlmostEqual(auc, auc_ref)


def testCalcLogROCAUCRandomCurve(self):
    '''
    Positives and negatives sharing identical scores: the linear AUC is
    0.5 and the logROC AUC is checked against a fixed reference value.
    '''
    if not HAS_NUMPY:
        return
    tab = Table(['classific', 'score'], 'bf',
                classific=[True, False, True, False, True, False,
                           True, False, True, False, True, False],
                score=[0.9, 0.9, 0.7, 0.7, 0.55, 0.55,
                       0.4, 0.4, 0.3, 0.3, 0.3, 0.3])

    # test logAUC
    auc_ref = 0.1440197405305
    auc = tab.ComputeLogROCAUC(score_col='score', score_dir='+',
                               class_col='classific')
    self.assertAlmostEqual(auc, auc_ref)

    # test linear AUC
    auc_ref = 0.5
    auc = tab.ComputeROCAUC(score_col='score', score_dir='+',
                            class_col='classific')
    self.assertAlmostEqual(auc, auc_ref)