# Method of class ``Table`` in modules/base/pymod/table.py, as introduced by
# the patch "Added percentiles function to table" (commit ed575e7c).
def Percentiles(self, col, nths):
    """
    Returns the percentiles of column *col* given in *nths*.

    The percentiles are calculated as

    .. code-block:: python

      values[min(len(values)-1, int(len(values)*p/100.0))]

    where values are the sorted values of *col* not equal to None. This is
    the rank ``len(values)*p/100+0.5`` rounded half up, converted to a
    zero-based index and clamped to the last element.

    :param col: name of the column
    :param nths: list of percentiles to be calculated. Each percentile is a
                 number between 0 and 100.

    :raises: :class:`TypeError` if the column type is not ``int``, ``float``
             or ``bool``, :class:`ValueError` if a requested percentile is
             smaller than 0 or larger than 100
    :returns: List of percentiles in the same order as given in *nths*. If
              the column holds no values other than None, a list of None
              with the same length as *nths* is returned.
    """
    idx = self.GetColIndex(col)
    col_type = self.col_types[idx]
    if col_type not in ('int', 'float', 'bool'):
        raise TypeError("Percentiles can only be used on numeric column types")

    # Validate every request up front so that an invalid percentile is
    # reported even when the column turns out to be empty.
    for nth in nths:
        if nth < 0 or nth > 100:
            raise ValueError("percentiles must be between 0 and 100")

    vals = sorted(v for v in self[col] if v is not None)
    if not vals:
        return [None] * len(nths)

    last = len(vals) - 1
    # round-half-up(n*p/100 + 0.5) - 1 == floor(n*p/100), and int() truncation
    # equals floor because nth >= 0.  Spelling it this way avoids the builtin
    # round(), which rounds half to even on Python 3 and would produce index
    # -1 (i.e. the maximum) for the 0th percentile.
    return [vals[min(last, int(len(vals) * nth / 100.0))] for nth in nths]
# Test method of class ``TestTable`` in modules/base/tests/test_table.py, as
# introduced by the patch "Added percentiles function to table".
def testPercentiles(self):
    """
    Checks Table.Percentiles on an empty column, with out-of-range
    requests, and on a small integer column.
    """
    column = 'nums'
    table = Table([column], 'i')

    # Without any rows every requested percentile is reported as None.
    self.assertEqual(table.Percentiles(column, [0, 100]), [None, None])

    # Requests outside of [0, 100] are rejected, even for an empty column.
    for out_of_range in (101, -1):
        self.assertRaises(ValueError, table.Percentiles, column, [out_of_range])

    # Rows are added unsorted on purpose.
    for value in (35, 15, 50, 40, 20):
        table.AddRow([value])

    expected = [15, 20, 35, 50]
    self.assertEqual(table.Percentiles(column, [0, 30, 40, 100]), expected)