diff --git a/modules/base/pymod/CMakeLists.txt b/modules/base/pymod/CMakeLists.txt index 9e9a429777335a1a5364a9359263ee0337858632..6069953a67e435873141a403fda80233405863a6 100644 --- a/modules/base/pymod/CMakeLists.txt +++ b/modules/base/pymod/CMakeLists.txt @@ -7,7 +7,7 @@ set(OST_BASE_PYMOD_SOURCES if (NOT ENABLE_STATIC) pymod(NAME base OUTPUT_DIR ost CPP ${OST_BASE_PYMOD_SOURCES} - PY settings.py stutil.py table.py xmlrunner.py testutils.py) + PY settings.py stutil.py table.py table_selector.py xmlrunner.py testutils.py) endif() if(WIN32) diff --git a/modules/base/pymod/table.py b/modules/base/pymod/table.py index 4e88a8bdd90aa8d7c24633489c802ef8e4af689a..82f65d43d95f21bce01c1d0457637704a2a7152b 100644 --- a/modules/base/pymod/table.py +++ b/modules/base/pymod/table.py @@ -820,378 +820,6 @@ Statistics for column %(col)s return filt_tab - def _EvaluateEqualNone(self, lhs, rhs): - return (lhs==None or lhs!=lhs) == (rhs==None or rhs!=rhs) - - def _EvaluateNonEqualNone(self, lhs, rhs): - return (lhs==None or lhs!=lhs) != (rhs==None or rhs!=rhs) - - def _EvaluateAnd(self, lhs, rhs): - return lhs and rhs - - def _EvaluateOr(self, lhs, rhs): - return lhs or rhs - - def _EvaluateEqual(self, lhs, rhs): - return lhs==rhs - - def _EvaluateNonEqual(self, lhs, rhs): - return lhs!=rhs - - def _EvaluateLower(self, lhs, rhs): - return lhs<rhs - - def _EvaluateGreater(self, lhs, rhs): - return lhs>rhs - - def _EvaluateLowerEqual(self, lhs, rhs): - return lhs<=rhs - - def _EvaluateGreaterEqual(self, lhs, rhs): - return lhs>=rhs - - def _EvaluateAdd(self, lhs, rhs): - if lhs==None or lhs!=lhs or rhs==None or rhs!=rhs: - return None - return lhs+rhs - - def _EvaluateSubtract(self, lhs, rhs): - if lhs==None or lhs!=lhs or rhs==None or rhs!=rhs: - return None - return lhs-rhs - - def _EvaluateMultiply(self, lhs, rhs): - if lhs==None or lhs!=lhs or rhs==None or rhs!=rhs: - return None - return lhs*rhs - - def _EvaluateDivide(self, lhs, rhs): - if lhs==None or lhs!=lhs or rhs==None or 
rhs!=rhs: - return None - return lhs/rhs - - - def _EvaluateOperator(self, op, lhs, rhs): - - if op=='+': - return self._EvaluateAdd(lhs, rhs) - elif op=='-': - return self._EvaluateSubtract(lhs, rhs) - elif op=='/': - return self._EvaluateDivide(lhs, rhs) - elif op=='*': - return self._EvaluateMultiply(lhs, rhs) - elif lhs==None or lhs!=lhs or rhs==None or rhs!=rhs: - if op=='=': - return self._EvaluateEqualNone(lhs,rhs) - elif op=='!=': - return self._EvaluateNonEqualNone(lhs,rhs) - return None - elif op=='and': - return self._EvaluateAnd(lhs, rhs) - elif op=='or': - return self._EvaluateOr(lhs, rhs) - elif op=='=': - return self._EvaluateEqual(lhs, rhs) - elif op=='!=': - return self._EvaluateNonEqual(lhs, rhs) - elif op=='<': - return self._EvaluateLower(lhs, rhs) - elif op=='>': - return self._EvaluateGreater(lhs, rhs) - elif op=='<=': - return self._EvaluateLowerEqual(lhs, rhs) - elif op=='>=': - return self._EvaluateGreaterEqual(lhs, rhs) - - else: - raise ValueError('Unknown operator: '+op) - - def _EvaluateRPN(self, RPNExp, valid_operators): - #Evaluates the reverse polish notation - stack=list() - while True: - if len(RPNExp)==0: - break - exp=RPNExp.pop(0) - if exp in valid_operators: - if len(stack)<2: - raise ValueError('Cannot evaluate operator on less than two operands!') - rhs=stack.pop() - lhs=stack.pop() - result=self._EvaluateOperator(exp, lhs, rhs) - if result==None: - return False - stack.append(result) - else: - stack.append(exp) - if len(stack)>1: - raise ValueError('Too many operands for given operators!') - return stack.pop() - - def _ShuntingYard(self, split_expression, valid_operators, precedence): - #Creates the so called reverse polish notation out of the expression parser output. - #note, that there won't be parenthesis anymore and potential parenthesis - #mismatches get recognized. - #The shunting yard algorithm from dijkstra gets used. 
- - output_stack=list() - operator_stack=list() - - while True: - if len(split_expression)==0: - while True: - if len(operator_stack)==0: - break - if operator_stack[-1] in ['(',')']: - raise ValueError('Parenthesis mismatch!') - output_stack.append(operator_stack.pop()) - break - - exp=split_expression.pop(0) - - if exp == '(': - operator_stack.append('(') - continue - - if exp in valid_operators: - prec=precedence[exp] - while len(operator_stack)>0: - if operator_stack[-1]=='(': - break - elif prec>=precedence[operator_stack[-1]]: - output_stack.append(operator_stack.pop()) - else: - break - operator_stack.append(exp) - continue - - if exp == ')': - while True: - if len(operator_stack)==0: - raise ValueError('Parenthesis mismatch!') - if operator_stack[-1]=='(': - operator_stack.pop() - break - output_stack.append(operator_stack.pop()) - continue - - output_stack.append(exp) - - return output_stack - - def _EvaluateOperand(self, operand): - - import re - - float_expression=re.compile('[-+]?[0-9]*\.[0-9]+(?:[eE][-+]?[0-9]+)?$') - int_expression=re.compile('[-+]?[0-9]+(?:[eE][-+]?[0-9]+)?$') - bool_expression=re.compile('true$|True$|false$|False$') - none_expression=re.compile('None$|none$|nan$|NAN$|NaN$') - - if re.match(float_expression,operand): - return float(operand) - elif re.match(int_expression, operand): - return int(operand) - elif re.match(bool_expression,operand): - if operand == 'false' or operand == 'False': - return False - return True - elif re.match(none_expression,operand): - return None - return operand - - #If nothing above matches, operand must be a string, full string - #gets returned. 
- - - def _LexerHelper(self, operand): - if len(operand.strip())>0: - if ' ' in operand.strip(): - raise ValueError('Cannot Evaluate %s'%(operand)) - return [operand.strip()] - return [] - - - - def _ExpressionLexer(self, expression, valid_operators, precedence): - - #Reads token after token and searches for brackets and valid_operators - #everything, that doesn't match the above is assumed to be an operand - #and is cast into the most likely type based on regular expression - #Note, that there is no check, wether the operands can be processed by - #their corresponding operators (with respect to types)! - - split_expression=list() - actual_position=0 - eaten_stuff='' - - while True: - - if actual_position>=len(expression): - if len(eaten_stuff.strip())>0: - op=eaten_stuff.strip() - if ' ' in op: - raise ValueError('cannot evaluate %s'%(op)) - split_expression.append(op) - - #check for problematic cases like 'a<=b<=c'. We don't know which operator to evaluate first - for i in range(len(split_expression)-3): - if (split_expression[i] in valid_operators) and (split_expression[i+2] in valid_operators): - if precedence[split_expression[i]]==precedence[split_expression[i+2]]: - raise ValueError('Cannot Evaluate '+' '.join(split_expression[i:i+3])+' since both operators have same precedence!') - - - #handle , operator - #replaces an expression like 'rnum=1,2,3' with '(rnum=1 or rnum=2 or rnum=3)' - - temp_split_expression=list() - skips=0 - - for i in range(len(split_expression)): - if skips>0: - skips-=1 - continue - if ',' in split_expression[i]: - - if split_expression[max(0,i-1)] != '=' and split_expression[min(i+1,len(split_expression)-1)] != '=': - raise ValueError('Can evaluate \',\' sign only in combination with \'=\'') - - single_operands=split_expression[i].split(',') - - if split_expression[max(0,i-1)]=='=': - if i-2<0: - raise ValueError('Does it really make sense to start with an \'=\'') - main_operand=split_expression[i-2] - temp_split_expression.pop() - 
temp_split_expression.pop() - skips=0 - - else: - if i+2>len(split_expression)-1: - raise ValueError('Does it really make sense to end with an \'=\'') - main_operand=split_expression[i+2] - skips=2 - - temp_expression=list(['(']) - temp_expression+=' or '.join(['%s = %s'% (a,b) for (a,b) in zip(len(single_operands)*[main_operand],single_operands)]).split() - temp_expression.append(')') - temp_split_expression+=temp_expression - continue - - temp_split_expression.append(split_expression[i]) - - split_expression=temp_split_expression - - #handle ':' operator - #replaces an expression like 'col_a=x:y' with '(col_a>=x and col_a<=y)' - - temp_split_expression=list() - skips=0 - - for i in range(len(split_expression)): - if skips>0: - skips-=1 - continue - if ':' in split_expression[i]: - if split_expression[max(0,i-1)] != '=' and split_expression[min(i+1,len(split_expression)-1)] != '=': - raise ValueError('Can evaluate \':\' sign only in combination with \'=\'') - if len(split_expression[i].split(':')) != 2: - raise ValueError('Can operate \':\' operator only on 2 operands') - - #even though we are still in the lexer, its necessary to evaluate the next - #expressions... 
They will be written back into the splitexpression as string again - lhs=self._EvaluateOperand(split_expression[i].split(':')[0]) - rhs=self._EvaluateOperand(split_expression[i].split(':')[1]) - - template_expression=['(','','<=','','and','','<=','',')'] - - if split_expression[max(0,i-1)] == '=': - if i-2<0: - raise ValueError('Does it really make sense to start with an \'=\'?') - temp_split_expression.pop() - temp_split_expression.pop() - template_expression[3]=split_expression[i-2] - template_expression[5]=split_expression[i-2] - skips=0 - - else: - if i+2>len(split_expression)-1: - raise ValueError('Does it really make sense to end with an \'=\'?') - template_expression[3]=split_expression[i+2] - template_expression[5]=split_expression[i+2] - skips=2 - - template_expression[1]=str(min(lhs,rhs)) - template_expression[7]=str(max(lhs,rhs)) - temp_split_expression+=template_expression - continue - - temp_split_expression.append(split_expression[i]) - - split_expression=temp_split_expression - - return split_expression - - token=expression[actual_position] - - if token.isspace(): - split_expression+=self._LexerHelper(eaten_stuff) - actual_position+=1 - eaten_stuff='' - continue - - if token in ['(','[','{']: - split_expression+=self._LexerHelper(eaten_stuff) - split_expression.append('(') - actual_position+=1 - eaten_stuff='' - continue - - if token in [')',']','}']: - split_expression+=self._LexerHelper(eaten_stuff) - split_expression.append(')') - actual_position+=1 - eaten_stuff='' - continue - - if token in ['+','-','*','/','=']: - split_expression+=self._LexerHelper(eaten_stuff) - split_expression.append(token) - actual_position+=1 - eaten_stuff='' - continue - - if token == '!': - if actual_position+1==len(expression): - raise ValueError('Cannot evaluate \'!\'') - if expression[actual_position+1]== '=': - split_expression+=self._LexerHelper(eaten_stuff) - split_expression.append('!=') - actual_position+=2 - eaten_stuff='' - continue - else: - raise 
ValueError('Cannot evaluate single \'!\'') - - if token in ['<','>']: - if actual_position+1<len(expression): - if expression[actual_position+1]=='=': - split_expression+=self._LexerHelper(eaten_stuff) - split_expression.append(token+'=') - actual_position+=2 - eaten_stuff='' - continue - split_expression+=self._LexerHelper(eaten_stuff) - split_expression.append(token) - actual_position+=1 - eaten_stuff='' - continue - - eaten_stuff+=token - actual_position+=1 - - def Select(self, query): """ @@ -1227,36 +855,19 @@ Statistics for column %(col)s """ - valid_operators=['and','or','!=','<=','>=','=','<','>','+','-','*','/'] - - #http://en.wikipedia.org/wiki/Order_of_operations - - precedence={'or':6 , 'and':5 , '!=':4 , '=':4 , '<=':3 , - '>=':3 , '<':3 , '>':3 , '+':2 , '-':2 , '*':1 , '/':1} - - split_expression=self._ExpressionLexer(query, valid_operators, precedence) - rpn_expression=self._ShuntingYard(list(split_expression), valid_operators, precedence) - - tab_indices=list() - exp_indices=list() + import traceback + try: + from table_selector import * + except: + traceback.print_exc() + raise ImportError("Tried to import the file table_selector.py, but could not find it!") - #extract indices for tab values and cast other operands in their most likely type based on - #regular expressions - for i, exp in enumerate(rpn_expression): - if exp in self.col_names: - tab_indices.append(self.GetColIndex(exp)) - exp_indices.append(i) - continue - elif exp in valid_operators or exp in ['(',')']: - continue - rpn_expression[i] = self._EvaluateOperand(exp) + selector=TableSelector(self.col_types, self.col_names, query) selected_tab=Table(list(self.col_names), list(self.col_types)) for row in self.rows: - for ti, ei in zip(tab_indices, exp_indices): - rpn_expression[ei] = row[ti] - if self._EvaluateRPN(list(rpn_expression), valid_operators): + if selector.EvaluateRow(row): selected_tab.AddRow(row) return selected_tab @@ -1990,7 +1601,7 @@ Statistics for column %(col)s 
class TableSelector:
  """
  Compiles a textual selection query once and evaluates it row by row.

  The query is processed in three steps when the selector is constructed:

  1. _ExpressionLexer splits the query string into tokens
  2. _ParseExpression expands the ',' and ':' shortcuts and casts all
     literal operands into the type of the column(s) they are used with
  3. _ShuntingYard reorders the tokens into reverse polish notation (RPN)

  EvaluateRow then substitutes the column values of a row into the RPN
  expression and evaluates it.

  :param col_types: type name for every column, one of 'float', 'int',
                    'string' or 'bool'
  :param col_names: column names, in the same order as the values in the
                    rows passed to EvaluateRow
  :param query: the selection query, e.g. 'a=1:4 and (b>5.0 or c=True)'

  :raises: ValueError for malformed queries, RuntimeError if operands can
           not be cast to the column type or if a comparison involves no
           column or columns of inconsistent types
  """

  #maps every operator token to the name of the method implementing it
  _OPERATOR_METHODS={'+':'_EvaluateAdd', '-':'_EvaluateSubtract',
                     '/':'_EvaluateDivide', '*':'_EvaluateMultiply',
                     'and':'_EvaluateAnd', 'or':'_EvaluateOr',
                     '=':'_EvaluateEqual', '==':'_EvaluateEqual',
                     '!=':'_EvaluateNonEqual', '!':'_EvaluateNonEqual',
                     '<':'_EvaluateLower', '>':'_EvaluateGreater',
                     '<=':'_EvaluateLowerEqual', '>=':'_EvaluateGreaterEqual'}

  def __init__(self, col_types, col_names, query):

    self.col_types=col_types
    self.col_names=col_names
    self.query=query

    #a lower precedence value means that the operator binds stronger.
    #'extension' is the two character operator a single character
    #comparison token may be the start of.
    self.valid_operators=dict()

    self.valid_operators['and']={'type':'boolean','precedence':5}
    self.valid_operators['or']={'type':'boolean','precedence':6}

    comparisons=[('!',4,'!='), ('!=',4,None), ('=',4,'=='), ('==',4,None),
                 ('<=',3,None), ('>=',3,None), ('>',3,'>='), ('<',3,'<=')]
    for op, prec, extension in comparisons:
      self.valid_operators[op]={'type':'comparison','precedence':prec,
                                'extension':extension}

    for op, prec in [('+',2), ('-',2), ('/',1), ('*',1)]:
      self.valid_operators[op]={'type':'arithmetic','precedence':prec}

    #brackets get NaN as precedence, NaN never compares equal to anything,
    #so brackets never trigger the equal precedence check in _ParseExpression
    for op in ['(','[','{']:
      self.valid_operators[op]={'type':'left_bracket',
                                'precedence':float('NaN')}
    for op in [')',']','}']:
      self.valid_operators[op]={'type':'right_bracket',
                                'precedence':float('NaN')}

    self.split_expression=self._ExpressionLexer(self.query)
    self.parsed_expression=self._ParseExpression(self.split_expression)
    self.rpn_expression=self._ShuntingYard(self.parsed_expression)

    self.tab_indices=list()
    self.exp_indices=list()

    #remember at which positions of the rpn expression the value of which
    #column has to be filled in
    for i, exp in enumerate(self.rpn_expression):
      if exp in self.col_names:
        self.tab_indices.append(self._GetIndex(exp))
        self.exp_indices.append(i)

  def EvaluateRow(self,row):
    """
    Evaluates the query for one row.

    :param row: sequence of values, ordered like the column names passed to
                the constructor
    :returns: True if the row fulfills the query, False otherwise
    :raises: ValueError if the expression can not be evaluated
    """
    #work on a copy, the compiled expression is shared by all rows
    expression=list(self.rpn_expression)
    for ti, ei in zip(self.tab_indices, self.exp_indices):
      value=row[ti]
      #NaN is the only value not being equal to itself, it gets
      #replaced by None since the operators only know about None
      if value!=value:
        expression[ei]=None
      else:
        expression[ei]=value
    if self._EvaluateRPN(expression, self.exp_indices):
      return True
    return False

  def _GetIndex(self, col):
    """
    Returns the index of column *col*, raises ValueError if there is no such
    column.
    """
    if col not in self.col_names:
      raise ValueError('Table Selector has no column named "%s"' % col)
    return self.col_names.index(col)

  def _EvaluateAnd(self, lhs, rhs):
    return lhs==True and rhs==True

  def _EvaluateOr(self, lhs, rhs):
    return lhs==True or rhs==True

  def _EvaluateEqual(self, lhs, rhs):
    return lhs==rhs

  def _EvaluateNonEqual(self, lhs, rhs):
    return lhs!=rhs

  #ordering comparisons involving None are always False

  def _EvaluateLower(self, lhs, rhs):
    if lhs is None or rhs is None:
      return False
    return lhs<rhs

  def _EvaluateGreater(self, lhs, rhs):
    if lhs is None or rhs is None:
      return False
    return lhs>rhs

  def _EvaluateLowerEqual(self, lhs, rhs):
    if lhs is None or rhs is None:
      return False
    return lhs<=rhs

  def _EvaluateGreaterEqual(self, lhs, rhs):
    if lhs is None or rhs is None:
      return False
    return lhs>=rhs

  #arithmetic operations involving None result in None

  def _EvaluateAdd(self, lhs, rhs):
    if lhs is None or rhs is None:
      return None
    return lhs+rhs

  def _EvaluateSubtract(self, lhs, rhs):
    if lhs is None or rhs is None:
      return None
    return lhs-rhs

  def _EvaluateMultiply(self, lhs, rhs):
    if lhs is None or rhs is None:
      return None
    return lhs*rhs

  def _EvaluateDivide(self, lhs, rhs):
    if lhs is None or rhs is None:
      return None
    return lhs/rhs

  def _EvaluateOperator(self, op, lhs, rhs):
    """
    Applies the binary operator *op* on *lhs* and *rhs*.

    This function assumes, that all NaN values have been replaced by None!

    :raises: ValueError if *op* is not a known operator
    """
    try:
      method_name=self._OPERATOR_METHODS[op]
    except (KeyError, TypeError):
      raise ValueError('Unknown operator: %s'%(op,))
    return getattr(self, method_name)(lhs, rhs)

  def _EvaluateRPN(self, RPNExp, operand_positions=()):
    """
    Evaluates an expression in reverse polish notation.

    :param RPNExp: list of operands and operator tokens, it is not modified
    :param operand_positions: positions in *RPNExp* which hold operands, no
                              matter what their value looks like. Used for
                              column values, a string column may contain
                              values such as 'and' or '+' which must not be
                              mistaken for operators.
    :returns: result of the expression, False as soon as an operator
              evaluates to None
    :raises: ValueError if operators and operands do not match up or if
             the expression is empty
    """
    operand_positions=set(operand_positions)
    stack=list()
    for position, exp in enumerate(RPNExp):
      if position in operand_positions or exp not in self.valid_operators:
        stack.append(exp)
        continue
      if len(stack)<2:
        raise ValueError('Cannot evaluate operator on less than two operands!')
      rhs=stack.pop()
      lhs=stack.pop()
      result=self._EvaluateOperator(exp, lhs, rhs)
      if result is None:
        return False
      stack.append(result)
    if len(stack)>1:
      raise ValueError('Too many operands for given operators!')
    if len(stack)==0:
      raise ValueError('Cannot evaluate empty expression!')
    return stack.pop()

  def _ShuntingYard(self, split_expression):
    """
    Creates the so called reverse polish notation out of the expression
    parser output using the shunting yard algorithm from dijkstra.
    Note, that there won't be parenthesis anymore and potential parenthesis
    mismatches get recognized.

    :param split_expression: token list in infix order, it is not modified
    :returns: token list in reverse polish notation
    :raises: ValueError in case of a parenthesis mismatch
    """
    operators=self.valid_operators
    output_stack=list()
    operator_stack=list()

    for exp in split_expression:

      if exp not in operators:
        output_stack.append(exp)
        continue

      exp_type=operators[exp]['type']

      if exp_type=='left_bracket':
        operator_stack.append(exp)
        continue

      if exp_type=='right_bracket':
        #move everything up to the matching left bracket to the output
        while True:
          if len(operator_stack)==0:
            raise ValueError('Parenthesis mismatch!')
          top=operator_stack.pop()
          if operators[top]['type']=='left_bracket':
            break
          output_stack.append(top)
        continue

      #operators on the stack binding at least as strong as the current
      #one have to be evaluated first
      prec=operators[exp]['precedence']
      while len(operator_stack)>0:
        top=operator_stack[-1]
        if operators[top]['type']=='left_bracket':
          break
        elif prec>=operators[top]['precedence']:
          output_stack.append(operator_stack.pop())
        else:
          break
      operator_stack.append(exp)

    while len(operator_stack)>0:
      top=operator_stack.pop()
      if operators[top]['type'] in ['left_bracket','right_bracket']:
        raise ValueError('Parenthesis mismatch!')
      output_stack.append(top)

    return output_stack

  def _ParseSubExpression(self, subexpression):
    """
    Casts the literal operands of a subexpression, that is a token list
    without boolean operators and brackets, into the type of the column(s)
    they are used with.

    :raises: RuntimeError if the subexpression contains no column, columns
             of inconsistent types or an operand which can not be cast
    """
    valid_types={'float':'numeric','int':'numeric','string':'string','bool':'bool'}

    column_names=list()
    column_types=list()

    for item in subexpression:
      if item in self.col_names:
        column_names.append(item)
        column_types.append(valid_types[self.col_types[self._GetIndex(item)]])

    unique_type=list(set(column_types))
    if len(unique_type)>1:
      raise RuntimeError('Try to compare columns '+','.join(column_names)+' which have inconsistent types!')
    if len(unique_type)==0:
      raise RuntimeError('Try to evaluate subexpression '+' '.join(subexpression)+' that contains no valid column name of current table!')

    target_type=unique_type[0]
    final_expression=list()

    for item in subexpression:
      if item in self.valid_operators or item in column_names:
        final_expression.append(item)
      elif target_type=='numeric':
        if item in ['NaN','nan','None','none']:
          final_expression.append(None)
          continue
        try:
          value=float(item)
        except (TypeError, ValueError):
          raise RuntimeError('Tried to cast '+str(item)+' into numeric type to compare with column(s) '+','.join(column_names)+', but failed!')
        #other spellings of NaN pass the float conversion, the operators
        #expect None instead
        if value!=value:
          value=None
        final_expression.append(value)
      elif target_type=='bool':
        if item in ['None','none']:
          final_expression.append(None)
        elif item in ['true','True']:
          final_expression.append(True)
        elif item in ['false','False']:
          final_expression.append(False)
        else:
          raise RuntimeError('Tried to cast '+str(item)+' into boolean type to compare with column(s) '+','.join(column_names)+', but failed!')
      elif target_type=='string':
        final_expression.append(item)

    return final_expression

  def _ExpandCommaOperator(self, split_expression):
    #replaces an expression like 'rnum=1,2,3' with
    #'(rnum=1 or rnum=2 or rnum=3)'
    expanded=list()
    last=len(split_expression)-1
    skips=0

    for i, item in enumerate(split_expression):
      if skips>0:
        skips-=1
        continue
      if ',' not in item:
        expanded.append(item)
        continue

      context=' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))])
      previous=split_expression[max(0,i-1)]
      following=split_expression[min(i+1,last)]

      if previous!='=' and following!='=':
        raise ValueError('Can evaluate \',\' operator only in combination with \"=\" in subexpression '+context)

      single_operands=item.split(',')

      if previous=='=':
        if i-2<0:
          raise ValueError('Cannot evaluate subexpression '+context+' starting with an \'=\'')
        main_operand=split_expression[i-2]
        #column and '=' have already been copied, remove them again
        expanded.pop()
        expanded.pop()
      else:
        if i+2>last:
          raise ValueError('Cannot evaluate subexpression '+context+' ending with an \'=\'')
        main_operand=split_expression[i+2]
        #the following '=' and the column are consumed here
        skips=2

      expanded.append('(')
      expanded+=' or '.join(['%s = %s'%(main_operand,b) for b in single_operands]).split()
      expanded.append(')')

    return expanded

  def _ExpandColonOperator(self, split_expression):
    #replaces an expression like 'col_a=x:y' with '(x<=col_a and col_a<=y)'
    expanded=list()
    last=len(split_expression)-1
    skips=0

    for i, item in enumerate(split_expression):
      if skips>0:
        skips-=1
        continue
      if ':' not in item:
        expanded.append(item)
        continue

      context=' '.join(split_expression[max(0,i-1):min(i+1,len(split_expression))])
      previous=split_expression[max(0,i-1)]
      following=split_expression[min(i+1,last)]

      if previous!='=' and following!='=':
        raise ValueError('Can evaluate subexpression '+context+' \':\' sign is only allowed in combination with \'=\'')

      boundaries=item.split(':')
      if len(boundaries)!=2:
        raise ValueError('Can operate \':\' operator only on 2 operands in subexpression '+context)

      if previous=='=':
        if i-2<0:
          raise ValueError('Cannot evaluate subexpression '+context+' starting with an \'=\'')
        main_operand=split_expression[i-2]
        #column and '=' have already been copied, remove them again
        expanded.pop()
        expanded.pop()
      else:
        if i+2>last:
          raise ValueError('Cannot evaluate subexpression '+context+' ending with an \'=\'')
        main_operand=split_expression[i+2]
        #the following '=' and the column are consumed here
        skips=2

      expanded+=['(',boundaries[0],'<=',main_operand,'and',
                 main_operand,'<=',boundaries[1],')']

    return expanded

  def _ParseExpression(self, split_expression):
    """
    Turns the lexer output into a token list which can be passed to
    _ShuntingYard: the ',' and ':' shortcuts get expanded and all literal
    operands are cast into the type of the column they are compared against.

    :param split_expression: token list from _ExpressionLexer, it is not
                             modified
    :raises: ValueError for ambiguous or malformed expressions, RuntimeError
             if operands can not be cast (see _ParseSubExpression)
    """
    #check for problematic cases like 'a<=b<=c'. We don't know which operator
    #to evaluate first
    for i in range(len(split_expression)-3):
      first=split_expression[i]
      second=split_expression[i+2]
      if (first in self.valid_operators) and (second in self.valid_operators):
        if self.valid_operators[first]['precedence']==self.valid_operators[second]['precedence']:
          raise ValueError('Cannot Evaluate '+' '.join(split_expression[i:i+3])+' since both operators have same precedence!')

    #the ',' operator must be handled first, so that an expression like
    #'b=0.0:1.2,2.5:8.0' ends up as two separate ranges
    split_expression=self._ExpandCommaOperator(split_expression)
    split_expression=self._ExpandColonOperator(split_expression)

    #the whole thing is now split to pieces, we need to cast the types of the
    #operands into the types of the columns, the operands are compared
    #against. Boolean operators and brackets separate the subexpressions.
    separators=['boolean','left_bracket','right_bracket']
    final_expression=list()
    subexpression=list()

    for item in split_expression:
      if item in self.valid_operators and self.valid_operators[item]['type'] in separators:
        if len(subexpression)>0:
          final_expression+=self._ParseSubExpression(subexpression)
          subexpression=list()
        final_expression.append(item)
        continue
      subexpression.append(item)

    if len(subexpression)>0:
      final_expression+=self._ParseSubExpression(subexpression)

    return final_expression

  def _ExpressionLexer(self, expression):
    """
    Splits the query string into a list of tokens.

    Reads character after character and searches for brackets and operators.
    Everything, that doesn't match is assumed to be part of an operand.
    Note, that there is no check for boolean operators. They need to be
    clearly separated by spaces or brackets anyway, so they end up as tokens
    on their own.
    """
    split_expression=list()
    eaten_stuff=''
    actual_position=0

    while actual_position<len(expression):

      token=expression[actual_position]

      if token.isspace() or token in self.valid_operators:
        #whitespace as well as operators terminate the current operand
        if len(eaten_stuff)>0:
          split_expression.append(eaten_stuff)
          eaten_stuff=''

        if token.isspace():
          actual_position+=1
          continue

        #prefer the two character version of a comparison operator, also
        #if it is located at the very end of the expression
        extension=self.valid_operators[token].get('extension')
        if extension is not None and expression.startswith(extension, actual_position):
          split_expression.append(extension)
          actual_position+=len(extension)
        else:
          split_expression.append(token)
          actual_position+=1
        continue

      eaten_stuff+=token
      actual_position+=1

    if len(eaten_stuff)>0:
      split_expression.append(eaten_stuff)

    return split_expression
exp_one=['a','=','1','and','(','1.0','<=','b','and','b','<=','2.0',')'] - exp_two=['a','=','1','and','(','b','=','1.0','or','b','=','2.0',')'] - self.assertEquals(tab._ExpressionLexer('a=1 and b=1.0:2.0', valid_operators, precedence),exp_one) - self.assertEquals(tab._ExpressionLexer('a=1 and 1.0:2.0=b', valid_operators, precedence),exp_one) - self.assertEquals(tab._ExpressionLexer('a=1 and b=1.0,2.0', valid_operators, precedence),exp_two) - self.assertEquals(tab._ExpressionLexer('a=1 and 1.0,2.0=b', valid_operators, precedence),exp_two) - self.assertRaises(ValueError, tab._ExpressionLexer,'a=1 and b=1.0:2.0:3.0',valid_operators, precedence) #check wether error gets raised in problematic cases like: a=1=b self.assertRaises(ValueError, tab.Select, 'a=1=b and c=True') @@ -1712,42 +1699,52 @@ class TestTable(unittest.TestCase): #check wether error gets raised when two operands are not separated by an operator self.assertRaises(ValueError,tab.Select,'a=1 b=1.0') + from ost.table_selector import * + + selector=TableSelector(tab.col_types,tab.col_names,'') + #check some examples for dijkstras shunting yard algorithm query_one='a=1:4 and ((b>5.0 or b<2.0) and c=True)' - split_exp_one=tab._ExpressionLexer(query_one,valid_operators,precedence) - rpn_one=['1', 'a', '<=', 'a', '4', '<=', 'and', 'b', '5.0', '>', 'b', '2.0', '<', 'or', 'c', 'True', '=', 'and', 'and'] + split_exp_one=selector._ExpressionLexer(query_one) + parsed_exp_one=selector._ParseExpression(split_exp_one) + rpn_one=[1, 'a', '<=', 'a', 4, '<=', 'and', 'b', 5.0, '>', 'b', 2.0, '<', 'or', 'c', True, '=', 'and', 'and'] query_two='(a=1,2) or (b>5.0+a or b<2.0)' - split_exp_two=tab._ExpressionLexer(query_two,valid_operators,precedence) - rpn_two=['a', '1', '=', 'a', '2', '=', 'or', 'b', '5.0', 'a', '+', '>', 'b', '2.0', '<', 'or', 'or'] - self.assertEquals(tab._ShuntingYard(split_exp_one,valid_operators,precedence),rpn_one) - self.assertEquals(tab._ShuntingYard(split_exp_two,valid_operators,precedence),rpn_two) + 
split_exp_two=selector._ExpressionLexer(query_two) + parsed_exp_two=selector._ParseExpression(split_exp_two) + rpn_two=['a', 1, '=', 'a', 2, '=', 'or', 'b', 5.0, 'a', '+', '>', 'b', 2.0, '<', 'or', 'or'] + self.assertEquals(selector._ShuntingYard(parsed_exp_one),rpn_one) + self.assertEquals(selector._ShuntingYard(parsed_exp_two),rpn_two) #check operator evaluations - self.assertTrue(tab._EvaluateOperator('=',False,False)) - self.assertFalse(tab._EvaluateOperator('=',False,True)) - self.assertTrue(tab._EvaluateOperator('and',True,True)) - self.assertFalse(tab._EvaluateOperator('and',True,False)) - self.assertTrue(tab._EvaluateOperator('or',True,False)) - self.assertTrue(tab._EvaluateOperator('or',False,True)) - self.assertFalse(tab._EvaluateOperator('or',False,False)) - self.assertTrue(tab._EvaluateOperator('!=',False,True)) - self.assertFalse(tab._EvaluateOperator('!=',True,True)) - self.assertTrue(tab._EvaluateOperator('<=',1.0,2.0)) - self.assertTrue(tab._EvaluateOperator('<=',1.0,1.0)) - self.assertFalse(tab._EvaluateOperator('<=',2.0,1.0)) - self.assertFalse(tab._EvaluateOperator('>=',1.0,2.0)) - self.assertTrue(tab._EvaluateOperator('>=',2.0,1.0)) - self.assertTrue(tab._EvaluateOperator('>=',1.0,1.0)) - self.assertTrue(tab._EvaluateOperator('<',1.0,2.0)) - self.assertFalse(tab._EvaluateOperator('<',2.0,1.0)) - self.assertFalse(tab._EvaluateOperator('<',1.0,1.0)) - self.assertFalse(tab._EvaluateOperator('>',1.0,2.0)) - self.assertTrue(tab._EvaluateOperator('>',2.0,1.0)) - self.assertFalse(tab._EvaluateOperator('>',1.0,1.0)) - self.assertEqual(tab._EvaluateOperator('+',1,1),2) - self.assertEqual(tab._EvaluateOperator('-',1,1),0) - self.assertEqual(tab._EvaluateOperator('*',2,2),4) - self.assertEqual(tab._EvaluateOperator('/',2,2),1) + self.assertTrue(selector._EvaluateOperator('=',False,False)) + self.assertFalse(selector._EvaluateOperator('=',False,True)) + self.assertTrue(selector._EvaluateOperator('and',True,True)) + 
self.assertFalse(selector._EvaluateOperator('and',True,False)) + self.assertTrue(selector._EvaluateOperator('or',True,False)) + self.assertTrue(selector._EvaluateOperator('or',False,True)) + self.assertFalse(selector._EvaluateOperator('or',False,False)) + self.assertTrue(selector._EvaluateOperator('!=',False,True)) + self.assertFalse(selector._EvaluateOperator('!=',True,True)) + self.assertTrue(selector._EvaluateOperator('<=',1.0,2.0)) + self.assertTrue(selector._EvaluateOperator('<=',1.0,1.0)) + self.assertFalse(selector._EvaluateOperator('<=',2.0,1.0)) + self.assertFalse(selector._EvaluateOperator('>=',1.0,2.0)) + self.assertTrue(selector._EvaluateOperator('>=',2.0,1.0)) + self.assertTrue(selector._EvaluateOperator('>=',1.0,1.0)) + self.assertTrue(selector._EvaluateOperator('<',1.0,2.0)) + self.assertFalse(selector._EvaluateOperator('<',2.0,1.0)) + self.assertFalse(selector._EvaluateOperator('<',1.0,1.0)) + self.assertFalse(selector._EvaluateOperator('>',1.0,2.0)) + self.assertTrue(selector._EvaluateOperator('>',2.0,1.0)) + self.assertFalse(selector._EvaluateOperator('>',1.0,1.0)) + self.assertEqual(selector._EvaluateOperator('+',1,1),2) + self.assertEqual(selector._EvaluateOperator('-',1,1),0) + self.assertEqual(selector._EvaluateOperator('*',2,2),4) + self.assertEqual(selector._EvaluateOperator('/',2,2),1) + self.assertEqual(selector._EvaluateOperator('+',None,5),None) + self.assertEqual(selector._EvaluateOperator('-',2,None),None) + self.assertEqual(selector._EvaluateOperator('/',2,None),None) + self.assertEqual(selector._EvaluateOperator('*',None,3),None) #check a few rpn evaluation examples @@ -1755,9 +1752,9 @@ class TestTable(unittest.TestCase): rpn_two=[1, 0, '<=', 2, 4, '<=', 'and', 6.0, 5.0, '>', 2.0, 2.0, '<', 'or', True, True, '=', 'and', 'and'] rpn_three=[1, 0, '<=', 2, 4, '<=', 'or', 6.0, 5.0, '>', 2.0, 2.0, '<', 'or', True, True, '=', 'and', 'and'] - self.assertTrue(tab._EvaluateRPN(rpn_one, valid_operators)) - 
self.assertFalse(tab._EvaluateRPN(rpn_two, valid_operators)) - self.assertTrue(tab._EvaluateRPN(rpn_three, valid_operators)) + self.assertTrue(selector._EvaluateRPN(rpn_one)) + self.assertFalse(selector._EvaluateRPN(rpn_two)) + self.assertTrue(selector._EvaluateRPN(rpn_three)) #check a few selection examples @@ -1766,12 +1763,19 @@ class TestTable(unittest.TestCase): query_three='d=e,f,j and a!=4' query_four='(b=2.0:3.0 and c=False) or (b<1.0 and c)' query_five='b=0.0:1.2,2.5:8.0' + query_six='c=None' + query_seven='c!=None' + query_eight='b/0.5=2' self.assertEqual([0,1,2,3], list(r[0] for r in tab.Select(query_one).rows)) self.assertEqual([2,3], list(r[0] for r in tab.Select(query_two).rows)) self.assertEqual([5,9], list(r[0] for r in tab.Select(query_three).rows)) self.assertEqual([0,7], list(r[0] for r in tab.Select(query_four).rows)) self.assertEqual([0,1,2,3,8,9], list(r[0] for r in tab.Select(query_five).rows)) + self.assertEqual([8,9], list(r[0] for r in tab.Select(query_six).rows)) + self.assertEqual([0,1,2,3,4,5,6,7], list(r[0] for r in tab.Select(query_seven).rows)) + self.assertEqual([1], list(r[0] for r in tab.Select(query_eight).rows)) + if __name__ == "__main__":