[Checkins] SVN: Sandbox/adamg/ocql/branches/qo-compiler/src/ocql/parser/queryparser.py introduce queryparser with new grammar modifications

Charith Paranaliyanage paranaliyanage at gmail.com
Wed Aug 6 11:43:00 EDT 2008


Log message for revision 89447:
  introduce queryparser with new grammar modifications

Changed:
  A   Sandbox/adamg/ocql/branches/qo-compiler/src/ocql/parser/queryparser.py

-=-
Added: Sandbox/adamg/ocql/branches/qo-compiler/src/ocql/parser/queryparser.py
===================================================================
--- Sandbox/adamg/ocql/branches/qo-compiler/src/ocql/parser/queryparser.py	                        (rev 0)
+++ Sandbox/adamg/ocql/branches/qo-compiler/src/ocql/parser/queryparser.py	2008-08-06 15:42:59 UTC (rev 89447)
@@ -0,0 +1,651 @@
+# -*- coding: UTF-8 -*-
+
+"""Parse a string to Query Object
+
+"""
+
+#TODOs:
+#add metadata into the picture!!!
+#remove shift/reduce conflicts, when possible
+#look after raise "Help"
+#revise according to new grammar
+
+from ply import lex, yacc
+from collections import deque
+from threading import local
+
+from zope.component import adapts
+from zope.component import provideAdapter
+from zope.interface import implements
+
+from ocql.queryobject.queryobject import *
+from ocql.interfaces import IQueryParser
+
+# When true, the parser prints every grammar reduction and parse() prints
+# exceptions before re-raising them.
+DEBUG = 0
+
+class SymbolContainer:
+    """Stack of symbol tables, one dict per nesting level.
+
+    The stack always starts with a single empty table.
+    """
+
+    def __init__(self):
+        self.stack = deque([{}])
+
+    def current(self):
+        """Return the innermost (most recently added) symbol table."""
+        return self.stack[-1]
+
+    def addlevel(self):
+        """Push a new level that starts as a shallow copy of the current one."""
+        self.stack.append(dict(self.current()))
+
+    def dellevel(self):
+        """Drop the innermost level."""
+        self.stack.pop()
+
+# Token names shared by the Lexer and the Parser (order kept as originally written).
+tokens = (
+    'SET', 'LIST', 'COMMA', 'NOT_EQUAL', 'UNION', 'AS', 'EVERY', 'ATMOST',
+    'LT', 'GT', 'ELLIPSIS', 'BRACKET_R', 'OR', 'PIPE', 'DOT', 'IN', 'LTE',
+    'SOME', 'AND', 'CBRACKET_L', 'CONSTANT', 'EQUAL', 'GTE', 'ISINSTANCE',
+    'SEMI_COLON', 'BRACKET_L', 'ASSIGN', 'NOT_ASSIGN', 'FOR', 'CBRACKET_R',
+    'JUST', 'IDENTIFIER', 'DIFFER', 'LEN', 'BAG', 'SBRACKET_L', 'NOT',
+    'ATLEAST', 'SBRACKET_R',
+)
+
+# Operator precedence/associativity table handed to yacc through
+# Parser.precedence.  PLY reads the entries from lowest to highest precedence.
+# NOTE(review): OR is listed after AND, so OR binds tighter than AND here,
+# which is the reverse of the usual convention -- confirm this is intended.
+# The commented-out entries name tokens that are not declared in `tokens`.
+precedence = (
+    ('left', 'UNION'),
+    ('left', 'DIFFER'),
+#   ('token', 'SQUAREL'),
+#   ('token', 'PIPE'),
+#   ('token', 'SQUARER'),
+    ('left', 'AND'),
+    ('left', 'OR'),
+    ('right', 'NOT'),
+    #('left', 'COND_OP'),
+#    ('left', 'PLUS', 'MINUS'),
+#    ('left', 'MUL', 'DIV'),
+#   ('token', 'IDENTIFIER'),
+#   ('token', 'BRACEL'),
+#   ('token', 'BRACER'),
+#   ('token', 'CONSTANT'),
+#   ('token', 'TYPE'),
+#   ('token', 'CURLYL'),
+#   ('token', 'CURLYR'),
+#   ('token', 'ELLIPSIS'),
+    ('left', 'DOT'),
+#   ('token', 'COMMA'),
+    ('left', 'SEMI_COLON'),
+
+    ('left', 'IN'),
+    ('left', 'AS'),
+
+#   ('token', 'MODIFIER'),
+#   ('token', 'QUANTOR'),
+
+#    ('left', 'SIZE'),
+)
+
+class Lexer(object):
+    """PLY lexer rules for the query language.
+
+    PLY takes the regular expression of every ``t_*`` method from that
+    method's docstring, so those docstrings are part of the lexer and must
+    not be turned into prose.  Method rules are tried in definition order,
+    which is why a longer operator is always declared before any operator
+    that is a prefix of it (``<=`` before ``<``, ``~==`` before ``~=``,
+    ``...`` before ``.``).
+    """
+    tokens = tokens
+    t_ignore = ' \t\n\r'
+
+    def t_error(self, t):
+        # Report the offending character and resume scanning right after it.
+        print "Illegal character '%s'" % t.value[0]
+        t.lexer.skip(1)
+
+    # --- keywords -----------------------------------------------------
+    # The trailing \b stops a keyword from being cut off the front of a
+    # longer identifier (e.g. "index" must not lex as IN followed by "dex").
+
+    def t_UNION(self, t):
+        r'union\b'
+        return t
+
+    def t_DIFFER(self, t):
+        r'differ\b'
+        return t
+
+    def t_SET(self, t):
+        r'set\b'
+        return t
+
+    def t_LIST(self, t):
+        r'list\b'
+        return t
+
+    def t_BAG(self, t):
+        r'bag\b'
+        return t
+
+    def t_FOR(self, t):
+        r'for\b'
+        return t
+
+    def t_LEN(self, t):
+        r'len\b'
+        return t
+
+    def t_AS(self, t):
+        r'as\b'
+        return t
+
+    def t_IN(self, t):
+        r'in\b'
+        return t
+
+    def t_OR(self, t):
+        r'or\b'
+        return t
+
+    def t_AND(self, t):
+        r'and\b'
+        return t
+
+    def t_NOT(self, t):
+        r'not\b'
+        return t
+
+    def t_ISINSTANCE(self, t):
+        r'isinstance\b'
+        return t
+
+    def t_EVERY(self, t):
+        r'every\b'
+        return t
+
+    def t_ATMOST(self, t):
+        r'atmost\b'
+        return t
+
+    def t_ATLEAST(self, t):
+        r'atleast\b'
+        return t
+
+    def t_SOME(self, t):
+        r'some\b'
+        return t
+
+    def t_JUST(self, t):
+        r'just\b'
+        return t
+
+    # --- literals and names -------------------------------------------
+
+    def t_CONSTANT(self, t):
+        r'(\'(\\.|[^\'])*\'|"(\\.|[^"])*"|[0-9]+)'
+        return t
+
+    def t_IDENTIFIER(self, t):
+        r'[a-zA-Z][0-9a-zA-Z_]*'
+        return t
+
+    # --- operators and punctuation ------------------------------------
+
+    def t_COMMA(self, t):
+        r','
+        return t
+
+#this may be != sign
+    def t_NOT_EQUAL(self, t):
+        r'~=='
+        return t
+
+    def t_NOT_ASSIGN(self, t):
+        r'~='
+        return t
+
+    # <= and >= have to come before < and >, otherwise the one-character
+    # rules win and "<=" is split into LT followed by ASSIGN.
+    def t_LTE(self, t):
+        r'<='
+        return t
+
+    def t_GTE(self, t):
+        r'>='
+        return t
+
+    def t_LT(self, t):
+        r'<'
+        return t
+
+    def t_GT(self, t):
+        r'>'
+        return t
+
+    def t_ELLIPSIS(self, t):
+        r'\.\.\.'
+        return t
+
+    def t_PIPE(self, t):
+        r'\|'
+        return t
+
+    def t_DOT(self, t):
+        r'\.'
+        return t
+
+    def t_CBRACKET_L(self, t):
+        r'{'
+        return t
+
+    def t_CBRACKET_R(self, t):
+        r'}'
+        return t
+
+    def t_EQUAL(self, t):
+        r'=='
+        return t
+
+    def t_SEMI_COLON(self, t):
+        r';'
+        return t
+
+    def t_BRACKET_L(self, t):
+        r'\('
+        return t
+
+    def t_BRACKET_R(self, t):
+        r'\)'
+        return t
+
+    def t_ASSIGN(self, t):
+        r'='
+        return t
+
+    def t_SBRACKET_L(self, t):
+        r'\['
+        return t
+
+    def t_SBRACKET_R(self, t):
+        r'\]'
+        return t
+
+# Arithmetic operators are disabled: none of them is declared in `tokens`,
+# and PLY rejects a rule whose token is not declared there.
+#    def t_MUL(self, t):
+#        r'\*'
+#        return t
+
+#    def t_DIV(self, t):
+#        r'/'
+#        return t
+
+#    def t_PLUS(self, t):
+#        r'\+'
+#        return t
+
+#    def t_MINUS(self, t):
+#        r'-'
+#        return t
+
+
+
+class Parser(object):
+    """PLY yacc grammar that turns the token stream into query objects.
+
+    yacc reads each production from the docstring of the matching ``p_*``
+    method, so those docstrings are grammar and must not be turned into
+    prose.  In every rule ``t[0]`` is the value of the left-hand side and
+    ``t[1]``, ``t[2]``, ... are the values of the right-hand-side symbols.
+    Query objects are built with the ``metadata`` given to the constructor
+    and the ``symbols`` container created there.
+    """
+    tokens = tokens
+    precedence = precedence
+    metadata = None
+    symbols = None
+    types = { 'set' : set, 'list': list }
+    start = 'expression'
+
+    def __init__(self, metadata):
+        self.metadata = metadata
+        self.symbols = SymbolContainer()
+
+    def p_error(self, t):
+        # yacc passes None instead of a token when the input ends
+        # unexpectedly, so there is no value/position to report then.
+        if t is None:
+            print "Syntax error at end of input"
+        else:
+            print "Syntax error at '%s' (%s)" % (t.value, t.lexpos)
+
+    def p_expr_union(self, t):
+        r'''expression : expression UNION expression
+        '''
+        t[0] = Union(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "expression UNION expression" to "expression"', t[0]
+
+    def p_expr_differ(self, t):
+        r'''expression : expression DIFFER expression
+        '''
+        t[0] = Differ(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "expression DIFFER expression" to "expression"', t[0]
+
+#    def p_expr_3(self, t):
+#        r'''expression : collection SBRACKET_L expression SBRACKET_R
+#        '''
+#        t[0] = Query(self.metadata, self.symbols, t[1], [], t[3])
+#        if DEBUG: print 'reducing "collection SBRACKET_L qualifier PIPE expression SBRACKET_R" to "expression"'
+
+    def p_expr_query(self, t):
+        r'''expression : collection SBRACKET_L qualifier PIPE expression SBRACKET_R
+        '''
+        t[0] = Query(self.metadata, self.symbols, t[1], t[3], t[5])
+        if DEBUG: print 'reducing "collection SBRACKET_L qualifier PIPE expression SBRACKET_R" to "expression"', t[0]
+
+#TODO add a test
+    def p_expr_for_query(self, t):
+        r'''expression : collection SBRACKET_L qualifier FOR expression SBRACKET_R
+        '''
+        t[0] = Query(self.metadata, self.symbols, t[1], t[3], t[5])
+        if DEBUG: print 'reducing "collection SBRACKET_L qualifier FOR expression SBRACKET_R" to "expression"', t[0]
+
+    def p_expr_literal(self, t):
+        r'''expression : literal
+        '''
+        t[0] = t[1]
+        if DEBUG: print 'reducing "literal" to "expression"', t[0]
+
+    def p_expr_path(self, t):
+        r'''expression : path
+        '''
+        t[0] = t[1]
+        if DEBUG: print 'reducing "path" to "expression"', t[0]
+
+    def p_expr_call(self, t):
+        r'''expression : call
+        '''
+        t[0] = t[1]
+        if DEBUG: print 'reducing "call" to "expression"', t[0]
+
+    def p_expr_len(self, t):
+        r'''expression : LEN BRACKET_L expression BRACKET_R
+        '''
+        t[0] = Count(self.metadata, self.symbols, t[3])
+        if DEBUG: print 'reducing "LEN BRACKET_L expression BRACKET_R" to "expression"', t[0]
+
+    def p_collection_set(self, t):
+        r'''collection : SET
+        '''
+        t[0] = self.types['set']
+        if DEBUG: print 'reducing "set" to "collection"', t[0]
+
+    def p_collection_list(self, t):
+        r'''collection : LIST
+        '''
+        t[0] = self.types['list']
+        if DEBUG: print 'reducing "list" to "collection"', t[0]
+
+    def p_collection_bag(self, t):
+        r'''collection : BAG
+        '''
+        raise NotImplementedError('bag')
+
+    def p_qualifier_null(self, t):
+        r'''qualifier :
+        '''
+        t[0] = []
+        if DEBUG: print 'reducing "" to "qualifier"', t[0]
+
+    def p_qualifier_generator(self, t):
+        r'''qualifier : generator
+        '''
+        t[0] = [t[1]]
+        if DEBUG: print 'reducing "generator" to "qualifier"', t[0]
+
+    def p_qualifier_definition(self, t):
+        r'''qualifier : definition
+        '''
+        t[0] = [t[1]]
+        if DEBUG: print 'reducing "definition" to "qualifier"', t[0]
+
+    def p_qualifier_filter(self, t):
+        r'''qualifier : filter
+        '''
+        t[0] = [t[1]]
+        if DEBUG: print 'reducing "filter" to "qualifier"', t[0]
+
+    def p_qualifier_qualifier(self, t):
+        r'''qualifier : qualifier SEMI_COLON qualifier
+        '''
+        # Every other qualifier rule yields a list, so the combined
+        # qualifier is the concatenation of both sides.  (t[0] is not set
+        # before the rule runs and list.extend() returns None, so the
+        # result must not be built by extending t[0].)
+        t[0] = t[1] + t[3]
+        if DEBUG: print 'reducing "qualifier SEMI_COLON qualifier" to "qualifier"', t[0]
+
+#    def p_qualifier_6(self, t):
+#        r'''qualifier : expression
+#        '''
+#        t[0] = t[1]
+#        if DEBUG: print 'reducing "expression" to "qualifier"'
+
+    def p_generator_in(self, t):
+        r'''generator : IDENTIFIER IN expression
+        '''
+        t[0] = In(self.metadata,
+                  self.symbols,
+                  Identifier(self.metadata,
+                             self.symbols,
+                             t[1]),
+                  t[3])
+        if DEBUG: print 'reducing "IDENTIFIER IN expression" to "generator"', t[0]
+
+    def p_filter_and(self, t):
+        r'''filter : filter AND filter
+        '''
+        t[0] = And(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "filter AND filter" to "filter"', t[0]
+
+    def p_filter_or(self, t):
+        r'''filter : filter OR filter
+        '''
+        t[0] = Or(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "filter OR filter" to "filter"', t[0]
+
+    def p_filter_not(self, t):
+        r'''filter : NOT condition
+        '''
+        # The production has only two symbols: t[1] is the NOT keyword and
+        # t[2] the negated condition; there is no t[3].
+        # NOTE(review): assumes Not takes a single operand -- confirm
+        # against ocql.queryobject.
+        t[0] = Not(self.metadata, self.symbols, t[2])
+        if DEBUG: print 'reducing "NOT condition" to "filter"', t[0]
+
+    def p_filter_condition(self, t):
+        r'''filter : condition
+        '''
+        t[0] = t[1]
+        if DEBUG: print 'reducing "condition" to "filter"', t[0]
+
+    def p_condition_filter(self, t):
+        r'''condition : BRACKET_L filter BRACKET_R
+        '''
+        t[0] = t[2]
+        if DEBUG: print 'reducing "BRACKET_L filter BRACKET_R" to "condition"', t[0]
+
+    def p_condition_assign(self, t):
+        r'''condition : quantified ASSIGN quantified
+        '''
+        raise NotImplementedError('assign')
+
+    def p_condition_not_assign(self, t):
+        r'''condition : quantified NOT_ASSIGN quantified
+        '''
+        raise NotImplementedError('not assign')
+
+    def p_condition_lt(self, t):
+        r'''condition : quantified LT quantified
+        '''
+        t[0] = Lt(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "quantified operator quantified" to "condition"', t[0]
+
+    def p_condition_lte(self, t):
+        r'''condition : quantified LTE quantified
+        '''
+        t[0] = Le(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "quantified operator quantified" to "condition"', t[0]
+
+    def p_condition_gt(self, t):
+        r'''condition : quantified GT quantified
+        '''
+        t[0] = Gt(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "quantified operator quantified" to "condition"', t[0]
+
+    def p_condition_gte(self, t):
+        r'''condition : quantified GTE quantified
+        '''
+        t[0] = Ge(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "quantified operator quantified" to "condition"', t[0]
+
+    def p_condition_equal(self, t):
+        r'''condition : quantified EQUAL quantified
+        '''
+        t[0] = Eq(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "quantified operator quantified" to "condition"', t[0]
+
+    def p_condition_not_equal(self, t):
+        r'''condition : quantified  NOT_EQUAL quantified
+        '''
+        t[0] = Ne(self.metadata, self.symbols, t[1], t[3])
+        if DEBUG: print 'reducing "quantified operator quantified" to "condition"', t[0]
+
+    #need to extend this for collection of types
+    def p_condition_isinstance(self, t):
+        r'''condition : ISINSTANCE BRACKET_L expression COMMA IDENTIFIER BRACKET_R
+        '''
+        raise NotImplementedError('isinstance')
+
+    def p_quantified_expression(self, t):
+        r'''quantified : expression
+        '''
+        t[0] = t[1]
+        if DEBUG: print 'reducing "expression" to "quantified"', t[0]
+
+    def p_quantified_some(self, t):
+        r'''quantified : SOME expression
+        '''
+        t[0] = Some(self.metadata, self.symbols, t[2])
+        if DEBUG: print 'reducing "quantification expression" to "quantified"', t[0]
+
+    def p_quantified_just(self, t):
+        r'''quantified : JUST expression
+        '''
+        t[0] = Just(self.metadata, self.symbols, t[2])
+        if DEBUG: print 'reducing "quantification expression" to "quantified"', t[0]
+
+    def p_quantified_every(self, t):
+        r'''quantified : EVERY expression
+        '''
+        t[0] = Every(self.metadata, self.symbols, t[2])
+        if DEBUG: print 'reducing "quantification expression" to "quantified"', t[0]
+
+    def p_quantified_atleast(self, t):
+        r'''quantified : ATLEAST expression
+        '''
+        t[0] = Atleast(self.metadata, self.symbols, t[2])
+        if DEBUG: print 'reducing "quantification expression" to "quantified"', t[0]
+
+    def p_quantified_almost(self, t):
+        r'''quantified : ATMOST expression
+        '''
+        t[0] = Atmost(self.metadata, self.symbols, t[2])
+        if DEBUG: print 'reducing "quantification expression" to "quantified"', t[0]
+
+    def p_definition_as(self, t):
+        r'''definition : IDENTIFIER AS expression
+        '''
+        # TODO: no query object is built for definitions yet, so t[0] is
+        # left unset and the enclosing qualifier list receives None.
+        #t[0]=''
+        if DEBUG: print 'reducing "IDENTIFIER AS expression" to "definition"', t[0]
+
+    def p_literal_constant(self, t):
+        r'''literal : CONSTANT
+        '''
+        t[0] = Constant(self.metadata, self.symbols, t[1])
+        if DEBUG: print 'reducing "CONSTANT" to "literal"', t[0]
+
+    def p_literal_element(self, t):
+        r'''literal : collection CBRACKET_L element CBRACKET_R
+        '''
+        raise NotImplementedError('collection set')
+
+    def p_element_null(self, t):
+        r'''element :
+        '''
+        t[0] = None
+        if DEBUG: print 'reducing "" to "element"', t[0]
+
+    def p_element_expression(self, t):
+        r'''element : expression
+        '''
+        t[0] = t[1]
+        if DEBUG: print 'reducing "expression" to "element"', t[0]
+
+# Why this raise a shift/reduce conflict
+#    def p_element_comma(self, t):
+#        r'''element : element COMMA element
+#        '''
+#        raise NotImplementedError('element list')
+#        if DEBUG: print 'reducing "element COMMA element" to "element"', t[0]
+
+    def p_element_ellipsis(self, t):
+        r'''element : expression ELLIPSIS expression
+        '''
+        raise NotImplementedError('range')
+
+    def p_path_identifier(self, t):
+        r'''path : IDENTIFIER
+        '''
+        t[0] = Identifier(self.metadata, self.symbols, t[1])
+        if DEBUG: print 'reducing "IDENTIFIER" to "path"', t[0]
+
+    def p_path_method(self, t):
+        r'''path : IDENTIFIER DOT method
+        '''
+        t[0] = Property(self.metadata, self.symbols, Identifier(self.metadata, self.symbols, t[1]), t[3])
+        if DEBUG: print 'reducing "IDENTIFIER DOT method" to "path"', t[0]
+
+    def p_method_identifier(self, t):
+        r'''method : IDENTIFIER
+        '''
+        t[0] = Identifier(self.metadata, self.symbols, t[1])
+        if DEBUG: print 'reducing "IDENTIFIER" to "method"', t[0]
+
+    def p_method_arguments(self, t):
+        r'''method : IDENTIFIER BRACKET_L argument_list BRACKET_R
+        '''
+        raise NotImplementedError('function call')
+
+    def p_argument_list_null(self, t):
+        r'''argument_list :
+        '''
+        t[0] = None
+        if DEBUG: print 'reducing "" to "argument_list"', t[0]
+
+    def p_argument_list_expression(self, t):
+        r'''argument_list : expression
+        '''
+        t[0] = t[1]
+        if DEBUG: print 'reducing "expression" to "argument_list"', t[0]
+
+    def p_argument_list_set(self, t):
+        r'''argument_list : expression COMMA argument_list
+        '''
+        # Placeholder value: multi-argument lists are not built yet.
+        t[0]=''
+        if DEBUG: print 'reducing "expression COMMA argument_list" to "argument_list"', t[0]
+
+    def p_call(self, t):
+        r'''call : IDENTIFIER BRACKET_L argument_list BRACKET_R
+        '''
+        raise NotImplementedError('function call')
+
+# The lexer is built once, at import time, to keep lexer and parser
+# instantiation to the minimum possible because both are quite expensive;
+# parse() works on a clone of this instance.
+# Parsers, on the other hand, must be thread safe (see the disabled
+# thread-local PARSERS below).
+LEXER = lex.lex(object=Lexer(), debug=0)
+#PARSERS = local()
+
+def parse(str, metadata):
+    """Parse the query text ``str`` and return the yacc parser's result.
+
+    Every call works on a clone of the shared LEXER and on a freshly built
+    yacc parser wrapping ``Parser(metadata)``.  Exceptions propagate
+    unchanged; they are printed first when DEBUG is set.
+    """
+    lexer = LEXER.clone()
+
+    # A per-thread parser cache (PARSERS) was sketched here but is disabled,
+    # so the parser is rebuilt on each call.
+    try:
+        parser = yacc.yacc(module=Parser(metadata))
+        return parser.parse(str, lexer=lexer)
+    except Exception, e:
+        if DEBUG:
+            print e
+        raise
+
+class QueryParser(object):
+    """IQueryParser adapter for query strings."""
+    implements(IQueryParser)
+    adapts(basestring)
+
+    def __init__(self, context):
+        # The adapted object is the query text itself.
+        self.context = context
+
+    def __call__(self, metadata):
+        # Parse the adapted text and wrap the resulting tree in a Head node.
+        return Head(parse(self.context, metadata))
\ No newline at end of file



More information about the Checkins mailing list