[Zope-CVS] CVS: Products/ZCTextIndex - QueryParser.py:1.1.2.6

Tim Peters tim.one@comcast.net
Thu, 2 May 2002 18:43:47 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv8517

Modified Files:
      Tag: TextIndexDS9-branch
	QueryParser.py 
Log Message:
Made keyword recognition case-insensitive ("AND", "and", "aNd", ..., all
the same thing).


=== Products/ZCTextIndex/QueryParser.py 1.1.2.5 => 1.1.2.6 ===
 # Copyright (c) 2001, 2002 Zope Corporation and Contributors.
 # All Rights Reserved.
-# 
+#
 # This software is subject to the provisions of the Zope Public License,
 # Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
 # FOR A PARTICULAR PURPOSE.
-# 
+#
 ##############################################################################
 
 """Query Parser.
@@ -24,12 +24,29 @@
 
 An ATOM is a string not containing whitespace or parentheses, and not
 equal to one of the key words 'AND', 'OR', 'NOT'.  The key words are
-only recognized in all upper case.
-
+recognized in any mixture of case.
 """
 
 import re
 
+# Create unique symbols for token types.
+_AND    = intern("AND")
+_OR     = intern("OR")
+_NOT    = intern("NOT")
+_LPAREN = intern("(")
+_RPAREN = intern(")")
+_ATOM   = intern("ATOM")
+_EOF    = intern(" EOF ")
+
+# Map keyword string to token type.
+_keywords = {
+    _AND:       _AND,
+    _OR:        _OR,
+    _NOT:       _NOT,
+    _LPAREN:    _LPAREN,
+    _RPAREN:    _RPAREN,
+}
+
 class ParseError(Exception):
     pass
 
@@ -39,39 +56,47 @@
         pass # This parser has no persistent state
 
     def parseQuery(self, query):
-        # Lexical analysis
+        # Lexical analysis.
         tokens = re.findall(r"[()]|[^\s()]+", query)
         self.__tokens = tokens
-        self.__tokens.append(None) # EOF token
+        # classify tokens
+        self.__tokentypes = [_EOF] * len(tokens)
+        for i in range(len(tokens)):
+            token = tokens[i].upper()
+            self.__tokentypes[i] = _keywords.get(token, _ATOM)
+        # add _EOF
+        self.__tokens.append(_EOF)
+        self.__tokentypes.append(_EOF)
         self.__index = 0
-        # Syntactical analysis
+
+        # Syntactical analysis.
         tree = self._parseOrExpr()
-        self._require(None)
+        self._require(_EOF)
         return tree
 
     # Recursive descent parser
 
-    def _require(self, token):
-        if not self._check(token):
+    def _require(self, tokentype):
+        if not self._check(tokentype):
             t = self.__tokens[self.__index]
-            raise ParseError, "Token %r required, %r found" % (token, t)
+            raise ParseError, "Token %r required, %r found" % (tokentype, t)
 
-    def _check(self, token):
-        if self.__tokens[self.__index] == token:
+    def _check(self, tokentype):
+        if self.__tokentypes[self.__index] is tokentype:
             self.__index += 1
             return 1
         else:
             return 0
 
-    def _get(self):
+    def _get(self, tokentype):
         t = self.__tokens[self.__index]
-        self.__index += 1
+        self._require(tokentype)
         return t
 
     def _parseOrExpr(self):
         L = []
         L.append(self._parseAndExpr())
-        while self._check("OR"):
+        while self._check(_OR):
             L.append(self._parseAndExpr())
         if len(L) == 1:
             return L[0]
@@ -81,7 +106,7 @@
     def _parseAndExpr(self):
         L = []
         L.append(self._parseTerm())
-        while self._check("AND"):
+        while self._check(_AND):
             L.append(self._parseNotExpr())
         if len(L) == 1:
             return L[0]
@@ -89,19 +114,17 @@
             return AndNode(L)
 
     def _parseNotExpr(self):
-        if self._check("NOT"):
+        if self._check(_NOT):
             return NotNode(self._parseTerm())
         else:
             return self._parseTerm()
 
     def _parseTerm(self):
-        if self._check("("):
+        if self._check(_LPAREN):
             tree = self._parseOrExpr()
-            self._require(")")
+            self._require(_RPAREN)
         else:
-            t = self._get()
-            if t in [")", "AND", "OR", "NOT", None]:
-                raise ParseError("Token %r not expected" % t)
+            t = self._get(_ATOM)
             tree = AtomNode(t)
         return tree
 
@@ -124,16 +147,16 @@
 
 class NotNode(ParseTreeNode):
 
-    _nodeType = "NOT"
+    _nodeType = _NOT
 
 class AndNode(ParseTreeNode):
 
-    _nodeType = "AND"
+    _nodeType = _AND
 
 class OrNode(ParseTreeNode):
 
-    _nodeType = "OR"
+    _nodeType = _OR
 
 class AtomNode(ParseTreeNode):
 
-    _nodeType = "ATOM"
+    _nodeType = _ATOM