[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser - kjParseBuild.py:1.1.2.2 kjParser.py:1.1.2.2 kjSet.py:1.1.2.2

Andreas Jung andreas@digicool.com
Thu, 7 Feb 2002 15:35:42 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser
In directory cvs.zope.org:/tmp/cvs-serv29733

Modified Files:
      Tag: ajung-textindexng-branch
	kjParseBuild.py kjParser.py kjSet.py 
Log Message:
replaced the regex module with the re module (thanks to Stephan Richter)
(taken from the ZOQLMethod package)


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjParseBuild.py 1.1.2.1 => 1.1.2.2 === (2397/2497 lines abridged)
+
 # python code for building a parser from a grammar
 #  Copyright Aaron Robert Watters, 1994
 #
@@ -14,12 +14,10 @@
 import string
 import kjSet
 import kjParser
-import regex
 
 # import some constants
-from kjParser import \
- TERMFLAG, NOMATCHFLAG, MOVETOFLAG, REDUCEFLAG, TRANSFLAG, KEYFLAG, \
- NONTERMFLAG, TERMFLAG, EOFFLAG, ENDOFFILETOKEN
+from kjParser import TERMFLAG, NOMATCHFLAG, MOVETOFLAG, REDUCEFLAG, \
+     TRANSFLAG, KEYFLAG, NONTERMFLAG, TERMFLAG, EOFFLAG, ENDOFFILETOKEN
 
 PMODULE = kjParser.THISMODULE
 
@@ -41,121 +39,122 @@
 #
 class CFSMachine(kjParser.FSMachine):
 
-  def __init__(self, nonterm):
-      kjParser.FSMachine.__init__(self, nonterm)
+    def __init__(self, nonterm):
+        kjParser.FSMachine.__init__(self, nonterm)
 
-  # return the epsilon closure of the FSM as a new FSM
-  #
-  # DoNullMap, if set, will map unexpected tokens to
-  # the "empty" state (usually creating a really big fsm)
-  #
-  def Eclosure(self, Epsilon, DoNullMaps=0):
-    Closure = CFSMachine( self.root_nonTerminal )
+  
+    # return the epsilon closure of the FSM as a new FSM
+    #
+    # DoNullMap, if set, will map unexpected tokens to
+    # the "empty" state (usually creating a really big fsm)
+    #
+    def Eclosure(self, Epsilon, DoNullMaps=0):
+        Closure = CFSMachine( self.root_nonTerminal )
+        
+        # compute the Epsilon Graph between states
+        EGraph = kjSet.NewDG([])
+        for State in range(0,self.maxState+1):
+            # every state is E-connected to self
+            kjSet.AddArc( EGraph, State, State )
+            # add possible transition on epsilon (ONLY ONE SUPPORTED!)

[-=- -=- -=- 2397 lines omitted -=- -=- -=-]

-  L = kjParser.nonterminal("L")
-  R = kjParser.nonterminal("R")
-  RS1 = kjParser.ParseRule( S, [L, equals, R] )
-  RS2 = kjParser.ParseRule( S, [R], echo )
-  RL1 = kjParser.ParseRule( L, [star, R])
-  RL2 = kjParser.ParseRule( L, [id])
-  RR1 = kjParser.ParseRule( R, [L] )
-  rs3 = ruleset(S, [RS1,RS2,RL1,RL2,RR1])
-  rs3.CompFirst()
-  rs3.CompFollow()
-  rs3.CompSLRNFA()
-  rs3.CompDFA()
-  #rs3.SLRFixDFA() # should fail and does.
-
-  # testing RULEGRAM
-  ObjG = NullCGrammar()
-  ObjG.Addterm("id","id",echo)
-  ObjG.Nonterms("T E Ep F Tp")
-  ObjG.Keywords("begin end")
-  ObjG.punct("+*()")
-  ObjG.comments(["--.*\n"])
-  # PROBLEM WITH COMMENTS???
-  Rulestr = """
-    ## what a silly grammar!
-    T ::
-    @R One :: T >> begin E end
-    @R Three :: E >>
-    @R Two :: E >> E + T
-    @R Four :: E >> ( T )
-    """
-  RL = RULEGRAM.DoParse1( Rulestr, ObjG )
+    # testing RULEGRAM
+    ObjG = NullCGrammar()
+    ObjG.Addterm("id","id",echo)
+    ObjG.Nonterms("T E Ep F Tp")
+    ObjG.Keywords("begin end")
+    ObjG.punct("+*()")
+    ObjG.comments(["--.*\n"])
+    # PROBLEM WITH COMMENTS???
+    Rulestr = """## what a silly grammar!
+                 T ::
+                 @R One :: T >> begin E end
+                 @R Three :: E >>
+                 @R Two :: E >> E + T
+                 @R Four :: E >> ( T )
+                 """
+    RL = RULEGRAM.DoParse1( Rulestr, ObjG )
+
+
+


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjParser.py 1.1.2.1 => 1.1.2.2 === (2061/2161 lines abridged)
 import kjSet
 import string
-import regex
-import regsub
-import string
+from string import joinfields, whitespace
+
+import re
+
 
 # set this flag for regression testing at each load
 RUNTESTS = 0
@@ -40,7 +41,8 @@
 
 # regular expression for matching whitespace
 WHITERE = "["+string.whitespace+"]+"
-WHITEREGEX = regex.compile(WHITERE)
+WHITEREGEX = re.compile(WHITERE)
+    
 
 # local errors
 LexTokenError = "LexTokenError" # may happen on bad string
@@ -64,13 +66,15 @@
 
 # utility function for error diagnostics
 def DumpStringWindow(Str, Pos, Offset=15):
-    L = []
-    L.append("near ::")
-    start = Pos-Offset
-    end = Pos+Offset
-    if start<0: start = 0
-    if end>len(Str): end = len(Str)
-    L.append(`Str[start:Pos]`+"*"+`Str[Pos:end]`)
+    L = [""]
+    lines = len(string.split(Str[:Pos], '\n'))+1
+    L.append("Syntax Error on line %i." %lines)
+    start = Pos - Offset
+    end = Pos + Offset
+    if start < 0: start = 0
+    if end > len(Str): end = len(Str)
+    L.append(`Str[start:end]`)
+    L.append(' '*(Pos-start+1)+'^')
     from string import join
     return join(L, "\n")
 
@@ -137,403 +141,433 @@
 #
 class LexDictionary:
 
-  def __init__(self):

[-=- -=- -=- 2061 lines omitted -=- -=- -=-]

+        LexTokens = {}
+        tokens = self.tokens
+        for tokenindex in range(len(tokens)):
+            (kind,name) = tokens[tokenindex]
+            if kind == KEYFLAG:
+               tokens[tokenindex] = LexD.keyword(name)
+            elif not kind in [TERMFLAG, NONTERMFLAG]:
+               raise FlowError, "unknown token type"
+        # not needed
+        self.tokens = tokens
+
+ 
+    def MakeRules(self):
+        Grammar = self.Gram
+        Grammar.DFA.root_nonTerminal = self.Root
+        NameIndex = Grammar.RuleNameToIndex
+        RuleTuples = self.RuleTups
+        nRules = len(RuleTuples)
+        RuleList = [None] * nRules
+        for index in range(nRules):
+            (Name, Components) = RuleTuples[index]
+            rule = apply(ParseRule, Components)
+            rule.Name = Name
+            RuleList[index] = rule
+            NameIndex[Name] = index
+        Grammar.RuleL = RuleList
+
+ 
+    def MakeTransitions(self):
+        Grammar = self.Gram
+        DFA = Grammar.DFA
+        StateTokenMap = DFA.StateTokenMap
+        tokens = self.tokens
+        # record the state number
+        DFA.maxState = self.MaxStates
+        # this is historical, unfortunately...  CLEAN IT UP SOMEDAY!
+        # THE DFA.States DICT IS NOT NEEDED (?) (here)
+        for state in range(1, self.MaxStates+1):
+            DFA.States[state] = [TRANSFLAG]
+        # record the reductions
+        for (fromState, TokenIndex, rulenum) in self.reducts:
+            DFA.SetReduction(fromState, tokens[TokenIndex], rulenum)
+        # record the transitions
+        for (fromState, TokenIndex, ToState) in self.moveTos:
+            DFA.SetMap(fromState, tokens[TokenIndex], ToState)
+
+ 
+    def Cleanup(self):
+        Grammar = self.Gram
+        Grammar.CleanUp()


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjSet.py 1.1.2.1 => 1.1.2.2 ===
-#
 # sets implemented using mappings
 #  Copyright Aaron Robert Watters, 1994
 #
@@ -13,34 +11,39 @@
         Result[Elt] = 1
     return Result
 
+
 def Empty(Set):
     if Set == {}:
-       return 1
+        return 1
     else:
-       return 0
+        return 0
+
 
 def get_elts(Set):
     return Set.keys()
 
+
 def member(Elt,Set):
     return Set.has_key(Elt)
 
+
 # in place mutators:
 # returns if no change otherwise 1
 
 def addMember(Elt,Set):
     change = 0
     if not Set.has_key(Elt):
-       Set[Elt] = 1
-       change = 1
+        Set[Elt] = 1
+        change = 1
     return change
 
+
 def Augment(Set, OtherSet):
     change = 0
     for Elt in OtherSet.keys():
         if not Set.has_key(Elt):
-           Set[Elt] = 1
-           change = 1
+            Set[Elt] = 1
+            change = 1
     return change
 
 
@@ -48,45 +51,51 @@
     change = 0
     for Elt in OtherSet.keys():
         if Set.has_key(Elt):
-           del Set[Elt]
-           change = 1
+            del Set[Elt]
+            change = 1
     return change
 
+
 # side effect free functions
 
 def Intersection(Set1, Set2):
     Result = {}
     for Elt in Set1.keys():
         if Set2.has_key(Elt):
-           Result[Elt] = 1
+            Result[Elt] = 1
     return Result
 
+
 def Difference(Set1, Set2):
     Result = {}
     for Elt in Set1.keys():
         if not Set2.has_key(Elt):
-           Result[Elt] = 1
+            Result[Elt] = 1
     return Result
 
+
 def Union(Set1,Set2):
     Result = {}
     Augment(Result,Set1)
     Augment(Result,Set2)
     return Result
 
+
 def Subset(Set1,Set2):
     Result = 1
     for Elt in Set1.keys():
         if not Set2.has_key(Elt):
-           Result = 0
-           return Result # nonlocal
+            Result = 0
+            return Result # nonlocal
     return Result
 
+
 def Same(Set1,Set2):
     if Subset(Set1,Set2) and Subset(Set2,Set1):
-       return 1
+        return 1
     else:
-       return 0
+        return 0
+
 
 # directed graphs as Dictionaries of Sets
 #   also only works for immutable nodes
@@ -109,6 +118,7 @@
         result = result + ThesePairs
     return result
 
+
 def AddArc(Graph, Source, Dest):
     change = 0
     if Graph.has_key(Source):
@@ -121,21 +131,25 @@
        change = 1
     return change
 
+
 def Neighbors(Graph,Source):
     if Graph.has_key(Source):
-       return get_elts(Graph[Source])
+        return get_elts(Graph[Source])
     else:
-       return []
+        return []
+
 
 def HasArc(Graph, Source, Dest):
     result = 0
     if Graph.has_key(Source) and member(Dest, Graph[Source]):
-       result = 1
+        result = 1
     return result
 
+
 def Sources(Graph):
     return Graph.keys()
 
+
 # when G1, G2 and G3 are different graphs this results in
 #   G1 = G1 U ( G2 o G3 )
 # If G1 is identical to one of G2,G3 the result is somewhat
@@ -151,20 +165,22 @@
         for Middle in Neighbors(G2,G2Source):
             for G3Dest in Neighbors(G3, Middle):
                 if not HasArc(G1, G2Source, G3Dest):
-                   change = 1
-                   AddArc(G1, G2Source, G3Dest)
+                    change = 1
+                    AddArc(G1, G2Source, G3Dest)
     return change
 
+
 # in place transitive closure of a graph
 def TransClose(Graph):
     change = AddComposition(Graph, Graph, Graph)
     somechange = change
     while change:
-       change = AddComposition(Graph, Graph, Graph)
-       if not somechange:
-          somechange = change
+        change = AddComposition(Graph, Graph, Graph)
+        if not somechange:
+            somechange = change
     return somechange
 
+
 ########### SQueue stuff
 #
 #  A GrabBag should be used to hold objects temporarily for future
@@ -183,10 +199,12 @@
     B[NEW] = START
     return B
 
+
 def BGempty(B):
     # other ops must maintain this: old == new iff empty
     return B[OLD] == B[NEW]
 
+
 # may return new, larger structure
 # must be used with assignment...  B = BGadd(e,B)
 def BGadd(elt, B):
@@ -194,43 +212,51 @@
     oldlen = len(B)
     # look for an available position
     while B[cursor] != None:
-       cursor = cursor+1
-       if cursor >= oldlen: cursor = START
-       if cursor == B[NEW]: #back to beginning
-          break
+        cursor = cursor+1
+        if cursor >= oldlen: cursor = START
+        if cursor == B[NEW]: #back to beginning
+            break
+
     # resize if wrapped
     if B[cursor] != None:
-       B = B + [None] * oldlen
-       cursor = oldlen
-       B[OLD] = START
+        B = B + [None] * oldlen
+        cursor = oldlen
+        B[OLD] = START
+
     if B[cursor] != None:
-       raise IndexError, "can't insert?"
+        raise IndexError, "can't insert?"
+
     # add the elt
     B[cursor] = (elt,)
     B[NEW] = cursor
+
     # B nonempty so OLD and NEW should differ.
     if B[OLD] == cursor:
-       B[NEW] = cursor + 1
-       if B[NEW]<=len(B): B[NEW] = START
+        B[NEW] = cursor + 1
+        if B[NEW]<=len(B): B[NEW] = START
     return B
 
+
 def BGgetdel(B):
     # find something to delete:
     cursor = B[OLD]
     blen = len(B)
     while B[cursor]==None:
-       cursor = cursor+1
-       if cursor>=blen: cursor = START
-       if cursor == B[OLD]: break # wrapped
+        cursor = cursor+1
+        if cursor>=blen: cursor = START
+        if cursor == B[OLD]: break # wrapped
+
     if B[cursor] == None:
-       raise IndexError, "delete from empty grabbag(?)"
+        raise IndexError, "delete from empty grabbag(?)"
     # test to see if bag is empty (position cursor2 at nonempty slot)
     cursor2 = cursor+1
     if cursor2>=blen: cursor2 = START
+
     while B[cursor2]==None:
-       cursor2 = cursor2+1
-       if cursor2>=blen: cursor2 = START
-       # since B[cursor] not yet deleted while will terminate
+        cursor2 = cursor2+1
+        if cursor2>=blen: cursor2 = START
+        # since B[cursor] not yet deleted while will terminate
+
     # get and delete the elt
     (result,) = B[cursor]
     B[cursor] = None
@@ -238,6 +264,7 @@
     B[OLD] = cursor2
     if B[NEW] == cursor2: B[NEW] = cursor
     return result
+
 
 def BGtest(n):
     B = NewBG()