[ZPT] CVS: Packages/TAL - HTMLTALParser.py:1.19 README.txt:1.5 TALGenerator.py:1.22

guido@digicool.com guido@digicool.com
Wed, 21 Mar 2001 17:49:37 -0500 (EST)


Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv18168

Modified Files:
	HTMLTALParser.py README.txt TALGenerator.py 
Log Message:
Pile of major changes -- the tests should all succeed again now:

- NestingError now derives from HTMLParseError, which simplifies it.

- Thread the input position through all the code generation routines;
  all compile-time exceptions now possess lineno and offset.

- Restructured the code that inserts implied end-tags, so that the
  generated output matches the input more often.  This was the
  hardest part to get right, and I expect to keep working on it.




--- Updated File HTMLTALParser.py in package Packages/TAL --
--- HTMLTALParser.py	2001/03/19 22:57:07	1.18
+++ HTMLTALParser.py	2001/03/21 22:49:37	1.19
@@ -91,7 +91,7 @@
 
 from TALGenerator import TALGenerator
 from TALDefs import ZOPE_METAL_NS, ZOPE_TAL_NS, METALError, TALError
-from HTMLParser import HTMLParser
+from HTMLParser import HTMLParser, HTMLParseError
 
 BOOLEAN_HTML_ATTRS = [
     # List of Boolean attributes in HTML that may be given in
@@ -135,20 +135,14 @@
                                + BLOCK_CLOSING_TAG_MAP.keys())
 
 
-class NestingError(Exception):
+class NestingError(HTMLParseError):
     """Exception raised when elements aren't properly nested."""
 
-    def __init__(self, tag, lineno, offset):
+    def __init__(self, tag, position=(None, None)):
         self.tag = tag
-        self.lineno = lineno
-        self.offset = offset
+        HTMLParseError.__init__(self, "unmatched </%s>" % tag, position)
 
-    def __str__(self):
-        s = "line %d, offset %d: unmatched </%s>" % (
-            self.lineno, self.offset, self.tag)
-        return s
 
-
 class HTMLTALParser(HTMLParser):
 
     # External API
@@ -156,7 +150,7 @@
     def __init__(self, gen=None):
         HTMLParser.__init__(self)
         if gen is None:
-            gen = TALGenerator()
+            gen = TALGenerator(xml=0)
         self.gen = gen
         self.tagstack = []
         self.nsstack = []
@@ -172,7 +166,7 @@
         self.feed(data)
         self.close()
         while self.tagstack:
-            self.finish_endtag(self.tagstack[-1])
+            self.implied_endtag(self.tagstack[-1], 2)
         assert self.nsstack == [], self.nsstack
         assert self.nsdict == {}, self.nsdict
 
@@ -182,12 +176,43 @@
     # Overriding HTMLParser methods
 
     def finish_starttag(self, tag, attrs):
+        self.close_para_tags(tag)
+        self.tagstack.append(tag)
+        self.scan_xmlns(attrs)
+        attrlist, taldict, metaldict = self.extract_attrs(attrs)
+        self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
+                                  self.getpos())
+        if tag in EMPTY_HTML_TAGS:
+            self.implied_endtag(tag, -1)
+
+    def finish_startendtag(self, tag, attrs):
+        self.close_para_tags(tag)
         self.scan_xmlns(attrs)
+        attrlist, taldict, metaldict = self.extract_attrs(attrs)
+        if taldict.get("replace") or taldict.get("content"):
+            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
+                                      self.getpos())
+            self.gen.emitEndElement(tag)
+        else:
+            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
+                                      self.getpos(), isend=1)
+        self.pop_xmlns()
+
+    def finish_endtag(self, tag):
         if tag in EMPTY_HTML_TAGS:
-            self.pop_xmlns()
-        elif BLOCK_CLOSING_TAG_MAP.has_key(tag):
+            # </img> etc. in the source is an error
+            raise NestingError(tag, self.getpos())
+        self.close_enclosed_tags(tag)
+        self.gen.emitEndElement(tag)
+        self.pop_xmlns()
+        self.tagstack.pop()
+
+    def close_para_tags(self, tag):
+        if tag in EMPTY_HTML_TAGS:
+            return
+        close_to = -1
+        if BLOCK_CLOSING_TAG_MAP.has_key(tag):
             blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag]
-            close_to = -1
             for i in range(len(self.tagstack)):
                 t = self.tagstack[i]
                 if t in blocks_to_close:
@@ -195,51 +220,39 @@
                         close_to = i
                 elif t in BLOCK_LEVEL_HTML_TAGS:
                     close_to = -1
-            self.close_to_level(close_to)
-            self.tagstack.append(tag)
         elif tag in PARA_LEVEL_HTML_TAGS + BLOCK_LEVEL_HTML_TAGS:
-            close_to = -1
             for i in range(len(self.tagstack)):
                 if self.tagstack[i] in BLOCK_LEVEL_HTML_TAGS:
                     close_to = -1
                 elif self.tagstack[i] in PARA_LEVEL_HTML_TAGS:
                     if close_to == -1:
                         close_to = i
-            self.close_to_level(close_to)
-            self.tagstack.append(tag)
-        else:
-            self.tagstack.append(tag)
-        attrlist, taldict, metaldict = self.extract_attrs(attrs)
-        self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
-                                  self.getpos())
+        if close_to >= 0:
+            while len(self.tagstack) > close_to:
+                self.implied_endtag(self.tagstack[-1], 1)
 
-    def finish_endtag(self, tag, implied=0):
-        if tag in EMPTY_HTML_TAGS:
-            return
+    def close_enclosed_tags(self, tag):
         if tag not in self.tagstack:
-            lineno, offset = self.getpos()
-            raise NestingError(tag, lineno, offset)
-        while self.tagstack[-1] != tag:
-            self.finish_endtag(self.tagstack[-1], implied=1)
+            raise NestingError(tag, self.getpos())
+        while tag != self.tagstack[-1]:
+            self.implied_endtag(self.tagstack[-1], 1)
+        assert self.tagstack[-1] == tag
+
+    def implied_endtag(self, tag, implied):
+        assert tag == self.tagstack[-1]
+        assert implied in (-1, 1, 2)
+        if implied > 0:
+            if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS:
+                # Pick out trailing whitespace from the program, and
+                # insert the close tag before the whitespace.
+                white = self.gen.unEmitWhitespace()
+                self.gen.emitEndElement(tag)
+                if white:
+                    self.gen.emitRawText(white)
+            else:
+                self.gen.emitEndElement(tag)
         self.tagstack.pop()
         self.pop_xmlns()
-        if implied \
-           and tag in TIGHTEN_IMPLICIT_CLOSE_TAGS \
-           and self.gen.program \
-           and self.gen.program[-1][0] == "rawtext":
-            # Pick out trailing whitespace from the last instruction,
-            # if it was a "rawtext" instruction, and insert the close
-            # tag before the whitespace.
-            data = self.gen.program.pop()[1]
-            prefix = string.rstrip(data)
-            white = data[len(prefix):]
-            if data:
-                self.gen.emitRawText(prefix)
-            self.gen.emitEndElement(tag)
-            if white:
-                self.gen.emitRawText(white)
-        else:
-            self.gen.emitEndElement(tag)
 
     def handle_charref(self, name):
         self.gen.emitRawText("&#%s;" % name)
@@ -301,10 +314,3 @@
                     taldict[suffix] = value
             attrlist.append(item)
         return attrlist, taldict, metaldict
-
-    def close_to_level(self, close_to):
-        if close_to > -1:
-            closing = self.tagstack[close_to:]
-            closing.reverse()
-            for t in closing:
-                self.finish_endtag(t, implied=1)

--- Updated File README.txt in package Packages/TAL --
--- README.txt	2001/03/20 23:05:12	1.4
+++ README.txt	2001/03/21 22:49:37	1.5
@@ -80,19 +80,11 @@
 
 - Bring DummyEngine.py up to specs.
 
-- Disallow TAL on start-tags whose end-tag is implied.
-
-- The call to emitSubstitution() in emitEndElement() for "replace"
-  doesn't pass in anything for attrDict.
-
 - Finish implementing insertStructure(): attribute replacement isn't
   implemented yet.
 
 - TALInterpreter currently always uses an XML parser to parse inserted
   structure; it should use a parser appropriate to the mode.
-
-- Incorporate line number and offset information into remaining
-  compile-time exceptions.
 
 - HTMLTALParser.py and TALParser.py are silly names.  Should be
   HTMLTALCompiler.py and XMLTALCompiler.py (or maybe shortened,

--- Updated File TALGenerator.py in package Packages/TAL --
--- TALGenerator.py	2001/03/17 04:06:53	1.21
+++ TALGenerator.py	2001/03/21 22:49:37	1.22
@@ -99,7 +99,7 @@
 
 class TALGenerator:
 
-    def __init__(self, expressionCompiler=None):
+    def __init__(self, expressionCompiler=None, xml=1):
         if not expressionCompiler:
             expressionCompiler = DummyCompiler()
         self.expressionCompiler = expressionCompiler
@@ -109,6 +109,7 @@
         self.macros = {}
         self.slots = {}
         self.slotStack = []
+        self.xml = xml
 
     def getCode(self):
         return self.optimize(self.program), self.macros
@@ -186,11 +187,15 @@
     def emit(self, *instruction):
         self.program.append(instruction)
 
-    def emitStartTag(self, name, attrlist):
-        self.program.append(("startTag", name, attrlist))
+    def emitStartTag(self, name, attrlist, isend=0):
+        if isend:
+            opcode = "startEndTag"
+        else:
+            opcode = "startTag"
+        self.program.append((opcode, name, attrlist))
 
     def emitEndTag(self, name):
-        if self.program and self.program[-1][0] == "startTag":
+        if self.xml and self.program and self.program[-1][0] == "startTag":
             # Minimize empty element
             self.program[-1] = ("startEndTag",) + self.program[-1][1:]
         else:
@@ -207,8 +212,7 @@
             m = re.match(
                 r"(?s)\s*(?:(global|local)\s+)?(%s)\s+(.*)\Z" % NAME_RE, part)
             if not m:
-                raise TALError("invalid define syntax: " + `part`,
-                               position)
+                raise TALError("invalid define syntax: " + `part`, position)
             scope, name, expr = m.group(1, 2, 3)
             scope = scope or "local"
             cexpr = self.compileExpression(expr)
@@ -222,19 +226,19 @@
         program = self.popProgram()
         self.emit("condition", cexpr, program)
 
-    def emitRepeat(self, arg):
+    def emitRepeat(self, arg, position=(None, None)):
         m = re.match("(?s)\s*(%s)\s+(.*)\Z" % NAME_RE, arg)
         if not m:
-            raise TALError("invalid repeat syntax: " + `repeat`)
+            raise TALError("invalid repeat syntax: " + `repeat`, position)
         name, expr = m.group(1, 2)
         cexpr = self.compileExpression(expr)
         program = self.popProgram()
         self.emit("loop", name, cexpr, program)
 
-    def emitSubstitution(self, arg, attrDict={}):
+    def emitSubstitution(self, arg, attrDict={}, position=(None, None)):
         key, expr = parseSubstitution(arg)
         if not key:
-            raise TALError("Bad syntax in insert/replace: " + `arg`)
+            raise TALError("Bad syntax in content/replace: " + `arg`, position)
         cexpr = self.compileExpression(expr)
         program = self.popProgram()
         if key == "text":
@@ -243,10 +247,11 @@
             assert key == "structure"
             self.emit("insertStructure", cexpr, attrDict, program)
 
-    def emitDefineMacro(self, macroName):
+    def emitDefineMacro(self, macroName, position=(None, None)):
         program = self.popProgram()
         if self.macros.has_key(macroName):
-            raise METALError("duplicate macro definition: %s" % macroName)
+            raise METALError("duplicate macro definition: %s" % macroName,
+                             position)
         self.macros[macroName] = program
         self.emit("defineMacro", macroName, program)
 
@@ -259,13 +264,36 @@
         program = self.popProgram()
         self.emit("defineSlot", slotName, program)
 
-    def emitFillSlot(self, slotName):
+    def emitFillSlot(self, slotName, position=(None, None)):
         program = self.popProgram()
         if self.slots.has_key(slotName):
-            raise METALError("duplicate slot definition: %s" % slotName)
+            raise METALError("duplicate slot definition: %s" % slotName,
+                             position)
         self.slots[slotName] = program
         self.emit("fillSlot", slotName, program)
 
+    def unEmitWhitespace(self):
+        collect = []
+        i = len(self.program) - 1
+        while i >= 0:
+            item = self.program[i]
+            if item[0] != "rawtext":
+                break
+            text = item[1]
+            if not re.match(r"\A\s*\Z", text):
+                break
+            collect.append(text)
+            i = i-1
+        del self.program[i+1:]
+        if i >= 0 and self.program[i][0] == "rawtext":
+            text = self.program[i][1]
+            m = re.search(r"\s+\Z", text)
+            if m:
+                self.program[i] = ("rawtext", text[:m.start()])
+                collect.append(m.group())
+        collect.reverse()
+        return string.join(collect, "")
+
     def unEmitNewlineWhitespace(self):
         collect = []
         i = len(self.program)
@@ -306,7 +334,7 @@
         return newlist
 
     def emitStartElement(self, name, attrlist, taldict, metaldict,
-                         position=(None, None)):
+                         position=(None, None), isend=0):
         for key in taldict.keys():
             if key not in KNOWN_TAL_ATTRIBUTES:
                 raise TALError("bad TAL attribute: " + `key`, position)
@@ -380,46 +408,56 @@
         if replace:
             todo["repldict"] = repldict
             repldict = {}
-        self.emitStartTag(name, self.replaceAttrs(attrlist, repldict))
+        self.emitStartTag(name, self.replaceAttrs(attrlist, repldict), isend)
         if content:
             self.pushProgram()
+        if todo and position != (None, None):
+            todo["position"] = position
         self.todoPush(todo)
+        if isend:
+            self.emitEndElement(name, isend)
 
-    def emitEndElement(self, name):
+    def emitEndElement(self, name, isend=0):
         todo = self.todoPop()
         if not todo:
             # Shortcut
-            self.emitEndTag(name)
+            if not isend:
+                self.emitEndTag(name)
             return
+
+        position = todo.get("position", (None, None))
+        defineMacro = todo.get("defineMacro")
+        useMacro = todo.get("useMacro")
+        defineSlot = todo.get("defineSlot")
+        fillSlot = todo.get("fillSlot")
         content = todo.get("content")
-        if content:
-            self.emitSubstitution(content)
-        self.emitEndTag(name)
         repeat = todo.get("repeat")
+        replace = todo.get("replace")
+        condition = todo.get("condition")
+        define = todo.get("define")
+        repldict = todo.get("repldict", {})
+
+        if content:
+            self.emitSubstitution(content, {}, position)
+        if not isend:
+            self.emitEndTag(name)
         if repeat:
-            self.emitRepeat(repeat)
+            self.emitRepeat(repeat, position)
             self.emit("endScope")
-        replace = todo.get("replace")
         if replace:
-            repldict = todo.get("repldict", {})
-            self.emitSubstitution(replace, repldict)
-        condition = todo.get("condition")
+            self.emitSubstitution(replace, repldict, position)
         if condition:
             self.emitCondition(condition)
-        if todo.get("define"):
+        if define:
             self.emit("endScope")
-        defineMacro = todo.get("defineMacro")
-        useMacro = todo.get("useMacro")
-        defineSlot = todo.get("defineSlot")
-        fillSlot = todo.get("fillSlot")
         if defineMacro:
-            self.emitDefineMacro(defineMacro)
+            self.emitDefineMacro(defineMacro, position)
         if useMacro:
             self.emitUseMacro(useMacro)
         if defineSlot:
             self.emitDefineSlot(defineSlot)
         if fillSlot:
-            self.emitFillSlot(fillSlot)
+            self.emitFillSlot(fillSlot, position)
 
 def test():
     t = TALGenerator()