[Checkins] SVN: z3c.pt/trunk/ Added preliminary support for falling back to xml.etree.

Malthe Borch mborch at gmail.com
Tue Aug 12 08:47:12 EDT 2008


Log message for revision 89721:
  Added preliminary support for falling back to xml.etree.

Changed:
  U   z3c.pt/trunk/CHANGES.txt
  A   z3c.pt/trunk/src/z3c/pt/etree.py
  U   z3c.pt/trunk/src/z3c/pt/translation.py

-=-
Modified: z3c.pt/trunk/CHANGES.txt
===================================================================
--- z3c.pt/trunk/CHANGES.txt	2008-08-12 12:34:44 UTC (rev 89720)
+++ z3c.pt/trunk/CHANGES.txt	2008-08-12 12:47:11 UTC (rev 89721)
@@ -4,6 +4,9 @@
 Version 1.0dev
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+- Preliminary support for using ``xml.etree`` as fallback for
+  ``lxml.etree``. [malthe]
+
 - Fixed edge case bug where inserting both a numeric entity and a literal 
   set of unicode bytes into the same document would cause a
   UnicodeDecodeError. See also 

Added: z3c.pt/trunk/src/z3c/pt/etree.py
===================================================================
--- z3c.pt/trunk/src/z3c/pt/etree.py	                        (rev 0)
+++ z3c.pt/trunk/src/z3c/pt/etree.py	2008-08-12 12:47:11 UTC (rev 89721)
@@ -0,0 +1,132 @@
+import htmlentitydefs
+import config
+import utils
+from StringIO import StringIO
+
+try:
+    import lxml.etree
+
+    lookup = lxml.etree.ElementNamespaceClassLookup()
+    parser = lxml.etree.XMLParser(resolve_entities=False, strip_cdata=False)
+    parser.setElementClassLookup(lookup)
+
+    # lxml 1.3-compatibility
+    try:
+        ns_lookup = lookup.get_namespace
+    except AttributeError:
+        ns_lookup = lxml.etree.Namespace
+
+    class ElementBase(lxml.etree.ElementBase):
+        def tostring(self):
+            return lxml.etree.tostring(self)
+
+    element_factory = parser.makeelement
+
+    def parse(body):
+        tree = lxml.etree.parse(StringIO(body), parser)
+        root = tree.getroot()
+        return root, tree.docinfo.doctype
+
+except ImportError:
+    import xml.etree.ElementTree
+    import html5lib.treebuilders.etree
+    
+    class ElementBase(object, xml.etree.ElementTree._ElementInterface):
+        _parent = None
+        
+        def __new__(cls, tag, attrs=None):
+            return element_factory(tag, attrs)
+
+        def __init__(self, tag, attrs=None):
+            if attrs is None:
+                attrs = {}
+            
+            xml.etree.ElementTree._ElementInterface.__init__(self, tag, attrs)
+            
+        def getparent(self):
+            return self._parent
+
+        def insert(self, position, element):
+            element._parent = self
+            xml.etree.ElementTree._ElementInterface.insert(self, position, element)
+
+        def tostring(self):
+            return xml.etree.ElementTree.tostring(self)
+
+        def xpath(self, expression, namespaces={}):
+            return []
+            
+        @property
+        def nsmap(self):
+            return {None: config.XML_NS}
+        
+    namespaces = {}
+    def ns_lookup(ns):
+        return namespaces.setdefault(ns, {})
+
+    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
+        def start(self, tag, attrs):
+            if len(self._elem):
+                parent = self._elem[-1]
+            else:
+                parent = None
+            elem = xml.etree.ElementTree.TreeBuilder.start(self, tag, attrs)
+            elem._parent = parent
+
+    class XMLParser(xml.etree.ElementTree.XMLParser):
+        def __init__(self, **kwargs):
+            xml.etree.ElementTree.XMLParser.__init__(self, **kwargs)
+
+            # this makes up for ET's lack of support for comments and
+            # processing instructions
+            self._parser.CommentHandler = self.handle_comment
+            self._parser.ProcessingInstructionHandler = self.handle_pi
+            #self._target.start("document", {})
+       
+        def doctype(self, name, pubid, system):
+            self.doctype = u'<!DOCTYPE %(name)s PUBLIC "%(pubid)s" "%(system)s">' % \
+                           dict(name=name, pubid=pubid, system=system)
+
+        def handle_comment(self, data):
+            name = utils.tal_attr('comment')
+            self._target.start(name, {})
+            self._target.data("<!-- %s -->" % data)
+            self._target.end(name)
+
+        def handle_pi(self, target, data):
+            name = utils.tal_attr('pi')
+            self._target.start(name, {})
+            self._target.data("<?%(target)s %(data)s?>" % dict(target=target, data=data))
+            self._target.end(name)
+
+    def element_factory(tag, attrs=None, nsmap=None):
+        if attrs is None:
+            attrs = {}
+
+        if '{' in tag:
+            ns = tag[tag.find('{')+1:tag.find('}')]
+            ns_tag = tag[tag.find('}')+1:]
+        else:
+            ns = None
+            ns_tag = None
+
+        namespace = ns_lookup(ns)
+        factory = namespace.get(ns_tag) or namespace.get(None) or Element
+            
+        element = object.__new__(factory)
+        element.__init__(tag, attrs)
+        return element
+        #return factory(tag, attrs)
+
+    def parse(body):
+        target = TreeBuilder(element_factory=element_factory)
+
+        parser = XMLParser(target=target)
+        parser.entity = dict([(name, "&%s;" % name) for name in htmlentitydefs.entitydefs])
+        parser.feed(body)
+        root = parser.close()
+
+        return root, parser.doctype
+
+    def CDATA(text):
+        return text

Modified: z3c.pt/trunk/src/z3c/pt/translation.py
===================================================================
--- z3c.pt/trunk/src/z3c/pt/translation.py	2008-08-12 12:34:44 UTC (rev 89720)
+++ z3c.pt/trunk/src/z3c/pt/translation.py	2008-08-12 12:47:11 UTC (rev 89721)
@@ -1,7 +1,6 @@
 from zope import component
 
 from StringIO import StringIO
-import lxml.etree
 
 import generation
 import clauses
@@ -11,15 +10,16 @@
 import types
 import utils
 import config
+import etree
 
-class Element(lxml.etree.ElementBase):
+class Element(etree.ElementBase):
     """Base compiler element class.
 
     This class represents a node in the template tree. To start
     compilation at this node, use the ``start`` method, providing a
     code stream object.
     """
-    
+
     metal_slot_prefix = '_fill'
 
     def start(self, stream):
@@ -168,7 +168,7 @@
                 if m is None:
                     break
 
-                t = parser.makeelement(utils.tal_attr('interpolation'))
+                t = etree.element_factory(utils.tal_attr('interpolation'))
                 t.attrib['replace'] = m.group('expression')
                 t.tail = self.text[m.end():]
                 self.insert(0, t)
@@ -185,7 +185,7 @@
                 if m is None:
                     break
 
-                t = parser.makeelement(utils.tal_attr('interpolation'))
+                t = etree.element_factory(utils.tal_attr('interpolation'))
                 t.attrib['replace'] = m.group('expression')
                 t.tail = self.tail[m.end():]
                 parent = self.getparent()
@@ -209,7 +209,7 @@
                     
     def _serialize(self):
         """Serialize element into clause-statements."""
-        
+
         _ = []
 
         # i18n domain
@@ -254,8 +254,11 @@
             _.append(clauses.Repeat(variables[0], expression))
 
         # tag tail (deferred)
-        if self.tail and not self.metal_fillslot:
-            _.append(clauses.Out(self.tail.encode('utf-8'), defer=True))
+        tail = self.tail
+        if tail and not self.metal_fillslot:
+            if isinstance(tail, unicode):
+                tail = tail.encode('utf-8')
+            _.append(clauses.Out(tail, defer=True))
 
         # dynamic content and content translation
         replace = self._replace
@@ -286,8 +289,11 @@
                 _.append(tag)
 
         # tag text (if we're not replacing tag body)
-        if self.text and not dynamic:
-            _.append(clauses.Out(self.text.encode('utf-8')))
+        text = self.text
+        if text and not dynamic:
+            if isinstance(text, unicode):
+                text = text.encode('utf-8')
+            _.append(clauses.Out(text))
 
         if replace and content:
             raise ValueError("Can't use replace clause together with "
@@ -380,9 +386,7 @@
                         value = types.value("%s['%s']" % (mapping, name))
                         subclauses.append(clauses.Write(value))
                     else:
-                        subclauses.append(clauses.Out(
-                            lxml.etree.tostring(element)))
-
+                        subclauses.append(clauses.Out(element.tostring()))
                 if subclauses:
                     _.append(clauses.Else(subclauses))
 
@@ -391,7 +395,7 @@
     def _wrap_literal(self, element):
         index = self.index(element)
 
-        t = parser.makeelement(utils.tal_attr('literal'))
+        t = etree.element_factory(utils.tal_attr('literal'))
         t.attrib['omit-tag'] = ''
         t.tail = element.tail
         t.text = unicode(element)
@@ -411,7 +415,7 @@
                 out.write("${%s}" % name)
                 out.write(element.tail)
             else:
-                out.write(lxml.etree.tostring(element))
+                out.write(element.tostring())
 
         msgid = out.getvalue().strip()
         msgid = msgid.replace('  ', ' ').replace('\n', '')
@@ -627,31 +631,21 @@
     py_match = utils.attribute("path")
 
 # set up namespaces for XML parsing
-lookup = lxml.etree.ElementNamespaceClassLookup()
-parser = lxml.etree.XMLParser(resolve_entities=False)
-parser.setElementClassLookup(lookup)
+etree.ns_lookup(config.XML_NS)[None] = Element
+etree.ns_lookup(config.TAL_NS)[None] = TALElement
+etree.ns_lookup(config.METAL_NS)[None] = METALElement
+etree.ns_lookup(config.PY_NS)["if"] = PyIfElement
+etree.ns_lookup(config.PY_NS)["for"] = PyForElement
+etree.ns_lookup(config.PY_NS)["def"] = PyDefElement
+etree.ns_lookup(config.PY_NS)["with"] = PyWithElement
+etree.ns_lookup(config.PY_NS)["match"] = PyMatchElement
 
-try:
-    ns_lookup = lookup.get_namespace
-except AttributeError:
-    ns_lookup = lxml.etree.Namespace
-    
-ns_lookup(config.XML_NS)[None] = Element
-ns_lookup(config.TAL_NS)[None] = TALElement
-ns_lookup(config.METAL_NS)[None] = METALElement
-ns_lookup(config.PY_NS)["if"] = PyIfElement
-ns_lookup(config.PY_NS)["for"] = PyForElement
-ns_lookup(config.PY_NS)["def"] = PyDefElement
-ns_lookup(config.PY_NS)["with"] = PyWithElement
-ns_lookup(config.PY_NS)["match"] = PyMatchElement
-
 def translate_xml(body, *args, **kwargs):
-    tree = lxml.etree.parse(StringIO(body), parser)
-    root = tree.getroot()
+    root, doctype = etree.parse(body)
+    return translate_etree(root, doctype=doctype, *args, **kwargs)
 
-    return translate_etree(root, *args, **kwargs)
-
-def translate_etree(root, macro=None ,params=[], default_expression='python'):
+def translate_etree(root, macro=None, doctype=None,
+                    params=[], default_expression='python'):
     if None not in root.nsmap:
         raise ValueError, "Must set default namespace."
 
@@ -681,9 +675,8 @@
     stream = generator.stream
 
     # output doctype if any
-    tree = root.getroottree()
-    if tree.docinfo.doctype:
-        dt = (tree.docinfo.doctype +'\n').encode('utf-8')
+    if isinstance(doctype, (str, unicode)):
+        dt = (doctype +'\n').encode('utf-8')
         doctype = clauses.Out(dt)
         stream.scope.append(set())
         stream.begin([doctype])
@@ -695,12 +688,13 @@
     return generator
 
 def translate_text(body, *args, **kwargs):
-    xml = parser.makeelement(
+    root = etree.element_factory(
         utils.xml_attr('text'), nsmap={None: config.XML_NS})
-    xml.text = body
-    xml.attrib[utils.tal_attr('omit-tag')] = ''
-    return translate_etree(xml, *args, **kwargs)
     
+    root.text = body
+    root.attrib[utils.tal_attr('omit-tag')] = ''
+    return translate_etree(root, *args, **kwargs)
+    
 def _translate(value, mapping=None, default=None):
     format = ("_translate(%s, domain=_domain, mapping=%s, context=_context, "
               "target_language=_target_language, default=%s)")



More information about the Checkins mailing list