[Checkins] SVN: z3c.pt/trunk/ Added preliminary support for falling back to xml.etree.
Malthe Borch
mborch at gmail.com
Tue Aug 12 08:47:12 EDT 2008
Log message for revision 89721:
Added preliminary support for falling back to xml.etree.
Changed:
U z3c.pt/trunk/CHANGES.txt
A z3c.pt/trunk/src/z3c/pt/etree.py
U z3c.pt/trunk/src/z3c/pt/translation.py
-=-
Modified: z3c.pt/trunk/CHANGES.txt
===================================================================
--- z3c.pt/trunk/CHANGES.txt 2008-08-12 12:34:44 UTC (rev 89720)
+++ z3c.pt/trunk/CHANGES.txt 2008-08-12 12:47:11 UTC (rev 89721)
@@ -4,6 +4,9 @@
Version 1.0dev
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- Preliminary support for using ``xml.etree`` as fallback for
+ ``lxml.etree``. [malthe]
+
- Fixed edge case bug where inserting both a numeric entity and a literal
set of unicode bytes into the same document would cause a
UnicodeDecodeError. See also
Added: z3c.pt/trunk/src/z3c/pt/etree.py
===================================================================
--- z3c.pt/trunk/src/z3c/pt/etree.py (rev 0)
+++ z3c.pt/trunk/src/z3c/pt/etree.py 2008-08-12 12:47:11 UTC (rev 89721)
@@ -0,0 +1,132 @@
+import htmlentitydefs
+import config
+import utils
+from StringIO import StringIO
+
+try:
+ import lxml.etree
+
+ lookup = lxml.etree.ElementNamespaceClassLookup()
+ parser = lxml.etree.XMLParser(resolve_entities=False, strip_cdata=False)
+ parser.setElementClassLookup(lookup)
+
+ # lxml 1.3-compatibility
+ try:
+ ns_lookup = lookup.get_namespace
+ except AttributeError:
+ ns_lookup = lxml.etree.Namespace
+
+ class ElementBase(lxml.etree.ElementBase):
+ def tostring(self):
+ return lxml.etree.tostring(self)
+
+ element_factory = parser.makeelement
+
+ def parse(body):
+ tree = lxml.etree.parse(StringIO(body), parser)
+ root = tree.getroot()
+ return root, tree.docinfo.doctype
+
+except ImportError:
+ import xml.etree.ElementTree
+ import html5lib.treebuilders.etree
+
+ class ElementBase(object, xml.etree.ElementTree._ElementInterface):
+ _parent = None
+
+ def __new__(cls, tag, attrs=None):
+ return element_factory(tag, attrs)
+
+ def __init__(self, tag, attrs=None):
+ if attrs is None:
+ attrs = {}
+
+ xml.etree.ElementTree._ElementInterface.__init__(self, tag, attrs)
+
+ def getparent(self):
+ return self._parent
+
+ def insert(self, position, element):
+ element._parent = self
+ xml.etree.ElementTree._ElementInterface.insert(self, position, element)
+
+ def tostring(self):
+ return xml.etree.ElementTree.tostring(self)
+
+ def xpath(self, expression, namespaces={}):
+ return []
+
+ @property
+ def nsmap(self):
+ return {None: config.XML_NS}
+
+ namespaces = {}
+ def ns_lookup(ns):
+ return namespaces.setdefault(ns, {})
+
+ class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
+ def start(self, tag, attrs):
+ if len(self._elem):
+ parent = self._elem[-1]
+ else:
+ parent = None
+ elem = xml.etree.ElementTree.TreeBuilder.start(self, tag, attrs)
+ elem._parent = parent
+
+ class XMLParser(xml.etree.ElementTree.XMLParser):
+ def __init__(self, **kwargs):
+ xml.etree.ElementTree.XMLParser.__init__(self, **kwargs)
+
+ # this makes up for ET's lack of support for comments and
+ # processing instructions
+ self._parser.CommentHandler = self.handle_comment
+ self._parser.ProcessingInstructionHandler = self.handle_pi
+ #self._target.start("document", {})
+
+ def doctype(self, name, pubid, system):
+ self.doctype = u'<!DOCTYPE %(name)s PUBLIC "%(pubid)s" "%(system)s">' % \
+ dict(name=name, pubid=pubid, system=system)
+
+ def handle_comment(self, data):
+ name = utils.tal_attr('comment')
+ self._target.start(name, {})
+ self._target.data("<!-- %s -->" % data)
+ self._target.end(name)
+
+ def handle_pi(self, target, data):
+ name = utils.tal_attr('pi')
+ self._target.start(name, {})
+ self._target.data("<?%(target)s %(data)s?>" % dict(target=target, data=data))
+ self._target.end(name)
+
+ def element_factory(tag, attrs=None, nsmap=None):
+ if attrs is None:
+ attrs = {}
+
+ if '{' in tag:
+ ns = tag[tag.find('{')+1:tag.find('}')]
+ ns_tag = tag[tag.find('}')+1:]
+ else:
+ ns = None
+ ns_tag = None
+
+ namespace = ns_lookup(ns)
+ factory = namespace.get(ns_tag) or namespace.get(None) or Element
+
+ element = object.__new__(factory)
+ element.__init__(tag, attrs)
+ return element
+ #return factory(tag, attrs)
+
+ def parse(body):
+ target = TreeBuilder(element_factory=element_factory)
+
+ parser = XMLParser(target=target)
+ parser.entity = dict([(name, "&%s;" % name) for name in htmlentitydefs.entitydefs])
+ parser.feed(body)
+ root = parser.close()
+
+ return root, parser.doctype
+
+ def CDATA(text):
+ return text
Modified: z3c.pt/trunk/src/z3c/pt/translation.py
===================================================================
--- z3c.pt/trunk/src/z3c/pt/translation.py 2008-08-12 12:34:44 UTC (rev 89720)
+++ z3c.pt/trunk/src/z3c/pt/translation.py 2008-08-12 12:47:11 UTC (rev 89721)
@@ -1,7 +1,6 @@
from zope import component
from StringIO import StringIO
-import lxml.etree
import generation
import clauses
@@ -11,15 +10,16 @@
import types
import utils
import config
+import etree
-class Element(lxml.etree.ElementBase):
+class Element(etree.ElementBase):
"""Base compiler element class.
This class represents a node in the template tree. To start
compilation at this node, use the ``start`` method, providing a
code stream object.
"""
-
+
metal_slot_prefix = '_fill'
def start(self, stream):
@@ -168,7 +168,7 @@
if m is None:
break
- t = parser.makeelement(utils.tal_attr('interpolation'))
+ t = etree.element_factory(utils.tal_attr('interpolation'))
t.attrib['replace'] = m.group('expression')
t.tail = self.text[m.end():]
self.insert(0, t)
@@ -185,7 +185,7 @@
if m is None:
break
- t = parser.makeelement(utils.tal_attr('interpolation'))
+ t = etree.element_factory(utils.tal_attr('interpolation'))
t.attrib['replace'] = m.group('expression')
t.tail = self.tail[m.end():]
parent = self.getparent()
@@ -209,7 +209,7 @@
def _serialize(self):
"""Serialize element into clause-statements."""
-
+
_ = []
# i18n domain
@@ -254,8 +254,11 @@
_.append(clauses.Repeat(variables[0], expression))
# tag tail (deferred)
- if self.tail and not self.metal_fillslot:
- _.append(clauses.Out(self.tail.encode('utf-8'), defer=True))
+ tail = self.tail
+ if tail and not self.metal_fillslot:
+ if isinstance(tail, unicode):
+ tail = tail.encode('utf-8')
+ _.append(clauses.Out(tail, defer=True))
# dynamic content and content translation
replace = self._replace
@@ -286,8 +289,11 @@
_.append(tag)
# tag text (if we're not replacing tag body)
- if self.text and not dynamic:
- _.append(clauses.Out(self.text.encode('utf-8')))
+ text = self.text
+ if text and not dynamic:
+ if isinstance(text, unicode):
+ text = text.encode('utf-8')
+ _.append(clauses.Out(text))
if replace and content:
raise ValueError("Can't use replace clause together with "
@@ -380,9 +386,7 @@
value = types.value("%s['%s']" % (mapping, name))
subclauses.append(clauses.Write(value))
else:
- subclauses.append(clauses.Out(
- lxml.etree.tostring(element)))
-
+ subclauses.append(clauses.Out(element.tostring()))
if subclauses:
_.append(clauses.Else(subclauses))
@@ -391,7 +395,7 @@
def _wrap_literal(self, element):
index = self.index(element)
- t = parser.makeelement(utils.tal_attr('literal'))
+ t = etree.element_factory(utils.tal_attr('literal'))
t.attrib['omit-tag'] = ''
t.tail = element.tail
t.text = unicode(element)
@@ -411,7 +415,7 @@
out.write("${%s}" % name)
out.write(element.tail)
else:
- out.write(lxml.etree.tostring(element))
+ out.write(element.tostring())
msgid = out.getvalue().strip()
msgid = msgid.replace(' ', ' ').replace('\n', '')
@@ -627,31 +631,21 @@
py_match = utils.attribute("path")
# set up namespaces for XML parsing
-lookup = lxml.etree.ElementNamespaceClassLookup()
-parser = lxml.etree.XMLParser(resolve_entities=False)
-parser.setElementClassLookup(lookup)
+etree.ns_lookup(config.XML_NS)[None] = Element
+etree.ns_lookup(config.TAL_NS)[None] = TALElement
+etree.ns_lookup(config.METAL_NS)[None] = METALElement
+etree.ns_lookup(config.PY_NS)["if"] = PyIfElement
+etree.ns_lookup(config.PY_NS)["for"] = PyForElement
+etree.ns_lookup(config.PY_NS)["def"] = PyDefElement
+etree.ns_lookup(config.PY_NS)["with"] = PyWithElement
+etree.ns_lookup(config.PY_NS)["match"] = PyMatchElement
-try:
- ns_lookup = lookup.get_namespace
-except AttributeError:
- ns_lookup = lxml.etree.Namespace
-
-ns_lookup(config.XML_NS)[None] = Element
-ns_lookup(config.TAL_NS)[None] = TALElement
-ns_lookup(config.METAL_NS)[None] = METALElement
-ns_lookup(config.PY_NS)["if"] = PyIfElement
-ns_lookup(config.PY_NS)["for"] = PyForElement
-ns_lookup(config.PY_NS)["def"] = PyDefElement
-ns_lookup(config.PY_NS)["with"] = PyWithElement
-ns_lookup(config.PY_NS)["match"] = PyMatchElement
-
def translate_xml(body, *args, **kwargs):
- tree = lxml.etree.parse(StringIO(body), parser)
- root = tree.getroot()
+ root, doctype = etree.parse(body)
+ return translate_etree(root, doctype=doctype, *args, **kwargs)
- return translate_etree(root, *args, **kwargs)
-
-def translate_etree(root, macro=None ,params=[], default_expression='python'):
+def translate_etree(root, macro=None, doctype=None,
+ params=[], default_expression='python'):
if None not in root.nsmap:
raise ValueError, "Must set default namespace."
@@ -681,9 +675,8 @@
stream = generator.stream
# output doctype if any
- tree = root.getroottree()
- if tree.docinfo.doctype:
- dt = (tree.docinfo.doctype +'\n').encode('utf-8')
+ if isinstance(doctype, (str, unicode)):
+ dt = (doctype +'\n').encode('utf-8')
doctype = clauses.Out(dt)
stream.scope.append(set())
stream.begin([doctype])
@@ -695,12 +688,13 @@
return generator
def translate_text(body, *args, **kwargs):
- xml = parser.makeelement(
+ root = etree.element_factory(
utils.xml_attr('text'), nsmap={None: config.XML_NS})
- xml.text = body
- xml.attrib[utils.tal_attr('omit-tag')] = ''
- return translate_etree(xml, *args, **kwargs)
+ root.text = body
+ root.attrib[utils.tal_attr('omit-tag')] = ''
+ return translate_etree(root, *args, **kwargs)
+
def _translate(value, mapping=None, default=None):
format = ("_translate(%s, domain=_domain, mapping=%s, context=_context, "
"target_language=_target_language, default=%s)")
More information about the Checkins
mailing list