[Checkins] SVN: Sandbox/malthe/chameleon.core/ Changed parser to xml.parsers.expat; this yields greater control and flexibility and provides much-needed separation between the XML syntax tree and document parsing. A number of issues were fixed as a result of this. Note that this changeset also removes the ability to operate without lxml.
Malthe Borch
mborch at gmail.com
Tue Dec 2 08:34:30 EST 2008
Log message for revision 93536:
Changed parser to xml.parsers.expat; this yields greater control and flexibility and provides much-needed separation between the XML syntax tree and document parsing. A number of issues were fixed as a result of this. Note that this changeset also removes the ability to operate without lxml.
Changed:
U Sandbox/malthe/chameleon.core/CHANGES.txt
U Sandbox/malthe/chameleon.core/src/chameleon/core/config.py
U Sandbox/malthe/chameleon.core/src/chameleon/core/etree.py
U Sandbox/malthe/chameleon.core/src/chameleon/core/template.py
U Sandbox/malthe/chameleon.core/src/chameleon/core/template.txt
U Sandbox/malthe/chameleon.core/src/chameleon/core/testing.py
U Sandbox/malthe/chameleon.core/src/chameleon/core/translation.py
U Sandbox/malthe/chameleon.core/src/chameleon/core/translation.txt
U Sandbox/malthe/chameleon.core/src/chameleon/core/utils.py
-=-
Modified: Sandbox/malthe/chameleon.core/CHANGES.txt
===================================================================
--- Sandbox/malthe/chameleon.core/CHANGES.txt 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/CHANGES.txt 2008-12-02 13:34:30 UTC (rev 93536)
@@ -4,6 +4,16 @@
HEAD
~~~~
+- Fixed root cause of issue with self-closing tags; an empty element
+ text is now correctly differentiated from a self-closed
+ tag (which does not have this attribute set). [malthe]
+
+- Removed support for compilation without ``lxml``. [malthe]
+
+- Use ``xml.parsers.expat`` to parse template document instead of
+ using ``lxml``. This gives much greater control over edge-cases and
+ allows us to lose many workarounds. [malthe]
+
- Do not use XPath-expressions during compilation if lxml is not
available. [malthe]
Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/config.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/config.py 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/config.py 2008-12-02 13:34:30 UTC (rev 93536)
@@ -28,8 +28,17 @@
XI_NS = "http://www.w3.org/2001/XInclude"
I18N_NS = "http://xml.zope.org/namespaces/i18n"
PY_NS = "http://genshi.edgewall.org/"
-NS_MAP = dict(py=PY_NS, tal=TAL_NS, metal=METAL_NS)
+# default prefix namespace mapping
+DEFAULT_NS_MAP = {
+ None: XHTML_NS,
+ 'meta': META_NS,
+ 'py': PY_NS,
+ 'tal': TAL_NS,
+ 'metal': METAL_NS,
+ 'i18n': I18N_NS,
+ 'xi': XI_NS}
+
# the symbols table below is used internally by the compiler
class SYMBOLS(object):
# internal use only
Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/etree.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/etree.py 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/etree.py 2008-12-02 13:34:30 UTC (rev 93536)
@@ -2,12 +2,20 @@
import config
import utils
import base64
+
+import lxml.etree
import xml.parsers.expat
from cPickle import dumps, loads
+from StringIO import StringIO
+# this exception is imported for historic reasons
+XMLSyntaxError = lxml.etree.XMLSyntaxError
def import_elementtree():
+ """The ElementTree is used to validate output in debug-mode. We
+ attempt to load the library from several locations."""
+
try:
import xml.etree.ElementTree as ET
except:
@@ -30,6 +38,149 @@
except xml.parsers.expat.ExpatError:
raise ValidationError(string)
+class ExpatParser(object):
+ """XML tree parser using the ``xml.parsers.expat`` stream
+ parser. This parser serves to accept template input which lacks a
+ proper prefix namespace mapping or entity definitions. It also
+ works around an issue where the expat parser incorrectly parses an
+ element with trivial body text as self-closing."""
+
+ root = None
+
+ # doctype
+ doctype = None
+
+ # xml-declaration
+ xml_version = None
+ encoding = None
+ standalone = None
+
+ def __init__(self, parser, body, expat):
+ self.parser = parser
+ self.body = body
+ self.expat = expat
+
+ def StartElementHandler(self, tag, attrs):
+ # update prefix to namespace mapping
+ if self.root is None:
+ nsmap = {}
+ else:
+ nsmap = self.root.nsmap.copy()
+
+ # process namespace declarations
+ for key, value in attrs.items():
+ if key.startswith('xmlns:'):
+ prefix, name = key.split(':')
+ nsmap[name] = value
+ del attrs[key]
+
+ for key, value in attrs.items():
+ try:
+ prefix, name = key.split(':')
+ except (ValueError, TypeError):
+ continue
+
+ del attrs[key]
+
+ try:
+ namespace = nsmap.get(prefix) or config.DEFAULT_NS_MAP[prefix]
+ except KeyError:
+ raise KeyError(
+ "Attribute prefix unknown: '%s'." % prefix)
+ attrs['{%s}%s' % (namespace, name)] = value
+
+ # process tag
+ try:
+ prefix, name = tag.split(':')
+ namespace = nsmap.get(prefix) or config.DEFAULT_NS_MAP[prefix]
+ tag = '{%s}%s' % (namespace, name)
+ except ValueError:
+ pass
+
+ # create element using parser
+ element = self.parser.makeelement(tag, attrs, nsmap=nsmap)
+
+ if self.root is None:
+ document = []
+ if self.xml_version:
+ document.append(
+ '<?xml version="%s" encoding="%s" standalone="%s" ?>' % (
+ self.xml_version, self.encoding, self.standalone))
+
+ if self.doctype:
+ document.append(self.doctype)
+
+ # render element
+ document.append(element.tostring())
+
+ # parse document
+ self.parser.feed("\n".join(document))
+ element = self.parser.close()
+
+ # set this element as tree root
+ self.root = element
+ else:
+ self.element.append(element)
+
+ # set as current element
+ self.element = element
+
+ def EndElementHandler(self, name):
+ if self.element.text is None and self.body[
+ self.expat.CurrentByteIndex-2] != '/':
+ self.element.text = ""
+ self.element = self.element.getparent()
+
+ def CharacterDataHandler(self, data):
+ if len(self.element) == 0:
+ current = self.element.text or ""
+ self.element.text = current + data
+ else:
+ current = self.element[-1].tail or ""
+ self.element[-1].tail = current + data
+
+ def ProcessingInstructionHandler(self, target, data):
+ self.element.append(
+ lxml.etree.PI(target, data))
+
+ def StartCdataSectionHandler(self):
+ element = self.parser.makeelement(
+ utils.xhtml_attr('cdata'))
+ element.meta_cdata = ""
+ self.element.append(element)
+ self.element = element
+
+ def EndCdataSectionHandler(self):
+ self.element = self.element.getparent()
+
+ def CommentHandler(self, text):
+ self.element.append(
+ lxml.etree.Comment(text))
+
+ def XmlDeclHandler(self, xml_version, encoding, standalone):
+ self.xml_version = xml_version
+ self.encoding = encoding
+
+ if standalone:
+ self.standalone = 'yes'
+ else:
+ self.standalone = 'no'
+
+ def ExternalEntityRefHandler(self, context, base, sysid, pubid):
+ parser = self.expat.ExternalEntityParserCreate(context)
+ parser.ProcessingInstructionHandler = self.ProcessingInstructionHandler
+ parser.ParseFile(StringIO(utils.entities))
+ return 1
+
+ def DefaultHandler(self, userdata):
+ if userdata.startswith('&'):
+ return self.CharacterDataHandler(userdata)
+
+ def StartDoctypeDeclHandler(self, *args):
+ doctype_name, sysid, pubid, has_internal_subset = args
+ self.doctype = '<!DOCTYPE %s PUBLIC "%s" "%s">' % (
+ doctype_name, pubid, sysid)
+
class ValidationError(ValueError):
def __str__(self):
value, = self.args
@@ -43,6 +194,9 @@
def parse(self, body):
return parse(body, self.element_mapping, fallback=self.fallback)
+ def serialize(self, tree):
+ return serialize(tree)
+
class Annotation(property):
def __init__(self, name, default=None):
property.__init__(self, self._get, self._set)
@@ -58,329 +212,58 @@
def _set(instance, element, value):
element.attrib[instance.name] = base64.encodestring(dumps(value))
-try:
- import lxml.etree
+def elements_with_attribute(element, ns, name, value=None):
+ if value is None:
+ expression = 'descendant-or-self::*[@prefix:%s] '\
+ '| descendant-or-self::prefix:*[@%s]' % (
+ name, name)
+ else:
+ expression = 'descendant-or-self::*[@prefix:%s="%s"] '\
+ '| descendant-or-self::prefix:*[@%s="%s"]' % (
+ name, value, name, value)
- XMLSyntaxError = lxml.etree.XMLSyntaxError
+ return element.xpath(
+ expression,
+ namespaces={'prefix': ns})
- def elements_with_attribute(element, ns, name, value=None):
- if value is None:
- expression = './/*[@prefix:%s] | .//prefix:*[@%s]' % (name, name)
- else:
- expression = './/*[@prefix:%s="%s"] | .//prefix:*[@%s="%s"]' % (
- name, value, name, value)
-
- return element.xpath(
- expression,
- namespaces={'prefix': ns})
+class ElementBase(lxml.etree.ElementBase):
+ def tostring(self):
+ return lxml.etree.tostring(self)
- class BufferIO(list):
- write = list.append
+def parse(body, element_mapping, fallback=None):
+ """Parse XML document using expat and build lxml tree."""
+
+ # set up namespace lookup class
+ lookup = lxml.etree.ElementNamespaceClassLookup(
+ fallback=lxml.etree.ElementDefaultClassLookup(fallback))
+ for key, mapping in element_mapping.items():
+ lookup.get_namespace(key).update(mapping)
- def __init__(self, value):
- self.append(value)
+ # set up lxml parser
+ parser = lxml.etree.XMLParser(resolve_entities=False, strip_cdata=False)
+ parser.setElementClassLookup(lookup)
- def tell(self):
- return 0
+ # set up expat parser
+ expat = xml.parsers.expat.ParserCreate(None)
+ expat.UseForeignDTD()
+ expat.SetParamEntityParsing(
+ xml.parsers.expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
- def getvalue(self):
- return ''.join(self)
-
- class ElementBase(lxml.etree.ElementBase):
- def tostring(self):
- return lxml.etree.tostring(self)
-
- def _convert_cdata_sections(self):
- start = '<![CDATA['
- end = ']]>'
-
- text = self._raw_text or ""
- tail = self._raw_tail or ""
-
- if start in text:
- before, rest = text.split(start, 1)
- cdata, after = rest.split(end, 1)
-
- element = self.makeelement(
- utils.xhtml_attr('cdata'))
- element.meta_cdata = ""
- element.text = cdata
- element.tail = after
-
- self.text = before
- self.insert(0, element)
- element._convert_cdata_sections()
-
- if start in tail:
- before, rest = tail.split(start, 1)
- cdata, after = rest.split(end, 1)
-
- element = self.makeelement(
- utils.xhtml_attr('cdata'))
- element.meta_cdata = ""
- self.addnext(element)
-
- element.text = cdata
- element.tail = after
- self.tail = before
- element._convert_cdata_sections()
-
- @property
- def _raw_text(self):
- """Return raw text.
-
- CDATA sections are returned in their original formatting;
- the routine relies on the fact that ``tostring`` will
- output CDATA sections even though they're not present in
- the .text-attribute.
- """
-
- if self.text in ("", None):
- return self.text
-
- elements = tuple(self)
- del self[:]
- xml = lxml.etree.tostring(self, encoding='utf-8', with_tail=False)
- self.extend(elements)
-
- element = self.makeelement(self.tag, nsmap=self.nsmap)
- for attr, value in self.items():
- element.attrib[attr] = value
-
- html = lxml.etree.tostring(element)
- tag = len(element.tag.split('}')[-1])+3
- text = xml[len(html)-tag:-tag]
-
- return text
-
- @property
- def _raw_tail(self):
- """Return raw tail.
-
- CDATA sections are returned in their original formatting;
- the routine relies on the fact that ``tostring`` will
- output CDATA sections even though they're not present in
- the .tail-attribute.
- """
-
- if self.tail in ("", None):
- return self.tail
-
- elements = tuple(self)
- del self[:]
-
- parent = self.getparent()
- if parent is None:
- return self.tail
-
- length = len(lxml.etree.tostring(self, encoding='utf-8', with_tail=False))
-
- # wrap element
- index = parent.index(self)
- element = self.makeelement(self.tag, nsmap=self.nsmap)
- element.append(self)
- xml = lxml.etree.tostring(element, encoding='utf-8', with_tail=False)
- self.extend(elements)
- parent.insert(index, self)
-
- ns = self.tag[self.tag.find('{')+1:self.tag.find('}')]
- for prefix, namespace in self.nsmap.items():
- if ns == namespace:
- if prefix is None:
- tag = len(self.tag) - len(ns)
- else:
- tag = len(self.tag) - len(ns) + len(prefix) + 1
- break
- else:
- raise ValueError(
- "Unable to determine tag length: %s." % self.tag)
-
- tail = xml[length+tag:-tag-1]
-
- return tail
-
- def convert_cdata_section(node):
- parent = node.getparent()
- if parent is not None:
- index = parent.index(node)
- element = node.makeelement(node.tag, nsmap=node.nsmap)
- element.append(node)
- xml = lxml.etree.tostring(element, encoding='utf-8', with_tail=False)
- parent.insert(index, node)
- else:
- xml = lxml.etree.tostring(node, encoding='utf-8', with_tail=False)
-
- if 'CDATA' in xml:
- node._convert_cdata_sections()
- for child in tuple(node):
- convert_cdata_section(child)
-
- def parse(body, element_mapping, fallback=None):
- lookup = lxml.etree.ElementNamespaceClassLookup(
- fallback=lxml.etree.ElementDefaultClassLookup(fallback))
- parser = lxml.etree.XMLParser(resolve_entities=False, strip_cdata=False)
- parser.setElementClassLookup(lookup)
-
- # lxml 1.3-compatibility
+ # attach expat parser methods
+ parser = ExpatParser(parser, body, expat)
+ for name in type(parser).__dict__.keys():
try:
- ns_lookup = lookup.get_namespace
+ setattr(expat, name, getattr(parser, name))
except AttributeError:
- ns_lookup = lxml.etree.Namespace
+ pass
- for key, mapping in element_mapping.items():
- ns_lookup(key).update(mapping)
+ # parse document body
+ expat.Parse(body, 1)
- tree = lxml.etree.parse(BufferIO(body), parser)
- root = tree.getroot()
+ # return document root tree
+ return parser.root.getroottree()
- convert_cdata_section(root)
-
- return root, tree.docinfo.doctype
-
-except ImportError:
- ET = import_elementtree()
-
- XMLSyntaxError = SyntaxError
-
- def elements_with_attribute(element, ns, name, value=None):
- attributes = utils.get_attributes_from_namespace(
- element, ns)
-
- if value is not None:
- if value in (
- attributes.get(name), attributes.get('{%s}%s' % (ns, name))):
- yield element
- elif 'name' in attributes or '{%s}%s' % (ns, name) in attributes:
- yield element
-
- for child in element:
- for match in elements_with_attribute(child, ns, name):
- yield match
-
- class ElementBase(object, ET._ElementInterface):
- _parent = None
-
- def __new__(cls, tag, attrs=None):
- return element_factory(tag, attrs)
-
- def __init__(self, tag, attrs=None):
- if attrs is None:
- attrs = {}
-
- ET._ElementInterface.__init__(self, tag, attrs)
-
- def getparent(self):
- return self._parent
-
- def getroottree(self):
- while self._parent is not None:
- self = self._parent
- class roottree(object):
- @classmethod
- def getroot(cls):
- return self
- return roottree
-
- def insert(self, position, element):
- element._parent = self
- ET._ElementInterface.insert(self, position, element)
-
- def tostring(self):
- return ET.tostring(self)
-
- def xpath(self, path, namespaces={}):
- raise NotImplementedError(
- "No XPath-engine available.")
-
- @property
- def nsmap(self):
- # TODO: Return correct namespace map
- return {None: config.XHTML_NS}
-
- @property
- def prefix(self):
- try:
- ns, prefix = self.tag.split('}')
- except:
- return None
-
- for prefix, namespace in self.nsmap.items():
- if namespace == ns:
- return prefix
-
- namespaces = {}
- def ns_lookup(ns):
- return namespaces.setdefault(ns, {})
-
- class TreeBuilder(ET.TreeBuilder):
- def start(self, tag, attrs):
- if len(self._elem):
- parent = self._elem[-1]
- else:
- parent = None
- elem = ET.TreeBuilder.start(self, tag, attrs)
- elem._parent = parent
- elem.makeelement = self._factory
-
- class XMLParser(ET.XMLParser):
- def __init__(self, **kwargs):
- ET.XMLParser.__init__(self, **kwargs)
-
- # this makes up for ET's lack of support for comments and
- # processing instructions
- self._parser.CommentHandler = self.handle_comment
- self._parser.ProcessingInstructionHandler = self.handle_pi
- self._parser.StartCdataSectionHandler = self.handle_cdata_start
- self._parser.EndCdataSectionHandler = self.handle_cdata_end
-
- def doctype(self, name, pubid, system):
- self.doctype = u'<!DOCTYPE %(name)s PUBLIC "%(pubid)s" "%(system)s">' % \
- dict(name=name, pubid=pubid, system=system)
-
- def handle_comment(self, data):
- name = utils.tal_attr('comment')
- self._target.start(name, {})
- self._target.data("<!-- %s -->" % data)
- self._target.end(name)
-
- def handle_pi(self, target, data):
- name = utils.tal_attr('pi')
- self._target.start(name, {})
- self._target.data("<?%(target)s %(data)s?>" % dict(target=target, data=data))
- self._target.end(name)
-
- def handle_cdata_start(self):
- self._target.start(utils.xhtml_attr('cdata'), {
- utils.tal_attr('cdata'): ''})
-
- def handle_cdata_end(self):
- self._target.end(utils.xhtml_attr('cdata'))
-
- def parse(body, element_mapping, fallback=None):
- def element_factory(tag, attrs=None, nsmap=None):
- if attrs is None:
- attrs = {}
-
- if '{' in tag:
- ns = tag[tag.find('{')+1:tag.find('}')]
- ns_tag = tag[tag.find('}')+1:]
- else:
- ns = None
- ns_tag = None
-
- namespace = element_mapping[ns]
- factory = namespace.get(ns_tag) or namespace.get(None) or fallback
-
- element = object.__new__(factory)
- element.__init__(tag, attrs)
-
- return element
-
- target = TreeBuilder(element_factory=element_factory)
- parser = XMLParser(target=target)
- parser.entity = dict([(name, "&%s;" % name) for name in htmlentitydefs.entitydefs])
- parser.feed(body)
-
- root = parser.close()
-
- return root, parser.doctype
+def serialize(tree):
+ """Serialize tree using lxml."""
+
+ return lxml.etree.tostring(tree, encoding='utf-8')
Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/template.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/template.py 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/template.py 2008-12-02 13:34:30 UTC (rev 93536)
@@ -22,7 +22,6 @@
format = 'xml'
filename = '<string>'
- implicit_doctype = doctypes.xhtml
explicit_doctype = None
def __init__(self, body, parser, format=None, doctype=None, encoding=None):
@@ -52,7 +51,6 @@
def compiler(self):
return self.compilers[self.format](
self.body, self.parser,
- implicit_doctype=self.implicit_doctype,
explicit_doctype=self.explicit_doctype,
encoding=self.encoding)
Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/template.txt
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/template.txt 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/template.txt 2008-12-02 13:34:30 UTC (rev 93536)
@@ -11,7 +11,7 @@
... <div xmlns="http://www.w3.org/1999/xhtml">
... Hello World!
... </div>""", mock_parser)()
- <div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
Hello World!
</div>
@@ -23,7 +23,7 @@
>>> path = tests.__path__[0]
>>> t = TemplateFile(path+'/helloworld.pt', mock_parser)
>>> print t()
- <div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
Hello World!
</div>
@@ -50,7 +50,7 @@
>>> template = TemplateFile(path+"/xinclude1.pt", mock_parser)
>>> print template()
- <div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
<div>
<span>Hello, world!</span>
</div>
@@ -61,7 +61,7 @@
... template.registry[key] = loads(dumps(bct))
>>> print template()
- <div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
<div>
<span>Hello, world!</span>
</div>
Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/testing.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/testing.py 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/testing.py 2008-12-02 13:34:30 UTC (rev 93536)
@@ -25,7 +25,7 @@
def compile_xhtml(body, **kwargs):
compiler = TestCompiler(
- body, mock_parser, implicit_doctype=doctypes.xhtml)
+ body, mock_parser)
return compiler(parameters=sorted(kwargs.keys()))
def render_xhtml(body, **kwargs):
@@ -34,14 +34,14 @@
def render_text(body, **kwargs):
compiler = TestCompiler.from_text(
- body, mock_parser, implicit_doctype=doctypes.xhtml)
+ body, mock_parser)
template = compiler(parameters=sorted(kwargs.keys()))
template.compile()
return template.render(**kwargs)
def compile_template(parser, body, encoding=None, **kwargs):
compiler = TestCompiler(
- body, parser, encoding=encoding, implicit_doctype=doctypes.xhtml)
+ body, parser, encoding=encoding)
template = compiler(parameters=sorted(kwargs.keys()))
template.compile()
return template.render(**kwargs)
@@ -128,7 +128,7 @@
def render(self, **kwargs):
compiler = TestCompiler(
self.body, self.parser,
- implicit_doctype=doctypes.xhtml, explicit_doctype=self.doctype)
+ explicit_doctype=self.doctype)
template = compiler(parameters=sorted(kwargs.keys()))
return template.render(**kwargs)
Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/translation.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/translation.py 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/translation.py 2008-12-02 13:34:30 UTC (rev 93536)
@@ -76,6 +76,10 @@
@property
def ns_attributes(self):
+ root = self.element.getroottree().getroot()
+ if root is None or utils.get_namespace(root) == config.META_NS:
+ return {}
+
prefix_omit = set()
namespaces = self.element.nsmap.values()
@@ -86,10 +90,12 @@
prefix_omit.add(prefix)
parent = parent.getparent()
- return dict(
+ attrs = dict(
((prefix and "xmlns:%s" % prefix or "xmlns", ns) for (prefix, ns) in \
self.element.nsmap.items() if \
ns not in self.ns_omit and prefix not in prefix_omit))
+
+ return attrs
@property
def static_attributes(self):
@@ -109,7 +115,7 @@
if self.element.prefix is None:
result.update(
utils.get_attributes_from_namespace(self.element, None))
-
+
return result
@property
@@ -350,9 +356,9 @@
# include
elif self.include:
# compute macro function arguments and create argument string
- arguments = ", ".join(
- ("%s=%s" % (arg, arg) for arg in \
- set(itertools.chain(*self.stream.scope))))
+ arguments = [
+ "%s=%s" % (arg, arg) for arg in \
+ set(itertools.chain(*self.stream.scope))]
# XInclude's are similar to METAL macros, except the macro
# is always defined as the entire template.
@@ -365,7 +371,7 @@
_.append(clauses.Write(
types.template(
"%%(xincludes)s.get(%%(include)s, %s).render_xinclude(%s)" % \
- (repr(self.format), arguments))))
+ (repr(self.format), ", ".join(arguments)))))
# use macro
elif self.use_macro:
@@ -604,70 +610,25 @@
doctype = None
xml_declaration = None
- implicit_doctype = ""
-
- def __init__(self, body, parser, implicit_doctype=None,
- explicit_doctype=None, encoding=None):
- # documents without a document type declaration are augmented
- # with default namespace declarations and proper XML entity
- # definitions; this represents a 'convention' over
- # 'configuration' approach to template documents
- no_doctype_declaration = '<!DOCTYPE' not in body
- no_xml_declaration = not body.startswith('<?xml ')
- require_wrapping = no_xml_declaration and no_doctype_declaration
+
+ def __init__(self, body, parser, explicit_doctype=None, encoding=None):
+ self.tree = parser.parse(body)
+ self.parser = parser
- if no_xml_declaration is False:
- self.xml_declaration = body[:body.find('\n', body.find('?>'))+1]
+ # it's not clear from the tree if an XML declaration was
+ # present in the document source; the following is a
+ # work-around to ensure that output matches input
+ if '<?xml ' in body:
+ self.xml_declaration = \
+ """<?xml version="%s" encoding="%s" standalone="no" ?>""" % (
+ self.tree.docinfo.xml_version, self.tree.docinfo.encoding)
- # add default namespace declaration if no explicit document
- # type has been set
- if implicit_doctype and explicit_doctype is None and require_wrapping:
- body = """\
- <meta:declare-ns
- xmlns="%s" xmlns:tal="%s" xmlns:metal="%s" xmlns:i18n="%s"
- xmlns:py="%s" xmlns:xinclude="%s" xmlns:meta="%s"
- >%s</meta:declare-ns>""" % (
- config.XHTML_NS, config.TAL_NS,
- config.METAL_NS, config.I18N_NS,
- config.PY_NS, config.XI_NS, config.META_NS,
- body)
-
- # prepend the implicit doctype to the document source and add
- # entity definitions
- if implicit_doctype and require_wrapping:
- implicit_doctype = implicit_doctype[:-1] + ' [ %s ]>' % utils.entities
- self.implicit_doctype = implicit_doctype
- body = implicit_doctype + "\n" + body
-
- # parse document
- self.root, parsed_doctype = parser.parse(body)
-
- # explicit document type has priority
+ # explicit document type has priority over a parsed doctype
if explicit_doctype is not None:
self.doctype = explicit_doctype
- elif parsed_doctype and not no_doctype_declaration:
- self.doctype = parsed_doctype
-
- # if the document has no XML declaration, limit self-closing
- # tags to the allowed subset for templates with a non-XML
- # compliant document type (non-strict); see
- # http://www.w3.org/TR/xhtml1/#C_3 for more information.
- ldoctype = (self.doctype or implicit_doctype or "").lower()
- if no_xml_declaration and \
- 'html' in ldoctype and 'strict' not in ldoctype:
- for element in self.root.getiterator():
- try:
- tag = element.tag.split('}')[-1]
- except AttributeError:
- continue
-
- if element.text is None and tag not in (
- 'area', 'base', 'basefont', 'br',
- 'hr', 'input', 'img', 'link', 'meta'):
- element.text = ""
-
- self.parser = parser
-
+ elif self.tree.docinfo.doctype:
+ self.doctype = self.tree.docinfo.doctype
+
if utils.coerces_gracefully(encoding):
self.encoding = None
else:
@@ -677,36 +638,47 @@
def from_text(cls, body, parser, **kwargs):
compiler = Compiler(
"<html xmlns='%s'></html>" % config.XHTML_NS, parser,
- implicit_doctype=None, encoding=kwargs.get('encoding'))
- compiler.root.text = body
- compiler.root.meta_omit = ""
+ encoding=kwargs.get('encoding'))
+ root = compiler.tree.getroot()
+ root.text = body
+ root.meta_omit = ""
return compiler
def __call__(self, macro=None, global_scope=True, parameters=()):
- if not isinstance(self.root, Element):
+ root = self.tree.getroot()
+
+ if not isinstance(root, Element):
raise ValueError(
- "Must define valid namespace for tag: '%s.'" % self.root.tag)
+ "Must define valid namespace for tag: '%s.'" % root.tag)
# if macro is non-trivial, start compilation at the element
# where the macro is defined
if macro:
elements = tuple(etree.elements_with_attribute(
- self.root, config.METAL_NS, 'define-macro', macro))
+ root, config.METAL_NS, 'define-macro', macro))
if not elements:
raise ValueError("Macro not found: %s." % macro)
- self.root = element = elements[0]
+ element = elements[0]
+ element.meta_translator = root.meta_translator
- # remove attribute from tag
- if element.nsmap[element.prefix] == config.METAL_NS:
+ # if element is the document root, render as a normal
+ # template, i.e. unset the `macro` mode
+ if root is element:
+ macro = None
+ else:
+ root = element
+
+ # remove macro definition attribute from element
+ if element.nsmap.get(element.prefix) == config.METAL_NS:
del element.attrib['define-macro']
else:
del element.attrib[utils.metal_attr('define-macro')]
-
+
# initialize code stream object
stream = generation.CodeIO(
- self.root.node.symbols, encoding=self.encoding,
+ root.node.symbols, encoding=self.encoding,
indentation=0, indentation_string="\t")
# initialize variable scope
@@ -747,11 +719,18 @@
clause.begin(stream)
clause.end(stream)
+ if macro is not None:
+ if macro == "" and 'xmlns' in root.attrib:
+ del root.attrib['xmlns']
+ wrap = root.makeelement(utils.meta_attr('wrap'))
+ wrap.append(root)
+ root = wrap
+
# output XML headers, if applicable
if not macro:
header = ""
if self.xml_declaration is not None:
- header += self.xml_declaration
+ header += self.xml_declaration + '\n'
if self.doctype:
doctype = self.doctype + '\n'
if self.encoding:
@@ -765,13 +744,9 @@
stream.scope.pop()
# start generation
- self.root.start(stream)
+ root.start(stream)
body = stream.getvalue()
- # remove namespace declaration
- if 'xmlns' in self.root.attrib:
- del self.root.attrib['xmlns']
-
# symbols dictionary
__dict__ = stream.symbols.__dict__
@@ -804,11 +779,10 @@
source = generation.function_wrap(
'render', defaults, _globals, body)
- # serialize document
- xmldoc = self.implicit_doctype + "\n" + self.root.tostring()
+ xmldoc = self.parser.serialize(self.tree)
return ByteCodeTemplate(
- source, xmldoc, self.parser, self.root)
+ source, xmldoc, self.parser, root)
class ByteCodeTemplate(object):
"""Template compiled to byte-code."""
@@ -875,7 +849,7 @@
source = state['source']
xmldoc = state['xmldoc']
parser = state['parser']
- tree, doctype = parser.parse(xmldoc)
+ tree = parser.parse(xmldoc)
bind = sys.modules['types'].FunctionType(
marshal.loads(state['code']), GLOBALS, "bind")
Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/translation.txt
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/translation.txt 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/translation.txt 2008-12-02 13:34:30 UTC (rev 93536)
@@ -14,9 +14,9 @@
... <div xmlns="http://www.w3.org/1999/xhtml">
... Hello World!
... </div>""")
- <div>
- Hello World!
- </div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
+ Hello World!
+ </div>
:: Setting DOCTYPE
@@ -38,10 +38,10 @@
... La Peña
... <img alt="La Peña" />
... </div>""")
- <div>
- La Peña
- <img alt="La Peña" />
- </div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
+ La Peña
+ <img alt="La Peña" />
+ </div>
:: CDATA blocks
>>> print render_xhtml("""\
@@ -51,12 +51,12 @@
... /* ]]> */
... <span>Not protected</span> <![CDATA[ This is protected ]]>
... </div>""")
- <div>
- /* <![CDATA[ */
- This is protected
- /* ]]> */
- <span>Not protected</span> <![CDATA[ This is protected ]]>
- </div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
+ /* <![CDATA[ */
+ This is protected
+ /* ]]> */
+ <span>Not protected</span> <![CDATA[ This is protected ]]>
+ </div>
Literals
--------
@@ -84,7 +84,7 @@
... >→</a>
... <span class="→"></span>
... </html>""")
- <html>
+ <html xmlns="http://www.w3.org/1999/xhtml">
Hello World!
<a href="localhost" title="Singing & Dancing">→</a>
<span class="→"></span>
@@ -97,7 +97,7 @@
... <?xml-stylesheet href="classic.xsl" type="text/xml"?>
... Hello World!
... </html>""")
- <html>
+ <html xmlns="http://www.w3.org/1999/xhtml">
<?xml-stylesheet href="classic.xsl" type="text/xml"?>
Hello World!
</html>
@@ -110,7 +110,7 @@
... <!-- a multi-
... line comment -->
... </div>""")
- <div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
<!-- a comment -->
<!-- a multi-
line comment -->
@@ -122,7 +122,7 @@
... <html xmlns="http://www.w3.org/1999/xhtml">
... <!-- hello world -->
... </html>""")
- <html>
+ <html xmlns="http://www.w3.org/1999/xhtml">
<!-- hello world -->
</html>
@@ -174,7 +174,7 @@
Let's try and render the template.
>>> print template.render()
- <div>
+ <div xmlns="http://www.w3.org/1999/xhtml">
Hello World!
</div>
@@ -193,19 +193,6 @@
>>> render_xhtml(body)
Traceback (most recent call last):
...
- XMLSyntaxError: ...
+ ExpatError: unclosed token: line 1, column 0
:: Missing namespace definition
-
-If a document type is provided, namespaces must be declared.
-
- >>> body = """\
- ... <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- ... "http://www.w3.org/TR/html4/loose.dtd">
- ... <div xmlns="http://www.w3.org/1999/xhtml" tal:content="'Hello World'" />
- ... """
-
- >>> print render_xhtml(body)
- Traceback (most recent call last):
- ...
- XMLSyntaxError: Namespace prefix tal for content on div is not defined...
Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/utils.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/utils.py 2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/utils.py 2008-12-02 13:34:30 UTC (rev 93536)
@@ -280,7 +280,7 @@
def get_namespace(element):
if '}' in element.tag:
return element.tag.split('}')[0][1:]
- return element.nsmap[None]
+ return element.nsmap.get(None)
def xhtml_attr(name):
return "{%s}%s" % (config.XHTML_NS, name)
More information about the Checkins
mailing list