[Checkins] SVN: Sandbox/malthe/chameleon.core/ Changed parser to xml.parsers.expat; this yields greater control and flexibility and provides much-needed separation between the XML syntax tree and document parsing. A number of issues were fixed as a result of this. Note that this changeset also removes the ability to operate without lxml.

Malthe Borch mborch at gmail.com
Tue Dec 2 08:34:30 EST 2008


Log message for revision 93536:
  Changed parser to xml.parsers.expat; this yields greater control and flexibility and provides much-needed separation between the XML syntax tree and document parsing. A number of issues were fixed as a result of this. Note that this changeset also removes the ability to operate without lxml.

Changed:
  U   Sandbox/malthe/chameleon.core/CHANGES.txt
  U   Sandbox/malthe/chameleon.core/src/chameleon/core/config.py
  U   Sandbox/malthe/chameleon.core/src/chameleon/core/etree.py
  U   Sandbox/malthe/chameleon.core/src/chameleon/core/template.py
  U   Sandbox/malthe/chameleon.core/src/chameleon/core/template.txt
  U   Sandbox/malthe/chameleon.core/src/chameleon/core/testing.py
  U   Sandbox/malthe/chameleon.core/src/chameleon/core/translation.py
  U   Sandbox/malthe/chameleon.core/src/chameleon/core/translation.txt
  U   Sandbox/malthe/chameleon.core/src/chameleon/core/utils.py

-=-
Modified: Sandbox/malthe/chameleon.core/CHANGES.txt
===================================================================
--- Sandbox/malthe/chameleon.core/CHANGES.txt	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/CHANGES.txt	2008-12-02 13:34:30 UTC (rev 93536)
@@ -4,6 +4,16 @@
 HEAD
 ~~~~
 
+- Fixed root cause of issue with self-closing tags; an empty element
+  text is now correctly differentiated from a self-closed
+  tag (which does not have this attribute set). [malthe]
+
+- Removed support for compilation without ``lxml``. [malthe]
+
+- Use ``xml.parsers.expat`` to parse template document instead of
+  using ``lxml``. This gives much greater control over edge-cases and
+  allows us to lose many workarounds. [malthe]
+
 - Do not use XPath-expressions during compilation if lxml is not
   available. [malthe]
 

Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/config.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/config.py	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/config.py	2008-12-02 13:34:30 UTC (rev 93536)
@@ -28,8 +28,17 @@
 XI_NS = "http://www.w3.org/2001/XInclude"
 I18N_NS = "http://xml.zope.org/namespaces/i18n"
 PY_NS = "http://genshi.edgewall.org/"
-NS_MAP = dict(py=PY_NS, tal=TAL_NS, metal=METAL_NS)
 
+# default prefix namespace mapping
+DEFAULT_NS_MAP = {
+    None: XHTML_NS,
+    'meta': META_NS,
+    'py': PY_NS,
+    'tal': TAL_NS,
+    'metal': METAL_NS,
+    'i18n': I18N_NS,
+    'xi': XI_NS}
+    
 # the symbols table below is used internally be the compiler
 class SYMBOLS(object):
     # internal use only

Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/etree.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/etree.py	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/etree.py	2008-12-02 13:34:30 UTC (rev 93536)
@@ -2,12 +2,20 @@
 import config
 import utils
 import base64
+
+import lxml.etree
 import xml.parsers.expat
 
 from cPickle import dumps, loads
+from StringIO import StringIO
 
+# this exception is imported for historic reasons
+XMLSyntaxError = lxml.etree.XMLSyntaxError
 
 def import_elementtree():
+    """The ElementTree is used to validate output in debug-mode. We
+    attempt to load the library from several locations."""
+    
     try:
         import xml.etree.ElementTree as ET
     except:
@@ -30,6 +38,149 @@
     except xml.parsers.expat.ExpatError:
         raise ValidationError(string)
 
+class ExpatParser(object):
+    """XML tree parser using the ``xml.parsers.expat`` stream
+    parser. This parser serve to accept template input which lacks a
+    proper prefix namespace mapping or entity definitions. It also
+    works around an issue where the expat parser incorrectly parses an
+    element with trivial body text as self-closing."""
+    
+    root = None
+
+    # doctype
+    doctype = None
+
+    # xml-declaration
+    xml_version = None
+    encoding = None
+    standalone = None
+    
+    def __init__(self, parser, body, expat):
+        self.parser = parser
+        self.body = body
+        self.expat = expat
+            
+    def StartElementHandler(self, tag, attrs):
+        # update prefix to namespace mapping
+        if self.root is None:
+            nsmap = {}
+        else:
+            nsmap = self.root.nsmap.copy()
+
+        # process namespace declarations
+        for key, value in attrs.items():
+            if key.startswith('xmlns:'):
+                prefix, name = key.split(':')
+                nsmap[name] = value
+                del attrs[key]
+            
+        for key, value in attrs.items():
+            try:
+                prefix, name = key.split(':')
+            except (ValueError, TypeError):
+                continue
+
+            del attrs[key]
+
+            try:
+                namespace = nsmap.get(prefix) or config.DEFAULT_NS_MAP[prefix]
+            except KeyError:
+                raise KeyError(
+                    "Attribute prefix unknown: '%s'." % prefix)
+            attrs['{%s}%s' % (namespace, name)] = value
+        
+        # process tag
+        try:
+            prefix, name = tag.split(':')
+            namespace = nsmap.get(prefix) or config.DEFAULT_NS_MAP[prefix]
+            tag = '{%s}%s' % (namespace, name)
+        except ValueError:
+            pass
+                
+        # create element using parser
+        element = self.parser.makeelement(tag, attrs, nsmap=nsmap)
+
+        if self.root is None:
+            document = []
+            if self.xml_version:
+                document.append(
+                    '<?xml version="%s" encoding="%s" standalone="%s" ?>' % (
+                    self.xml_version, self.encoding, self.standalone))
+
+            if self.doctype:
+                document.append(self.doctype)
+
+            # render element
+            document.append(element.tostring())
+                
+            # parse document
+            self.parser.feed("\n".join(document))
+            element = self.parser.close()
+
+            # set this element as tree root
+            self.root = element
+        else:
+            self.element.append(element)
+
+        # set as current element
+        self.element = element
+
+    def EndElementHandler(self, name):
+        if self.element.text is None and self.body[
+            self.expat.CurrentByteIndex-2] != '/':
+            self.element.text = ""
+        self.element = self.element.getparent()
+
+    def CharacterDataHandler(self, data):
+        if len(self.element) == 0:
+            current = self.element.text or ""
+            self.element.text = current + data
+        else:
+            current = self.element[-1].tail or ""
+            self.element[-1].tail = current + data
+            
+    def ProcessingInstructionHandler(self, target, data):
+        self.element.append(
+            lxml.etree.PI(target, data))        
+        
+    def StartCdataSectionHandler(self):
+        element = self.parser.makeelement(
+            utils.xhtml_attr('cdata'))
+        element.meta_cdata = ""
+        self.element.append(element)
+        self.element = element            
+
+    def EndCdataSectionHandler(self):
+        self.element = self.element.getparent()
+
+    def CommentHandler(self, text):
+        self.element.append(
+            lxml.etree.Comment(text))
+        
+    def XmlDeclHandler(self, xml_version, encoding, standalone):
+        self.xml_version = xml_version
+        self.encoding = encoding
+
+        if standalone:
+            self.standalone = 'yes'
+        else:
+            self.standalone = 'no'
+        
+    def ExternalEntityRefHandler(self, context, base, sysid, pubid):
+        parser = self.expat.ExternalEntityParserCreate(context)
+        parser.ProcessingInstructionHandler = self.ProcessingInstructionHandler
+        parser.ParseFile(StringIO(utils.entities))
+        return 1
+
+    def DefaultHandler(self, userdata):
+        if userdata.startswith('&'):
+            return self.CharacterDataHandler(userdata)            
+                
+    def StartDoctypeDeclHandler(self, *args):
+        doctype_name, sysid, pubid, has_internal_subset = args
+        self.doctype = '<!DOCTYPE %s PUBLIC "%s" "%s">' % (
+            doctype_name, pubid, sysid)
+        
 class ValidationError(ValueError):
     def __str__(self):
         value, = self.args
@@ -43,6 +194,9 @@
     def parse(self, body):
         return parse(body, self.element_mapping, fallback=self.fallback)
 
+    def serialize(self, tree):
+        return serialize(tree)
+        
 class Annotation(property):
     def __init__(self, name, default=None):
         property.__init__(self, self._get, self._set)
@@ -58,329 +212,58 @@
     def _set(instance, element, value):
         element.attrib[instance.name] = base64.encodestring(dumps(value))
 
-try:
-    import lxml.etree
+def elements_with_attribute(element, ns, name, value=None):
+    if value is None:
+        expression = 'descendant-or-self::*[@prefix:%s] '\
+                     '| descendant-or-self::prefix:*[@%s]' % (
+            name, name)
+    else:
+        expression = 'descendant-or-self::*[@prefix:%s="%s"] '\
+                     '| descendant-or-self::prefix:*[@%s="%s"]' % (
+            name, value, name, value)
 
-    XMLSyntaxError = lxml.etree.XMLSyntaxError
+    return element.xpath(
+        expression,
+        namespaces={'prefix': ns})
 
-    def elements_with_attribute(element, ns, name, value=None):
-        if value is None:
-            expression = './/*[@prefix:%s] | .//prefix:*[@%s]' % (name, name)
-        else:
-            expression = './/*[@prefix:%s="%s"] | .//prefix:*[@%s="%s"]' % (
-                name, value, name, value)
-            
-        return element.xpath(
-            expression,
-            namespaces={'prefix': ns})
+class ElementBase(lxml.etree.ElementBase):
+    def tostring(self):
+        return lxml.etree.tostring(self)
 
-    class BufferIO(list):
-        write = list.append
+def parse(body, element_mapping, fallback=None):
+    """Parse XML document using expat and build lxml tree."""
+    
+    # set up namespace lookup class
+    lookup = lxml.etree.ElementNamespaceClassLookup(
+        fallback=lxml.etree.ElementDefaultClassLookup(fallback))
+    for key, mapping in element_mapping.items():
+        lookup.get_namespace(key).update(mapping)
 
-        def __init__(self, value):
-            self.append(value)
+    # set up lxml parser
+    parser = lxml.etree.XMLParser(resolve_entities=False, strip_cdata=False)
+    parser.setElementClassLookup(lookup)
 
-        def tell(self):
-            return 0
+    # set up expat parser
+    expat = xml.parsers.expat.ParserCreate(None)
+    expat.UseForeignDTD()
+    expat.SetParamEntityParsing(
+        xml.parsers.expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
 
-        def getvalue(self):
-            return ''.join(self)
-
-    class ElementBase(lxml.etree.ElementBase):
-        def tostring(self):
-            return lxml.etree.tostring(self)
-
-        def _convert_cdata_sections(self):
-            start = '<![CDATA['
-            end = ']]>'
-
-            text = self._raw_text or ""
-            tail = self._raw_tail or ""
-
-            if start in text:
-                before, rest = text.split(start, 1)
-                cdata, after = rest.split(end, 1)
-
-                element = self.makeelement(
-                    utils.xhtml_attr('cdata'))
-                element.meta_cdata = ""
-                element.text = cdata
-                element.tail = after
-                
-                self.text = before
-                self.insert(0, element)
-                element._convert_cdata_sections()
-                
-            if start in tail:
-                before, rest = tail.split(start, 1)
-                cdata, after = rest.split(end, 1)
-
-                element = self.makeelement(
-                    utils.xhtml_attr('cdata'))
-                element.meta_cdata = ""
-                self.addnext(element)
-
-                element.text = cdata
-                element.tail = after
-                self.tail = before
-                element._convert_cdata_sections()
-                
-        @property
-        def _raw_text(self):
-            """Return raw text.
-
-            CDATA sections are returned in their original formatting;
-            the routine relies on the fact that ``tostring`` will
-            output CDATA sections even though they're not present in
-            the .text-attribute.
-            """
-
-            if self.text in ("", None):
-                return self.text
-            
-            elements = tuple(self)
-            del self[:]
-            xml = lxml.etree.tostring(self, encoding='utf-8', with_tail=False)
-            self.extend(elements)
-                
-            element = self.makeelement(self.tag, nsmap=self.nsmap)
-            for attr, value in self.items():
-                element.attrib[attr] = value
-
-            html = lxml.etree.tostring(element)
-            tag = len(element.tag.split('}')[-1])+3
-            text = xml[len(html)-tag:-tag]
-
-            return text
-
-        @property
-        def _raw_tail(self):
-            """Return raw tail.
-
-            CDATA sections are returned in their original formatting;
-            the routine relies on the fact that ``tostring`` will
-            output CDATA sections even though they're not present in
-            the .tail-attribute.
-            """
-
-            if self.tail in ("", None):
-                return self.tail
-
-            elements = tuple(self)
-            del self[:]
-
-            parent = self.getparent()
-            if parent is None:
-                return self.tail
-            
-            length = len(lxml.etree.tostring(self, encoding='utf-8', with_tail=False))
-            
-            # wrap element
-            index = parent.index(self)
-            element = self.makeelement(self.tag, nsmap=self.nsmap)
-            element.append(self)
-            xml = lxml.etree.tostring(element, encoding='utf-8', with_tail=False)
-            self.extend(elements)
-            parent.insert(index, self)
-
-            ns = self.tag[self.tag.find('{')+1:self.tag.find('}')]
-            for prefix, namespace in self.nsmap.items():
-                if ns == namespace:
-                    if prefix is None:
-                        tag = len(self.tag) - len(ns)
-                    else:
-                        tag = len(self.tag) - len(ns) + len(prefix) + 1
-                    break
-            else:
-                raise ValueError(
-                    "Unable to determine tag length: %s." % self.tag)
-                
-            tail = xml[length+tag:-tag-1]
-            
-            return tail
-
-    def convert_cdata_section(node):
-        parent = node.getparent()
-        if parent is not None:
-            index = parent.index(node)
-            element = node.makeelement(node.tag, nsmap=node.nsmap)
-            element.append(node)
-            xml = lxml.etree.tostring(element, encoding='utf-8', with_tail=False)
-            parent.insert(index, node)
-        else:
-            xml = lxml.etree.tostring(node, encoding='utf-8', with_tail=False)
-            
-        if 'CDATA' in xml:
-            node._convert_cdata_sections()
-            for child in tuple(node):
-                convert_cdata_section(child)
-        
-    def parse(body, element_mapping, fallback=None):
-        lookup = lxml.etree.ElementNamespaceClassLookup(
-            fallback=lxml.etree.ElementDefaultClassLookup(fallback))
-        parser = lxml.etree.XMLParser(resolve_entities=False, strip_cdata=False)
-        parser.setElementClassLookup(lookup)
-
-        # lxml 1.3-compatibility
+    # attach expat parser methods
+    parser = ExpatParser(parser, body, expat)
+    for name in type(parser).__dict__.keys():
         try:
-            ns_lookup = lookup.get_namespace
+            setattr(expat, name, getattr(parser, name))
         except AttributeError:
-            ns_lookup = lxml.etree.Namespace
+            pass
 
-        for key, mapping in element_mapping.items():
-            ns_lookup(key).update(mapping)
+    # parse document body 
+    expat.Parse(body, 1)
 
-        tree = lxml.etree.parse(BufferIO(body), parser)
-        root = tree.getroot()
+    # return document root tree
+    return parser.root.getroottree()
 
-        convert_cdata_section(root)
-        
-        return root, tree.docinfo.doctype
-
-except ImportError:
-    ET = import_elementtree()
-
-    XMLSyntaxError = SyntaxError
-
-    def elements_with_attribute(element, ns, name, value=None):
-        attributes = utils.get_attributes_from_namespace(
-            element, ns)
-
-        if value is not None:
-            if value in (
-                attributes.get(name), attributes.get('{%s}%s' % (ns, name))):
-                yield element
-        elif 'name' in attributes or '{%s}%s' % (ns, name) in attributes:
-            yield element
-            
-        for child in element:
-            for match in elements_with_attribute(child, ns, name):
-                yield match
-            
-    class ElementBase(object, ET._ElementInterface):
-        _parent = None
-        
-        def __new__(cls, tag, attrs=None):
-            return element_factory(tag, attrs)
-
-        def __init__(self, tag, attrs=None):
-            if attrs is None:
-                attrs = {}
-            
-            ET._ElementInterface.__init__(self, tag, attrs)
-            
-        def getparent(self):
-            return self._parent
-
-        def getroottree(self):
-            while self._parent is not None:
-                self = self._parent
-            class roottree(object):
-                @classmethod
-                def getroot(cls):
-                    return self
-            return roottree
-            
-        def insert(self, position, element):
-            element._parent = self
-            ET._ElementInterface.insert(self, position, element)
-
-        def tostring(self):
-            return ET.tostring(self)
-
-        def xpath(self, path, namespaces={}):
-            raise NotImplementedError(
-                "No XPath-engine available.")
-            
-        @property
-        def nsmap(self):
-            # TODO: Return correct namespace map
-            return {None: config.XHTML_NS}
-
-        @property
-        def prefix(self):
-            try:
-                ns, prefix = self.tag.split('}')
-            except:
-                return None
-            
-            for prefix, namespace in self.nsmap.items():
-                if namespace == ns:
-                    return prefix
-            
-    namespaces = {}
-    def ns_lookup(ns):
-        return namespaces.setdefault(ns, {})
-
-    class TreeBuilder(ET.TreeBuilder):
-        def start(self, tag, attrs):
-            if len(self._elem):
-                parent = self._elem[-1]
-            else:
-                parent = None
-            elem = ET.TreeBuilder.start(self, tag, attrs)
-            elem._parent = parent
-            elem.makeelement = self._factory
-            
-    class XMLParser(ET.XMLParser):
-        def __init__(self, **kwargs):
-            ET.XMLParser.__init__(self, **kwargs)
-
-            # this makes up for ET's lack of support for comments and
-            # processing instructions
-            self._parser.CommentHandler = self.handle_comment
-            self._parser.ProcessingInstructionHandler = self.handle_pi
-            self._parser.StartCdataSectionHandler = self.handle_cdata_start
-            self._parser.EndCdataSectionHandler = self.handle_cdata_end
-       
-        def doctype(self, name, pubid, system):
-            self.doctype = u'<!DOCTYPE %(name)s PUBLIC "%(pubid)s" "%(system)s">' % \
-                           dict(name=name, pubid=pubid, system=system)
-
-        def handle_comment(self, data):
-            name = utils.tal_attr('comment')
-            self._target.start(name, {})
-            self._target.data("<!-- %s -->" % data)
-            self._target.end(name)
-
-        def handle_pi(self, target, data):
-            name = utils.tal_attr('pi')
-            self._target.start(name, {})
-            self._target.data("<?%(target)s %(data)s?>" % dict(target=target, data=data))
-            self._target.end(name)
-
-        def handle_cdata_start(self):
-            self._target.start(utils.xhtml_attr('cdata'), {
-                utils.tal_attr('cdata'): ''})
-
-        def handle_cdata_end(self):
-            self._target.end(utils.xhtml_attr('cdata'))
-            
-    def parse(body, element_mapping, fallback=None):
-        def element_factory(tag, attrs=None, nsmap=None):
-            if attrs is None:
-                attrs = {}
-
-            if '{' in tag:
-                ns = tag[tag.find('{')+1:tag.find('}')]
-                ns_tag = tag[tag.find('}')+1:]
-            else:
-                ns = None
-                ns_tag = None
-
-            namespace = element_mapping[ns]
-            factory = namespace.get(ns_tag) or namespace.get(None) or fallback
-
-            element = object.__new__(factory)
-            element.__init__(tag, attrs)
-
-            return element
-        
-        target = TreeBuilder(element_factory=element_factory)
-        parser = XMLParser(target=target)
-        parser.entity = dict([(name, "&%s;" % name) for name in htmlentitydefs.entitydefs])
-        parser.feed(body)
-
-        root = parser.close()
-
-        return root, parser.doctype
+def serialize(tree):
+    """Serialize tree using lxml."""
+    
+    return lxml.etree.tostring(tree, encoding='utf-8')

Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/template.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/template.py	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/template.py	2008-12-02 13:34:30 UTC (rev 93536)
@@ -22,7 +22,6 @@
     format = 'xml'
     filename = '<string>'
     
-    implicit_doctype = doctypes.xhtml
     explicit_doctype = None
     
     def __init__(self, body, parser, format=None, doctype=None, encoding=None):
@@ -52,7 +51,6 @@
     def compiler(self):
         return self.compilers[self.format](
             self.body, self.parser,
-            implicit_doctype=self.implicit_doctype,
             explicit_doctype=self.explicit_doctype,
             encoding=self.encoding)
 

Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/template.txt
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/template.txt	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/template.txt	2008-12-02 13:34:30 UTC (rev 93536)
@@ -11,7 +11,7 @@
   ... <div xmlns="http://www.w3.org/1999/xhtml">
   ...   Hello World!
   ... </div>""", mock_parser)()
-  <div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
     Hello World!
   </div>
 
@@ -23,7 +23,7 @@
   >>> path = tests.__path__[0]
   >>> t = TemplateFile(path+'/helloworld.pt', mock_parser)
   >>> print t()
-  <div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
     Hello World!
   </div>
 
@@ -50,7 +50,7 @@
 
   >>> template = TemplateFile(path+"/xinclude1.pt", mock_parser)
   >>> print template()
-  <div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
     <div>
     <span>Hello, world!</span>
     </div>
@@ -61,7 +61,7 @@
   ...     template.registry[key] = loads(dumps(bct))
 
   >>> print template()
-  <div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
     <div>
     <span>Hello, world!</span>
     </div>

Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/testing.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/testing.py	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/testing.py	2008-12-02 13:34:30 UTC (rev 93536)
@@ -25,7 +25,7 @@
 
 def compile_xhtml(body, **kwargs):
     compiler = TestCompiler(
-        body, mock_parser, implicit_doctype=doctypes.xhtml)
+        body, mock_parser)
     return compiler(parameters=sorted(kwargs.keys()))
 
 def render_xhtml(body, **kwargs):
@@ -34,14 +34,14 @@
     
 def render_text(body, **kwargs):
     compiler = TestCompiler.from_text(
-        body, mock_parser, implicit_doctype=doctypes.xhtml)
+        body, mock_parser)
     template = compiler(parameters=sorted(kwargs.keys()))
     template.compile()
     return template.render(**kwargs)    
 
 def compile_template(parser, body, encoding=None, **kwargs):
     compiler = TestCompiler(
-        body, parser, encoding=encoding, implicit_doctype=doctypes.xhtml)
+        body, parser, encoding=encoding)
     template = compiler(parameters=sorted(kwargs.keys()))
     template.compile()
     return template.render(**kwargs)    
@@ -128,7 +128,7 @@
     def render(self, **kwargs):
         compiler = TestCompiler(
             self.body, self.parser,
-            implicit_doctype=doctypes.xhtml, explicit_doctype=self.doctype)
+            explicit_doctype=self.doctype)
         template = compiler(parameters=sorted(kwargs.keys()))
         return template.render(**kwargs)    
 

Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/translation.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/translation.py	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/translation.py	2008-12-02 13:34:30 UTC (rev 93536)
@@ -76,6 +76,10 @@
 
     @property
     def ns_attributes(self):
+        root = self.element.getroottree().getroot()
+        if root is None or utils.get_namespace(root) == config.META_NS:
+            return {}
+        
         prefix_omit = set()
         namespaces = self.element.nsmap.values()
 
@@ -86,10 +90,12 @@
                     prefix_omit.add(prefix)
             parent = parent.getparent()
 
-        return dict(
+        attrs = dict(
             ((prefix and "xmlns:%s" % prefix or "xmlns", ns) for (prefix, ns) in \
              self.element.nsmap.items() if \
              ns not in self.ns_omit and prefix not in prefix_omit))
+
+        return attrs
     
     @property
     def static_attributes(self):
@@ -109,7 +115,7 @@
         if self.element.prefix is None:
             result.update(
                 utils.get_attributes_from_namespace(self.element, None))
-            
+
         return result
 
     @property
@@ -350,9 +356,9 @@
         # include
         elif self.include:
             # compute macro function arguments and create argument string
-            arguments = ", ".join(
-                ("%s=%s" % (arg, arg) for arg in \
-                 set(itertools.chain(*self.stream.scope))))
+            arguments = [
+                "%s=%s" % (arg, arg) for arg in \
+                 set(itertools.chain(*self.stream.scope))]
 
             # XInclude's are similar to METAL macros, except the macro
             # is always defined as the entire template.
@@ -365,7 +371,7 @@
             _.append(clauses.Write(
                 types.template(
                 "%%(xincludes)s.get(%%(include)s, %s).render_xinclude(%s)" % \
-                (repr(self.format), arguments))))
+                (repr(self.format), ", ".join(arguments)))))
             
         # use macro
         elif self.use_macro:
@@ -604,70 +610,25 @@
 
     doctype = None
     xml_declaration = None
-    implicit_doctype = ""
-    
-    def __init__(self, body, parser, implicit_doctype=None,
-                 explicit_doctype=None, encoding=None):
-        # documents without a document type declaration are augmented
-        # with default namespace declarations and proper XML entity
-        # definitions; this represents a 'convention' over
-        # 'configuration' approach to template documents
-        no_doctype_declaration = '<!DOCTYPE' not in body
-        no_xml_declaration = not body.startswith('<?xml ')
-        require_wrapping = no_xml_declaration and no_doctype_declaration
+        
+    def __init__(self, body, parser, explicit_doctype=None, encoding=None):
+        self.tree = parser.parse(body)
+        self.parser = parser
 
-        if no_xml_declaration is False:
-            self.xml_declaration = body[:body.find('\n', body.find('?>'))+1]
+        # it's not clear from the tree if an XML declaration was
+        # present in the document source; the following is a
+        # work-around to ensure that output matches input
+        if '<?xml ' in body:
+            self.xml_declaration = \
+            """<?xml version="%s" encoding="%s" standalone="no" ?>""" % (
+                self.tree.docinfo.xml_version, self.tree.docinfo.encoding)
             
-        # add default namespace declaration if no explicit document
-        # type has been set
-        if implicit_doctype and explicit_doctype is None and require_wrapping:
-            body = """\
-            <meta:declare-ns
-            xmlns="%s" xmlns:tal="%s" xmlns:metal="%s" xmlns:i18n="%s"
-            xmlns:py="%s" xmlns:xinclude="%s" xmlns:meta="%s"
-            >%s</meta:declare-ns>""" % (
-                config.XHTML_NS, config.TAL_NS,
-                config.METAL_NS, config.I18N_NS,
-                config.PY_NS, config.XI_NS, config.META_NS,
-                body)
-
-        # prepend the implicit doctype to the document source and add
-        # entity definitions
-        if implicit_doctype and require_wrapping:
-            implicit_doctype = implicit_doctype[:-1] + '  [ %s ]>' % utils.entities
-            self.implicit_doctype = implicit_doctype
-            body = implicit_doctype + "\n" + body
-
-        # parse document
-        self.root, parsed_doctype = parser.parse(body)
-
-        # explicit document type has priority
+        # explicit document type has priority over a parsed doctype
         if explicit_doctype is not None:
             self.doctype = explicit_doctype
-        elif parsed_doctype and not no_doctype_declaration:
-            self.doctype = parsed_doctype
-
-        # if the document has no XML declaration, limit self-closing
-        # tags to the allowed subset for templates with a non-XML
-        # compliant document type (non-strict); see
-        # http://www.w3.org/TR/xhtml1/#C_3 for more information.
-        ldoctype = (self.doctype or implicit_doctype or "").lower()
-        if no_xml_declaration and \
-               'html' in ldoctype and 'strict' not in ldoctype:
-            for element in self.root.getiterator():
-                try:
-                    tag = element.tag.split('}')[-1]
-                except AttributeError:
-                    continue
-
-                if element.text is None and tag not in (
-                    'area', 'base', 'basefont', 'br',
-                    'hr', 'input', 'img', 'link', 'meta'):
-                    element.text = ""
-                    
-        self.parser = parser
-
+        elif self.tree.docinfo.doctype:
+            self.doctype = self.tree.docinfo.doctype
+        
         if utils.coerces_gracefully(encoding):
             self.encoding = None
         else:
@@ -677,36 +638,47 @@
     def from_text(cls, body, parser, **kwargs):
         compiler = Compiler(
             "<html xmlns='%s'></html>" % config.XHTML_NS, parser,
-            implicit_doctype=None, encoding=kwargs.get('encoding'))
-        compiler.root.text = body
-        compiler.root.meta_omit = ""
+            encoding=kwargs.get('encoding'))
+        root = compiler.tree.getroot()
+        root.text = body
+        root.meta_omit = ""
         return compiler
 
     def __call__(self, macro=None, global_scope=True, parameters=()):
-        if not isinstance(self.root, Element):
+        root = self.tree.getroot()
+
+        if not isinstance(root, Element):
             raise ValueError(
-                "Must define valid namespace for tag: '%s.'" % self.root.tag)
+                "Must define valid namespace for tag: '%s.'" % root.tag)
 
         # if macro is non-trivial, start compilation at the element
         # where the macro is defined
         if macro:
             elements = tuple(etree.elements_with_attribute(
-                self.root, config.METAL_NS, 'define-macro', macro))
+                root, config.METAL_NS, 'define-macro', macro))
 
             if not elements:
                 raise ValueError("Macro not found: %s." % macro)
 
-            self.root = element = elements[0]
+            element = elements[0]
+            element.meta_translator = root.meta_translator
 
-            # remove attribute from tag
-            if element.nsmap[element.prefix] == config.METAL_NS:
+            # if element is the document root, render as a normal
+            # template, i.e. unset the `macro` mode
+            if root is element:
+                macro = None
+            else:
+                root = element
+
+            # remove macro definition attribute from element
+            if element.nsmap.get(element.prefix) == config.METAL_NS:
                 del element.attrib['define-macro']
             else:
                 del element.attrib[utils.metal_attr('define-macro')]
-                
+
         # initialize code stream object
         stream = generation.CodeIO(
-            self.root.node.symbols, encoding=self.encoding,
+            root.node.symbols, encoding=self.encoding,
             indentation=0, indentation_string="\t")
 
         # initialize variable scope
@@ -747,11 +719,18 @@
             clause.begin(stream)
             clause.end(stream)
 
+        if macro is not None:
+            if macro == "" and 'xmlns' in root.attrib:
+                del root.attrib['xmlns']        
+            wrap = root.makeelement(utils.meta_attr('wrap'))
+            wrap.append(root)
+            root = wrap        
+            
         # output XML headers, if applicable
         if not macro:
             header = ""
             if self.xml_declaration is not None:
-                header += self.xml_declaration
+                header += self.xml_declaration + '\n'
             if self.doctype:
                 doctype = self.doctype + '\n'
                 if self.encoding:
@@ -765,13 +744,9 @@
                 stream.scope.pop()
 
         # start generation
-        self.root.start(stream)
+        root.start(stream)
         body = stream.getvalue()
 
-        # remove namespace declaration
-        if 'xmlns' in self.root.attrib:
-            del self.root.attrib['xmlns']
-        
         # symbols dictionary
         __dict__ = stream.symbols.__dict__
 
@@ -804,11 +779,10 @@
             source = generation.function_wrap(
                 'render', defaults, _globals, body)
 
-        # serialize document
-        xmldoc = self.implicit_doctype + "\n" + self.root.tostring()
+        xmldoc = self.parser.serialize(self.tree)
 
         return ByteCodeTemplate(
-            source, xmldoc, self.parser, self.root)
+            source, xmldoc, self.parser, root)
 
 class ByteCodeTemplate(object):
     """Template compiled to byte-code."""
@@ -875,7 +849,7 @@
         source = state['source']
         xmldoc = state['xmldoc']
         parser = state['parser']
-        tree, doctype = parser.parse(xmldoc)        
+        tree = parser.parse(xmldoc)        
 
         bind = sys.modules['types'].FunctionType(
             marshal.loads(state['code']), GLOBALS, "bind")

Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/translation.txt
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/translation.txt	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/translation.txt	2008-12-02 13:34:30 UTC (rev 93536)
@@ -14,9 +14,9 @@
   ... <div xmlns="http://www.w3.org/1999/xhtml">
   ...   Hello World!
   ... </div>""")
-    <div>
-      Hello World!
-    </div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
+    Hello World!
+  </div>
 
 :: Setting DOCTYPE
 
@@ -38,10 +38,10 @@
   ...   La Peña
   ...   <img alt="La Peña" />
   ... </div>""")
-    <div>
-      La Peña
-      <img alt="La Pe&ntilde;a" />
-    </div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
+    La Peña
+    <img alt="La Pe&ntilde;a" />
+  </div>
 
 :: CDATA blocks
   >>> print render_xhtml("""\
@@ -51,12 +51,12 @@
   ...   /* ]]> */
   ...   <span>Not protected</span> <![CDATA[ This is protected ]]>
   ... </div>""")
-    <div>
-      /* <![CDATA[ */
-      This is protected
-      /* ]]> */
-      <span>Not protected</span> <![CDATA[ This is protected ]]>
-    </div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
+    /* <![CDATA[ */
+    This is protected
+    /* ]]> */
+    <span>Not protected</span> <![CDATA[ This is protected ]]>
+  </div>
 
 Literals
 --------
@@ -84,7 +84,7 @@
   ...   >&rarr;</a>
   ...   <span class="&rarr;"></span>
   ... </html>""")
-    <html>
+    <html xmlns="http://www.w3.org/1999/xhtml">
       Hello &nbsp; World!
       <a href="localhost" title="Singing &amp; Dancing">&rarr;</a>
       <span class="&rarr;"></span>
@@ -97,7 +97,7 @@
   ...   <?xml-stylesheet href="classic.xsl" type="text/xml"?>
   ...   Hello World!
   ... </html>""")
-    <html>
+    <html xmlns="http://www.w3.org/1999/xhtml">
       <?xml-stylesheet href="classic.xsl" type="text/xml"?>
       Hello World!
     </html>
@@ -110,7 +110,7 @@
   ...   <!-- a multi-
   ...        line comment -->
   ... </div>""")
-  <div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
     <!-- a comment -->
     <!-- a multi-
          line comment -->
@@ -122,7 +122,7 @@
   ... <html xmlns="http://www.w3.org/1999/xhtml">
   ...   <!-- hello world -->
   ... </html>""")
-  <html>
+  <html xmlns="http://www.w3.org/1999/xhtml">
     <!-- hello world -->
   </html>
   
@@ -174,7 +174,7 @@
 Let's try and render the template.
  
   >>> print template.render()
-  <div>
+  <div xmlns="http://www.w3.org/1999/xhtml">
     Hello World!
   </div>
 
@@ -193,19 +193,6 @@
   >>> render_xhtml(body)
   Traceback (most recent call last):
     ...
-  XMLSyntaxError: ...
+  ExpatError: unclosed token: line 1, column 0
 
 :: Missing namespace definition
-
-If a document type is provided, namespaces must be declared.
-
-  >>> body = """\
-  ... <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
-  ...                       "http://www.w3.org/TR/html4/loose.dtd">
-  ... <div xmlns="http://www.w3.org/1999/xhtml" tal:content="'Hello World'" />
-  ... """
-
-  >>> print render_xhtml(body)
-  Traceback (most recent call last):
-    ...
-  XMLSyntaxError: Namespace prefix tal for content on div is not defined...

Modified: Sandbox/malthe/chameleon.core/src/chameleon/core/utils.py
===================================================================
--- Sandbox/malthe/chameleon.core/src/chameleon/core/utils.py	2008-12-02 13:30:40 UTC (rev 93535)
+++ Sandbox/malthe/chameleon.core/src/chameleon/core/utils.py	2008-12-02 13:34:30 UTC (rev 93536)
@@ -280,7 +280,7 @@
 def get_namespace(element):
     if '}' in element.tag:
         return element.tag.split('}')[0][1:]
-    return element.nsmap[None]
+    return element.nsmap.get(None)
 
 def xhtml_attr(name):
     return "{%s}%s" % (config.XHTML_NS, name)



More information about the Checkins mailing list