[Zope-Checkins] SVN: Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/ improved XML preamble handling, not complete so far

Andreas Jung andreas at andreas-jung.com
Sat Jan 27 04:34:54 EST 2007


Log message for revision 72237:
  improved XML preamble handling, not complete so far
  

Changed:
  U   Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/ZopePageTemplate.py
  U   Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
  U   Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/utils.py
  U   Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/ZPublisher/HTTPResponse.py

-=-
Modified: Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/ZopePageTemplate.py
===================================================================
--- Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/ZopePageTemplate.py	2007-01-26 16:50:29 UTC (rev 72236)
+++ Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/ZopePageTemplate.py	2007-01-27 09:34:51 UTC (rev 72237)
@@ -41,7 +41,7 @@
 from Products.PageTemplates.Expressions import SecureModuleImporter
 
 from Products.PageTemplates.utils import encodingFromXMLPreamble, \
-         charsetFromMetaEquiv, convertToUnicode
+         charsetFromMetaEquiv, convertToUnicode, removeXMLPreamble
             
 
 preferred_encodings = ['utf-8', 'iso-8859-15']
@@ -160,6 +160,8 @@
                                                content_type, 
                                                preferred_encodings)
             output_encoding = encoding
+        if content_type in ('text/xml',):
+            text = removeXMLPreamble(text)
 
         # for content updated through WebDAV, FTP 
         if not keep_output_encoding:
@@ -231,8 +233,6 @@
             text = file.read()
 
         content_type = guess_type(filename, text)   
-#        if not content_type in ('text/html', 'text/xml'):
-#            raise ValueError('Unsupported mimetype: %s' % content_type)
 
         self.pt_edit(text, content_type)
         return self.pt_editForm(manage_tabs_message='Saved changes')
@@ -294,6 +294,9 @@
                                               preferred_encodings)
             self.output_encoding = encoding
 
+        if self.content_type in ('text/xml',):
+            text = removeXMLPreamble(text)
+
         self.ZCacheable_invalidate()
         ZopePageTemplate.inheritedAttribute('write')(self, text)
 
@@ -359,9 +362,20 @@
     security.declareProtected(ftp_access, 'manage_FTPget')
     def manage_FTPget(self):
         "Get source for FTP download"
+
         result = self.pt_render()
-        return result.encode(self.output_encoding)
+        if self.content_type in ('text/xml', ):
+            result = '<?xml version="1.0" encoding="%s"?>\n' \
+                     % self.output_encoding + result
 
+        try:
+            return result.encode(self.output_encoding)
+        except UnicodeDecodeError:
+            raise PTRuntimeError('Unicode string could not be converted to '
+                                 'configured output encoding (%s)' % \
+                                 self.output_encoding)
+
+
     security.declareProtected(view_management_screens, 'html')
     def html(self):
         return self.content_type == 'text/html'
@@ -409,7 +423,6 @@
             # acquisition context, so we don't know where it is. :-(
             return None
 
-
     def __setstate__(self, state):
         # Perform on-the-fly migration to unicode.
         # Perhaps it might be better to work with the 'generation' module 
@@ -422,10 +435,12 @@
             state['output_encoding'] = encoding
         self.__dict__.update(state) 
 
-
     def pt_render(self, source=False, extra_context={}):
         result = PageTemplate.pt_render(self, source, extra_context)
         assert isinstance(result, unicode)
+#        if self.content_type in ('text/xml', ):
+#            result = u'<?xml version="1.0" encoding="%s"?>\n' \
+#                     % self.output_encoding + result
         return result
 
 
@@ -459,7 +474,6 @@
         else:
             content_type = guess_type(filename, text) 
 
-
     else:
         if hasattr(text, 'read'):
             filename = getattr(text, 'filename', '')

Modified: Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
===================================================================
--- Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py	2007-01-26 16:50:29 UTC (rev 72236)
+++ Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py	2007-01-27 09:34:51 UTC (rev 72237)
@@ -19,7 +19,8 @@
 from Testing.makerequest import makerequest
 from Testing.ZopeTestCase import ZopeTestCase, installProduct
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
-from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv, \
+                      removeXMLPreamble
 from zope.component import provideUtility
 from Products.PageTemplates.interfaces import IUnicodeEncodingConflictResolver
 from Products.PageTemplates.unicodeconflictresolver import PreferredCharsetResolver
@@ -250,7 +251,13 @@
         pt = self.app.pt1
         self.assertEqual(pt.document_src(), self.text)
 
+    def testRemoveXMLPreamble(self):
+        xml = '<?xml version="1.0"?><foo>bar</foo>'
+        self.assertEqual(removeXMLPreamble(xml), '<foo>bar</foo>')
+        xml = '<foo>bar</foo>'
+        self.assertEqual(removeXMLPreamble(xml), xml)
 
+
 class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):
 
     def setUp(self):

Modified: Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/utils.py
===================================================================
--- Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/utils.py	2007-01-26 16:50:29 UTC (rev 72236)
+++ Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/Products/PageTemplates/utils.py	2007-01-27 09:34:51 UTC (rev 72237)
@@ -86,3 +86,10 @@
 
     return unicode(source), None
 
+
+def removeXMLPreamble(xml):
+    """ Remove the preamble """
+    if xml.startswith('<?'):
+        rpos = xml.find('?>')
+        xml = xml[rpos+2:]
+    return xml

Modified: Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/ZPublisher/HTTPResponse.py
===================================================================
--- Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/ZPublisher/HTTPResponse.py	2007-01-26 16:50:29 UTC (rev 72236)
+++ Zope/branches/ajung-final-zpt-zpublisher-fixes/lib/python/ZPublisher/HTTPResponse.py	2007-01-27 09:34:51 UTC (rev 72237)
@@ -338,14 +338,14 @@
         isHTML = self.isHTML(self.body)
         if not self.headers.has_key('content-type'):
             if isHTML:
-                c = 'text/html; charset=%s' % default_encoding
+                c = 'text/html; charset=%s' % getattr(self, 'default_encoding', default_encoding)
             else:
-                c = 'text/plain; charset=%s' % default_encoding
+                c = 'text/plain; charset=%s' % getattr(self, 'default_encoding', default_encoding)
             self.setHeader('content-type', c)
         else:
             c = self.headers['content-type']
             if c.startswith('text/') and not 'charset=' in  c:
-                c = '%s; charset=%s' % (c, default_encoding)                
+                c = '%s; charset=%s' % (c, getattr(self, 'default_encoding', default_encoding))                
                 self.setHeader('content-type', c)
 
         # Some browsers interpret certain characters in Latin 1 as html
@@ -442,38 +442,47 @@
         return self.use_HTTP_content_compression
 
     def _encode_unicode(self,body,
-                        charset_re=re.compile(r'(?:application|text)/[-+0-9a-z]+\s*;\s*' +
+                        charset_re=re.compile(r'((?:application|text)/[-+0-9a-z]+)\s*;\s*' +
                                               r'charset=([-_0-9a-z]+' +
                                               r')(?:(?:\s*;)|\Z)',
                                               re.IGNORECASE)):
 
         def fix_xml_preamble(body, encoding):
-            """ fixes the encoding in the XML preamble according
-                to the charset specified in the content-type header.
+            """ Either fix the 'encoding' of the XML preamble
+                or ensure that the XML starts with a preamble.
             """
 
+            preamble = u'<?xml version="1.0" encoding="%s" ?>' % encoding 
+
             if body.startswith('<?xml'):
                 pos_right = body.find('?>')  # right end of the XML preamble
-                body = ('<?xml version="1.0" encoding="%s" ?>' % encoding) + body[pos_right+2:]
-            return body
+                return preamble + body[pos_right+2:]
+            else:
+                return preamble + body
 
         # Encode the Unicode data as requested
 
-        ct = self.headers.get('content-type')
-        if ct:
-            match = charset_re.match(ct)
-            if match:
-                encoding = match.group(1)
-                body = body.encode(encoding)
+        content_type = self.headers.get('content-type')
+        if content_type:
+
+            mo = charset_re.match(content_type)
+            if mo:
+                ct = mo.group(1)
+                encoding = mo.group(2)
+            else:
+                ct = content_type
+                encoding = getattr(self, 'default_encoding', default_encoding)
+
+            if ct == 'text/xml':
                 body = fix_xml_preamble(body, encoding)
-                return body
+                self.headers['content-type'] = '%s; charset=%s' % (ct, encoding)
+                return body.encode(encoding)
             else:
-                if ct.startswith('text/') or ct.startswith('application/'):
-                    self.headers['content-type'] = '%s; charset=%s' % (ct, default_encoding)
+                self.headers['content-type'] = '%s; charset=%s' % (ct, encoding)
+                return body.encode(encoding)
 
         # Use the default character encoding
-        body = body.encode(default_encoding, 'replace')
-        body = fix_xml_preamble(body, default_encoding)
+        body = body.encode(getattr(self, 'default_encoding', default_encoding), 'replace')
         return body
 
     def setBase(self,base):



More information about the Zope-Checkins mailing list