[Zope-Checkins] SVN: Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/ refactored code to extract the encoding from the XML preamble and the charset

Andreas Jung andreas at andreas-jung.com
Mon Dec 18 04:30:10 EST 2006


Log message for revision 71585:
  refactored code to extract the encoding from the XML preamble and the charset
  from a <meta http-equiv..> tag
  

Changed:
  U   Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
  A   Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py

-=-
Modified: Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
===================================================================
--- Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py	2006-12-18 09:20:36 UTC (rev 71584)
+++ Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py	2006-12-18 09:30:09 UTC (rev 71585)
@@ -16,6 +16,7 @@
 from Testing.makerequest import makerequest
 from Testing.ZopeTestCase import ZopeTestCase, installProduct
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
 
 
 ascii_str = '<html><body>hello world</body></html>'
@@ -59,7 +60,24 @@
 
 installProduct('PageTemplates')
 
+class ZPTUtilsTests(unittest.TestCase):
 
+    def testExtractEncodingFromXMLPreamble(self):
+        extract = encodingFromXMLPreamble
+        self.assertEqual(extract('<?xml version="1.0" ?>'), 'utf-8')
+        self.assertEqual(extract('<?xml encoding="utf-8" version="1.0" ?>'), 'utf-8')
+        self.assertEqual(extract('<?xml encoding="UTF-8" version="1.0" ?>'), 'utf-8')
+        self.assertEqual(extract('<?xml encoding="ISO-8859-15" version="1.0" ?>'), 'iso-8859-15')
+        self.assertEqual(extract('<?xml encoding="iso-8859-15" version="1.0" ?>'), 'iso-8859-15')
+
+    def testExtractCharsetFromMetaHTTPEquivTag(self):
+        extract = charsetFromMetaEquiv
+        self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>'), 'utf-8')
+        self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>'), 'iso-8859-15')
+        self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html"></html>'), None)
+        self.assertEqual(extract('<html>...<html>'), None)
+        
+
 class ZopePageTemplateFileTests(ZopeTestCase):
 
     def testPT_RenderWithAscii(self):
@@ -67,7 +85,7 @@
         zpt = self.app['test']
         result = zpt.pt_render()
         # use startswith() because the renderer appends a trailing \n
-        self.assertEqual(result.startswith(ascii_str), True)
+        self.assertEqual(result.encode('ascii').startswith(ascii_str), True)
         self.assertEqual(zpt.output_encoding, 'iso-8859-15')
 
     def testPT_RenderWithISO885915(self):
@@ -75,15 +93,16 @@
         zpt = self.app['test']
         result = zpt.pt_render()
         # use startswith() because the renderer appends a trailing \n
-        self.assertEqual(result.startswith(iso885915_str), True)
+        self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True)
         self.assertEqual(zpt.output_encoding, 'iso-8859-15')
 
     def testPT_RenderWithUTF8(self):
+        import pdb; pdb.set_trace() 
         manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
         zpt = self.app['test']
         result = zpt.pt_render()
         # use startswith() because the renderer appends a trailing \n
-        self.assertEqual(result.startswith(utf8_str), True)
+        self.assertEqual(result.encode('utf-8').startswith(utf8_str), True)
         self.assertEqual(zpt.output_encoding, 'iso-8859-15')
 
     def _createZPT(self):
@@ -243,9 +262,11 @@
 
        
 def test_suite():
-    suite = unittest.makeSuite(ZPTRegressions)
-    suite.addTests(unittest.makeSuite(ZPTMacros))
-    suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
+#    suite = unittest.makeSuite(ZPTRegressions)
+    suite = unittest.makeSuite(ZPTUtilsTests)
+#    suite.addTests(unittest.makeSuite(ZPTUtilsTests))
+#    suite.addTests(unittest.makeSuite(ZPTMacros))
+#    suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
     return suite
 
 if __name__ == '__main__':

Added: Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py
===================================================================
--- Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py	2006-12-18 09:20:36 UTC (rev 71584)
+++ Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py	2006-12-18 09:30:09 UTC (rev 71585)
@@ -0,0 +1,59 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+
+""" Some helper methods 
+
+$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
+"""
+
+import re 
+
+
+xml_preamble_reg = re.compile(r'^<\?xml.*?encoding="(.*?)".*?\?>', re.M)
+http_equiv_reg = re.compile(r'(<meta.*?http\-equiv.*?content-type.*?>)', re.I|re.M|re.S)
+http_equiv_reg2 = re.compile(r'charset.*?=.*?(?P<charset>[\w\-]*)', re.I|re.M|re.S)
+
+def encodingFromXMLPreamble(xml):
+    """ Extract the encoding from a xml preamble.
+        Return 'utf-8' if not available
+    """
+
+    mo = xml_preamble_reg.match(xml)
+
+    if not mo:
+        return 'utf-8'
+    else:
+        return mo.group(1).lower()
+
+
+def charsetFromMetaEquiv(html):                                    
+    """ Return the value of the 'charset' from a html document
+        containing <meta http-equiv="content-type" content="text/html; charset=utf8">.
+        Returns None, if not available.
+    """
+
+    # first check for the <meta...> tag
+    mo = http_equiv_reg.search(html)
+    if mo:
+        # extract the meta tag
+        meta = mo.group(1)
+
+        # search for the charset value
+        mo = http_equiv_reg2.search(meta)
+        if mo:
+            # return charset 
+            return mo.group(1).lower()
+
+    return None
+
+



More information about the Zope-Checkins mailing list