[Checkins] SVN: CMF/trunk/CMFCore/ merging ajung-unicode-minefield
branch.
Andreas Jung
andreas at andreas-jung.com
Sat Jan 6 06:52:16 EST 2007
Log message for revision 71731:
merging ajung-unicode-minefield branch.
This branch uses unicode as the internal representation for FSPageTemplate
instances. Instances can specify their encoding through
[default]
charset=xxxxx
within their .metadata file.
Changed:
U CMF/trunk/CMFCore/FSPageTemplate.py
U CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT2.pt.metadata
A CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT3.pt
A CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT3.pt.metadata
A CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT4.pt
A CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT4.pt.metadata
U CMF/trunk/CMFCore/tests/test_FSPageTemplate.py
-=-
Modified: CMF/trunk/CMFCore/FSPageTemplate.py
===================================================================
--- CMF/trunk/CMFCore/FSPageTemplate.py 2007-01-06 02:25:16 UTC (rev 71730)
+++ CMF/trunk/CMFCore/FSPageTemplate.py 2007-01-06 11:52:14 UTC (rev 71731)
@@ -35,6 +35,9 @@
from utils import _dtmldir
from utils import _setCacheHeaders
+
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
+
xml_detect_re = re.compile('^\s*<\?xml\s+(?:[^>]*?encoding=["\']([^"\'>]+))?')
_marker = object()
@@ -94,6 +97,7 @@
# attempt further detection if the default is encountered.
# One previous misbehavior remains: It is not possible to
# force a text./html type if parsing detects it as XML.
+ encoding = None
if getattr(self, 'content_type', 'text/html') == 'text/html':
xml_info = xml_detect_re.match(data)
if xml_info:
@@ -102,8 +106,26 @@
encoding = xml_info.group(1) or 'utf-8'
self.content_type = 'text/xml; charset=%s' % encoding
+
+ if encoding is None:
+ charset = getattr(self, 'charset', None)
+ if charset is None:
+ if self.content_type.startswith('text/html'):
+ charset = charsetFromMetaEquiv(data) or 'iso-8859-15'
+ elif self.content_type.startswith('text/xml'):
+ charset = encodingFromXMLPreamble(data)
+ else:
+ raise ValueError('Unsupported content-type: %s' % self.content_type)
+
+ if not isinstance(data, unicode):
+ data = unicode(data, charset)
+ else:
+ if not isinstance(data, unicode):
+ data = unicode(data, encoding)
+
self.write(data)
+
security.declarePrivate('read')
def read(self):
# Tie in on an opportunity to auto-update
Modified: CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT2.pt.metadata
===================================================================
--- CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT2.pt.metadata 2007-01-06 02:25:16 UTC (rev 71730)
+++ CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT2.pt.metadata 2007-01-06 11:52:14 UTC (rev 71731)
@@ -1,2 +1,2 @@
[default]
-content_type = text/plain
+content_type = text/xml
Copied: CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT3.pt (from rev 71730, CMF/branches/ajung-unicode-minefield/CMFCore/tests/fake_skins/fake_skin/testPT3.pt)
Copied: CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT3.pt.metadata (from rev 71730, CMF/branches/ajung-unicode-minefield/CMFCore/tests/fake_skins/fake_skin/testPT3.pt.metadata)
Copied: CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT4.pt (from rev 71730, CMF/branches/ajung-unicode-minefield/CMFCore/tests/fake_skins/fake_skin/testPT4.pt)
Copied: CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testPT4.pt.metadata (from rev 71730, CMF/branches/ajung-unicode-minefield/CMFCore/tests/fake_skins/fake_skin/testPT4.pt.metadata)
Modified: CMF/trunk/CMFCore/tests/test_FSPageTemplate.py
===================================================================
--- CMF/trunk/CMFCore/tests/test_FSPageTemplate.py 2007-01-06 02:25:16 UTC (rev 71730)
+++ CMF/trunk/CMFCore/tests/test_FSPageTemplate.py 2007-01-06 11:52:14 UTC (rev 71731)
@@ -88,9 +88,26 @@
script = script.__of__(self.root)
script()
self.assertEqual( self.RESPONSE.getHeader('content-type')
- , 'text/plain'
+ , 'text/xml'
)
+ def test_CharsetFromFSMetadata(self):
+ # testPT3 is an UTF-16 encoded file (see its .metadatafile)
+ # is respected
+ script = self._makeOne('testPT3', 'testPT3.pt')
+ script = script.__of__(self.root)
+ data = script.read()
+ self.assertEqual(unicode('123üöäß', 'iso-8859-15') in data, True)
+ self.assertEqual(script.content_type, 'text/html')
+ def test_CharsetFrom2FSMetadata(self):
+ # testPT4 is an UTF-8 encoded file (see its .metadatafile)
+ # is respected
+ script = self._makeOne('testPT4', 'testPT4.pt')
+ script = script.__of__(self.root)
+ data = script.read()
+ self.assertEqual(unicode('123üöäß', 'iso-8859-15') in data, True)
+ self.assertEqual(script.content_type, 'text/html')
+
def test_BadCall( self ):
script = self._makeOne( 'testPTbad', 'testPTbad.pt' )
script = script.__of__(self.root)
More information about the Checkins
mailing list