[Checkins]
SVN: CMF/branches/ajung-unicode-minefield/CMFCore/FSPageTemplate.py
- smarter sniffing for the content-encoding
Andreas Jung
andreas at andreas-jung.com
Wed Dec 20 13:13:41 EST 2006
Log message for revision 71631:
- smarter sniffing for the content-encoding
- checking for a default charset in the metadata
- internal unicode storage
Changed:
U CMF/branches/ajung-unicode-minefield/CMFCore/FSPageTemplate.py
-=-
Modified: CMF/branches/ajung-unicode-minefield/CMFCore/FSPageTemplate.py
===================================================================
--- CMF/branches/ajung-unicode-minefield/CMFCore/FSPageTemplate.py 2006-12-20 18:12:47 UTC (rev 71630)
+++ CMF/branches/ajung-unicode-minefield/CMFCore/FSPageTemplate.py 2006-12-20 18:13:40 UTC (rev 71631)
@@ -35,6 +35,9 @@
from utils import _dtmldir
from utils import _setCacheHeaders
+
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
+
xml_detect_re = re.compile('^\s*<\?xml\s+(?:[^>]*?encoding=["\']([^"\'>]+))?')
_marker = object()
@@ -94,6 +97,7 @@
# attempt further detection if the default is encountered.
# One previous misbehavior remains: It is not possible to
# force a text/html type if parsing detects it as XML.
+ encoding = None
if getattr(self, 'content_type', 'text/html') == 'text/html':
xml_info = xml_detect_re.match(data)
if xml_info:
@@ -102,8 +106,26 @@
encoding = xml_info.group(1) or 'utf-8'
self.content_type = 'text/xml; charset=%s' % encoding
+
+ if encoding is None:
+ charset = getattr(self, 'charset', None)
+ if charset is None:
+ if self.content_type.startswith('text/html'):
+ charset = charsetFromMetaEquiv(data) or 'iso-8859-15'
+ elif self.content_type.startswith('text/xml'):
+ charset = encodingFromXMLPreamble(data)
+ else:
+ raise ValueError('Unsupported content-type: %s' % self.content_type)
+
+ if not isinstance(data, unicode):
+ data = unicode(data, charset)
+ else:
+ if not isinstance(data, unicode):
+ data = unicode(data, encoding)
+
self.write(data)
+
security.declarePrivate('read')
def read(self):
# Tie in on an opportunity to auto-update
More information about the Checkins
mailing list