[Checkins] SVN: zope.contenttype/trunk/ Improved text_type(). Based on the patch from http://www.zope.org/Collectors/Zope/2355/

Sun Sep 16 06:34:56 EDT 2007

Log message for revision 79688:
  Improved text_type(). Based on the patch from
  http://www.zope.org/Collectors/Zope/2355/
  

Changed:
  U   zope.contenttype/trunk/CHANGES.txt
  U   zope.contenttype/trunk/src/zope/contenttype/__init__.py
  U   zope.contenttype/trunk/src/zope/contenttype/tests/testContentTypes.py

-=-
Modified: zope.contenttype/trunk/CHANGES.txt
===================================================================

--- zope.contenttype/trunk/CHANGES.txt	2007-09-15 22:43:25 UTC (rev 79687)
+++ zope.contenttype/trunk/CHANGES.txt	2007-09-16 10:34:55 UTC (rev 79688)
@@ -1,7 +1,13 @@
 Change History
 ==============
 
+3.5.0 (unreleased)
+------------------
 
+Improved text_type(). Based on the patch from
+http://www.zope.org/Collectors/Zope/2355/
+
+
 3.4.0 (2007-09-13)
 ------------------
 

Modified: zope.contenttype/trunk/src/zope/contenttype/__init__.py
===================================================================
--- zope.contenttype/trunk/src/zope/contenttype/__init__.py	2007-09-15 22:43:25 UTC (rev 79687)
+++ zope.contenttype/trunk/src/zope/contenttype/__init__.py	2007-09-16 10:34:55 UTC (rev 79688)
@@ -13,9 +13,10 @@
 
 """A utility module for content-type handling.
 
-$Id: content_types.py 24764 2004-05-17 06:13:48Z philikon $
+$Id$
 """
 
+import string
 import re
 import os.path
 import mimetypes
@@ -23,20 +24,35 @@
 
 find_binary = re.compile('[\0-\7]').search
 
+  
 def text_type(s):
-    s = s.strip()
+    """See if we can figure out the type by content.
+    We may want this to be efficient for WebDAV et al.
+    """
 
-    # Yuk. See if we can figure out the type by content.
-    if s.lower().startswith('<html>') or '</' in s:
+    # at least the maximum length of any tags we look for
+    iMAXLEN=14 
+    if len(s) < iMAXLEN: return 'text/plain'
+
+    i = 0
+    while s[i] in string.whitespace: 
+       i += 1
+
+    s = s[i : i+iMAXLEN].lower()
+    
+    if s.startswith('<html>'):
         return 'text/html'
+  
+    if s.startswith('<!doctype html'):
+        return 'text/html'
 
-    elif s.startswith('<?xml'):
+    # what about encodings??
+    if s.startswith('<?xml'):
         return 'text/xml'
+    
+    return 'text/plain'
+ 
 
-    else:
-        return 'text/plain'
-
-
 def guess_content_type(name='', body='', default=None):
     # Attempt to determine the content type (and possibly
     # content-encoding) based on an an object's name and

Modified: zope.contenttype/trunk/src/zope/contenttype/tests/testContentTypes.py
===================================================================
--- zope.contenttype/trunk/src/zope/contenttype/tests/testContentTypes.py	2007-09-15 22:43:25 UTC (rev 79687)
+++ zope.contenttype/trunk/src/zope/contenttype/tests/testContentTypes.py	2007-09-16 10:34:55 UTC (rev 79688)
@@ -13,7 +13,7 @@
 ##############################################################################
 """Tests of the contenttypes extension mechanism.
 
-$Id: testContenttype.py 24763 2004-05-17 05:59:28Z philikon $
+$Id$
 """
 
 import mimetypes
@@ -69,7 +69,17 @@
         self.assertEqual(ctype, "text/x-vnd.zope.test-mime-type-2")
         self.check_types_count(2)
 
+    
+    def test_text_type(self):
+        t = contenttype.text_type
+        self.assertEqual(t('<HtmL><body>hello world</body></html>'), 
+                         'text/html')
+        self.assertEqual(t('<?xml version="1.0"><foo/>'), 'text/xml')
+        self.assertEqual(t('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"' +
+                           ' "http://www.w3.org/TR/html4/loose.dtd">'),
+                           'text/html')
 
+
 def test_suite():
     return unittest.makeSuite(ContentTypesTestCase)