[Checkins] SVN: zope3org/trunk/src/zorg/restsupport/__init__.py Added some convenient conversion functions that guess whether a text is html or ReSt

Uwe Oestermeier uwe_oestermeier at iwm-kmrc.de
Wed Mar 29 03:29:45 EST 2006


Log message for revision 66254:
  Added some convenient conversion functions that guess whether a text is html or ReSt

Changed:
  U   zope3org/trunk/src/zorg/restsupport/__init__.py

-=-
Modified: zope3org/trunk/src/zorg/restsupport/__init__.py
===================================================================
--- zope3org/trunk/src/zorg/restsupport/__init__.py	2006-03-29 00:00:39 UTC (rev 66253)
+++ zope3org/trunk/src/zorg/restsupport/__init__.py	2006-03-29 08:29:44 UTC (rev 66254)
@@ -11,6 +11,9 @@
 # FOR A PARTICULAR PURPOSE.
 #
 ##############################################################################
+
+import sgmllib
+
 from cStringIO import StringIO
 from zope.app.renderer.rest import ReStructuredTextSourceFactory
 from zope.app.renderer.rest import ReStructuredTextToHTMLRenderer
@@ -96,3 +99,87 @@
         if not lines :
             raise RuntimeError, "cannot convert fragments"
         return "\n".join(lines)
+
+
+
+def guess_html(text) :
+    """
+    Tries to guess whether a text is html. Extracts the tags
+    and returns true if the text contains tags.
+    
+    Modified from 
+    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/303227
+    
+    >>> guess_html("paragraph")
+    False
+    
+    >>> guess_html("<p>paragraph</p>")
+    True
+    
+    """
+    
+    class Cleaner(sgmllib.SGMLParser):
+    
+      entitydefs={"nbsp": " "}
+    
+      def __init__(self):
+        sgmllib.SGMLParser.__init__(self)
+        self.result = []
+        
+      def do_p(self, *junk):
+        self.result.append('\n')
+        
+      def do_br(self, *junk):
+        self.result.append('\n')
+        
+      def handle_data(self, data):
+        self.result.append(data)
+        
+      def cleaned_text(self):
+        return ''.join(self.result)
+
+    def stripHTML(text):
+      c=Cleaner()
+      try:
+        c.feed(text)
+      except sgmllib.SGMLParseError:
+        return text
+      else:
+        t=c.cleaned_text()
+        return t
+        
+    lt=len(text)
+    if lt==0:
+        return False
+    textWithoutTags=stripHTML(text)
+    tagsChars=lt-len(textWithoutTags)
+    return tagsChars > 0
+    
+def text2html(text) :
+    """ Converts rest to html if necessary. 
+    
+    >>> print text2html('<p>A paragraph</p>')
+    <p>A paragraph</p>
+    
+    >>> print text2html("A paragraph")
+    <p>A paragraph</p>
+    <BLANKLINE>
+    
+    """
+    if guess_html(text) :
+        return text
+    return rest2html(text)
+    
+    
+def text2rest(text) :
+    """ Converts html to rest if necessary. 
+    
+    >>> print text2rest('<p>A paragraph</p>')
+    A paragraph
+    <BLANKLINE>
+    
+    """
+    if guess_html(text) :
+        return html2rest(text, fragment=True)
+    return text
+    
\ No newline at end of file



More information about the Checkins mailing list