[Checkins] SVN: zope3org/trunk/src/zorg/restsupport/__init__.py
Added some convenient conversion functions that guess whether
a text is html or ReSt
Uwe Oestermeier
uwe_oestermeier at iwm-kmrc.de
Wed Mar 29 03:29:45 EST 2006
Log message for revision 66254:
Added some convenient conversion functions that guess whether a text is html or ReSt
Changed:
U zope3org/trunk/src/zorg/restsupport/__init__.py
-=-
Modified: zope3org/trunk/src/zorg/restsupport/__init__.py
===================================================================
--- zope3org/trunk/src/zorg/restsupport/__init__.py 2006-03-29 00:00:39 UTC (rev 66253)
+++ zope3org/trunk/src/zorg/restsupport/__init__.py 2006-03-29 08:29:44 UTC (rev 66254)
@@ -11,6 +11,9 @@
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
+
+import sgmllib
+
from cStringIO import StringIO
from zope.app.renderer.rest import ReStructuredTextSourceFactory
from zope.app.renderer.rest import ReStructuredTextToHTMLRenderer
@@ -96,3 +99,87 @@
if not lines :
raise RuntimeError, "cannot convert fragments"
return "\n".join(lines)
+
+
+
+def guess_html(text) :
+ """
+ Tries to guess whether a text is html. Extracts the tags
+ and returns true if the text contains tags.
+
+ Modified from
+ http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/303227
+
+ >>> guess_html("paragraph")
+ False
+
+ >>> guess_html("<p>paragraph</p>")
+ True
+
+ """
+
+ class Cleaner(sgmllib.SGMLParser):
+
+ entitydefs={"nbsp": " "}
+
+ def __init__(self):
+ sgmllib.SGMLParser.__init__(self)
+ self.result = []
+
+ def do_p(self, *junk):
+ self.result.append('\n')
+
+ def do_br(self, *junk):
+ self.result.append('\n')
+
+ def handle_data(self, data):
+ self.result.append(data)
+
+ def cleaned_text(self):
+ return ''.join(self.result)
+
+ def stripHTML(text):
+ c=Cleaner()
+ try:
+ c.feed(text)
+ except sgmllib.SGMLParseError:
+ return text
+ else:
+ t=c.cleaned_text()
+ return t
+
+ lt=len(text)
+ if lt==0:
+ return False
+ textWithoutTags=stripHTML(text)
+ tagsChars=lt-len(textWithoutTags)
+ return tagsChars > 0
+
+def text2html(text) :
+ """ Converts rest to html if necessary.
+
+ >>> print text2html('<p>A paragraph</p>')
+ <p>A paragraph</p>
+
+ >>> print text2html("A paragraph")
+ <p>A paragraph</p>
+ <BLANKLINE>
+
+ """
+ if guess_html(text) :
+ return text
+ return rest2html(text)
+
+
+def text2rest(text) :
+ """ Converts html to rest if necessary.
+
+ >>> print text2rest('<p>A paragraph</p>')
+ A paragraph
+ <BLANKLINE>
+
+ """
+ if guess_html(text) :
+ return html2rest(text, fragment=True)
+ return text
+
\ No newline at end of file
More information about the Checkins
mailing list