[Checkins] SVN: zope3org/trunk/src/zorg/wikification/ Refactored link processors
Uwe Oestermeier
uwe_oestermeier at iwm-kmrc.de
Tue Apr 11 18:23:45 EDT 2006
Log message for revision 66869:
Refactored link processors
Changed:
U zope3org/trunk/src/zorg/wikification/browser/wikilink.py
U zope3org/trunk/src/zorg/wikification/browser/wikipage.py
U zope3org/trunk/src/zorg/wikification/parser.py
-=-
Modified: zope3org/trunk/src/zorg/wikification/browser/wikilink.py
===================================================================
--- zope3org/trunk/src/zorg/wikification/browser/wikilink.py 2006-04-11 22:20:36 UTC (rev 66868)
+++ zope3org/trunk/src/zorg/wikification/browser/wikilink.py 2006-04-11 22:23:45 UTC (rev 66869)
@@ -118,7 +118,49 @@
class BaseLinkProcessor(BaseHTMLProcessor) :
""" Implements a processor that is able to visit and modify all links.
"""
+
+ absolute_prefixes = 'http:', 'ftp:', 'https:', 'mailto:'
+
+ link_refs = dict(a='href', img='src') # treat 'a href' and 'img src'
+ _url = r'''(?=[a-zA-Z0-9./#]) # Must start correctly
+ ((?: # Match the leading part
+ (?:ftp|https?|telnet|nntp) # protocol
+ :// # ://
+ (?: # Optional 'username:password@'
+ \w+ # username
+ (?::\w+)? # optional :password
+ @ # @
+ )? #
+ [-\w]+(?:\.\w[-\w]*)+ # hostname (sub.example.com)
+ ) #
+ (?::\d+)? # Optional port number
+ (?: # Rest of the URL, optional
+ /? # Start with '/'
+ [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]* # Can't start with these
+ (?: #
+ [.!,?;:]+ # One or more of these
+ [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]+ # Can't finish
+ #'" # # or ' or "
+ )* #
+ )?) #
+ '''
+
+ _email = r'''(?:mailto:)? # Optional mailto:
+ ([-\+\w]+ # username
+ \@ # at
+ [-\w]+(?:\.\w[-\w]*)+) # hostname
+ '''
+
+ url_link = re.compile(_url, re.VERBOSE)
+ email_link = re.compile(_email, re.VERBOSE)
+ text_link = re.compile('\[.*?\]', re.VERBOSE)
+
+
+ def reset(self):
+ BaseHTMLProcessor.reset(self)
+ self.traversed = {}
+
def isAbsoluteURL(self, link) :
""" Returns true if the link is a complete URL.
@@ -126,11 +168,80 @@
might point to a local object.
"""
- for prefix in 'http:', 'ftp:', 'https:', 'mailto:' :
+ for prefix in self.absolute_prefixes :
if link.startswith(prefix) :
return True
return False
+ def onRelativeLink(self, link) :
+ """ Event handler that can be specialized. """
+ return link
+
+ def onWikiTextLink(self, link) :
+ """ Event handler that can be specialized. """
+ return link
+
+ def onAbsoluteLink(self, link) :
+ """ Event handler that can be specialized. """
+ return link
+
+ def traverseLink(self, node, link) :
+ """ Help method that follows a relative link from a context node. """
+ remaining = urllib.unquote(link)
+ if link in self.traversed :
+ return self.traversed[link]
+ path = [x for x in remaining.split("/") if x]
+ while path :
+ try :
+ name = path[0]
+ name = unicode(name, encoding='utf-8')
+ node = zapi.traverseName(node, name)
+ name = path.pop(0)
+ except (TraversalError, UnicodeEncodeError) :
+ break
+ self.traversed[link] = node, path
+ return node, path
+
+ def handle_data(self, text) :
+ """ Called for each text block. Extracts wiki text links. """
+
+ text = re.sub(self.url_link, r'''<a href="\1">\1</a>''', text)
+ text = re.sub(self.email_link, r'''<a href="mailto:\1">\1</a>''', text)
+
+ result = ""
+ end = 0
+ for m in self.text_link.finditer(text):
+
+ start = m.start()
+ result += text[end:start]
+ end = m.end()
+ between = text[start+1:end-1]
+ result += self.onWikiTextLink(between)
+ result += text[end:]
+
+ self.pieces.append(result)
+
+
+ def unknown_starttag(self, tag, attrs):
+ """ Called for each tag. Calls link event handlers. """
+
+ if tag in self.link_refs :
+ result = []
+ for key, value in attrs :
+ if key == self.link_refs[tag] :
+ if self.isAbsoluteURL(value) :
+ value = self.onAbsoluteLink(value)
+ else :
+ value = self.onRelativeLink(value)
+ result.append((key, value))
+
+ BaseHTMLProcessor.unknown_starttag(self, tag, result)
+ return True
+
+ BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
+
+
+
class RelativeLinkProcessor(BaseLinkProcessor) :
""" Implements a processor that converts all relative links
into absolute ones.
@@ -150,25 +261,10 @@
def __init__(self, base_url) :
BaseHTMLProcessor.__init__(self)
self.base_url = base_url
-
- def unknown_starttag(self, tag, attrs):
- """ Called for each tag. Wikifies links. """
- if tag == "a" :
- href = ""
- result = []
- for key, value in attrs :
- if key == "href" :
- if value and not self.isAbsoluteURL(value) :
- value = "%s/%s" % (self.base_url, value)
- result.append((key, value))
-
- BaseHTMLProcessor.unknown_starttag(self, tag, result)
- return True
-
- BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
-
-
+ def onRelativeLink(self, link) :
+ """ Event handler that can be specialized. """
+ return "%s/%s" % (self.base_url, link)
class WikiLinkProcessor(BaseLinkProcessor) :
@@ -182,41 +278,8 @@
adapts(IWikiPage)
command = None
-
- _url = r'''(?=[a-zA-Z0-9./#]) # Must start correctly
- ((?: # Match the leading part
- (?:ftp|https?|telnet|nntp) # protocol
- :// # ://
- (?: # Optional 'username:password@'
- \w+ # username
- (?::\w+)? # optional :password
- @ # @
- )? #
- [-\w]+(?:\.\w[-\w]*)+ # hostname (sub.example.com)
- ) #
- (?::\d+)? # Optional port number
- (?: # Rest of the URL, optional
- /? # Start with '/'
- [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]* # Can't start with these
- (?: #
- [.!,?;:]+ # One or more of these
- [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]+ # Can't finish
- #'" # # or ' or "
- )* #
- )?) #
- '''
+
- _email = r'''(?:mailto:)? # Optional mailto:
- ([-\+\w]+ # username
- \@ # at
- [-\w]+(?:\.\w[-\w]*)+) # hostname
- '''
-
- url_link = re.compile(_url, re.VERBOSE)
- email_link = re.compile(_email, re.VERBOSE)
- text_link = re.compile('\[.*?\]', re.VERBOSE)
-
-
def __init__(self, page) :
BaseHTMLProcessor.__init__(self)
self.page = page
@@ -312,20 +375,6 @@
return False, self.absoluteWikiLink(node)
-
- def traverseLink(self, node, link) :
- remaining = urllib.unquote(link)
- path = [x for x in remaining.split("/") if x]
- while path :
- try :
- name = path[0]
- name = unicode(name, encoding='utf-8')
- node = zapi.traverseName(node, name)
- name = path.pop(0)
- except (TraversalError, UnicodeEncodeError) :
- break
- return node, path
-
def absoluteWikiLink(self, node) :
return zapi.absoluteURL(node, self.page.request) + self.page.action
@@ -418,29 +467,14 @@
self.pieces.append(text)
return
+ BaseLinkProcessor.handle_data(self, text)
+
+ def onWikiTextLink(self, label) :
+ name = label.replace(" ", "")
+ placeholder = self.createPlaceholder(label, name)
+ self.placeholder = None
+ return placeholder.textLink()
- text = re.sub(self.url_link, r'''<a href="\1">\1</a>''', text)
- text = re.sub(self.email_link, r'''<a href="mailto:\1">\1</a>''', text)
-
- result = ""
- end = 0
- for m in self.text_link.finditer(text):
-
- start = m.start()
- result += text[end:start]
- end = m.end()
- between = text[start+1:end-1]
-
- name = between.replace(" ", "")
- placeholder = self.createPlaceholder(between, name)
- result += placeholder.textLink()
- self.placeholder = None
-
- result += text[end:]
-
- self.pieces.append(result)
-
-
def output(self) :
""" Returns the processing result.
@@ -538,6 +572,7 @@
def startTag(self, attrs) :
""" Called when a starttag for a placeholder is detected. """
pattern = '<a href="%s"%s>'
+ print "Saving", attrs
return pattern % (self.link, self._tagAttrs(attrs))
Modified: zope3org/trunk/src/zorg/wikification/browser/wikipage.py
===================================================================
--- zope3org/trunk/src/zorg/wikification/browser/wikipage.py 2006-04-11 22:20:36 UTC (rev 66868)
+++ zope3org/trunk/src/zorg/wikification/browser/wikipage.py 2006-04-11 22:23:45 UTC (rev 66869)
@@ -99,8 +99,11 @@
self.dc = dc
self.title = dc.title or self.untitled
- self.language = dc.Language()
+ self.language = dc.Language()
+ def message(self, msg) :
+ return '<div class="system-message">%s</div>' % msg
+
def verb(self) :
""" Returns a descriptive verb. """
return _('View')
@@ -180,6 +183,8 @@
if current_stamp is None :
current_stamp = self.getModificationStamp()
+ if placeholder is None :
+ return self.message('Invalid link "%s"' % menu_id)
placeholder.outdated = modification_stamp != current_stamp
return placeholder._menu()
Modified: zope3org/trunk/src/zorg/wikification/parser.py
===================================================================
--- zope3org/trunk/src/zorg/wikification/parser.py 2006-04-11 22:20:36 UTC (rev 66868)
+++ zope3org/trunk/src/zorg/wikification/parser.py 2006-04-11 22:23:45 UTC (rev 66869)
@@ -20,8 +20,12 @@
import htmlentitydefs
class BaseHTMLProcessor(SGMLParser):
+
+ unicode_html = False
+
def reset(self):
# extend (called by SGMLParser.__init__)
+ self.unicode_html = False
self.pieces = []
SGMLParser.reset(self)
@@ -84,10 +88,22 @@
# Reconstruct original DOCTYPE
self.pieces.append("<!%(text)s>" % locals())
- def output(self):
- """Return processed HTML as a single string"""
- return "".join(self.pieces)
+
+ def feed(self, html) :
+ """ Specialization that remembers whether we process unicode or not. """
+ if isinstance(html, unicode) :
+ html = html.encode('utf-8')
+ self.unicode_html = True
+ SGMLParser.feed(self, html)
+
+ def output(self) :
+ """ Returns unicode if the processor was feeded with unicode. """
+ out = "".join(self.pieces)
+ if self.unicode_html :
+ out = unicode(out, encoding='utf-8')
+ return out
+
if __name__ == "__main__":
for k, v in globals().items():
print k, "=", v
More information about the Checkins mailing list