[Checkins] SVN: zope3org/trunk/src/zorg/wikification/ Refactored link processors

Uwe Oestermeier uwe_oestermeier at iwm-kmrc.de
Tue Apr 11 18:23:45 EDT 2006


Log message for revision 66869:
  Refactored link processors

Changed:
  U   zope3org/trunk/src/zorg/wikification/browser/wikilink.py
  U   zope3org/trunk/src/zorg/wikification/browser/wikipage.py
  U   zope3org/trunk/src/zorg/wikification/parser.py

-=-
Modified: zope3org/trunk/src/zorg/wikification/browser/wikilink.py
===================================================================
--- zope3org/trunk/src/zorg/wikification/browser/wikilink.py	2006-04-11 22:20:36 UTC (rev 66868)
+++ zope3org/trunk/src/zorg/wikification/browser/wikilink.py	2006-04-11 22:23:45 UTC (rev 66869)
@@ -118,7 +118,49 @@
 class BaseLinkProcessor(BaseHTMLProcessor) :
     """ Implements a processor that is able to visit and modify all links. 
     """
+    
+    absolute_prefixes = 'http:', 'ftp:', 'https:', 'mailto:'
+         
+    link_refs = dict(a='href', img='src')       # treat 'a href' and 'img src'
 
+    _url = r'''(?=[a-zA-Z0-9./#])    # Must start correctly
+                  ((?:              # Match the leading part
+                      (?:ftp|https?|telnet|nntp) #     protocol
+                      ://                        #     ://
+                      (?:                       # Optional 'username:password@'
+                          \w+                   #         username
+                          (?::\w+)?             #         optional :password
+                          @                     #         @
+                      )?                        # 
+                      [-\w]+(?:\.\w[-\w]*)+     #  hostname (sub.example.com)
+                  )                             #
+                  (?::\d+)?                     # Optional port number
+                  (?:                           # Rest of the URL, optional
+                      /?                                # Start with '/'
+                      [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]* # Can't start with these
+                      (?:                               #
+                          [.!,?;:]+                     #  One or more of these
+                          [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]+  # Can't finish
+                          #'"                           #  # or ' or "
+                      )*                                #
+                  )?)                                   #
+               '''
+
+    _email = r'''(?:mailto:)?            # Optional mailto:
+                    ([-\+\w]+               # username
+                    \@                      # at
+                    [-\w]+(?:\.\w[-\w]*)+)  # hostname
+                 '''
+    
+    url_link = re.compile(_url, re.VERBOSE)
+    email_link = re.compile(_email, re.VERBOSE)
+    text_link = re.compile('\[.*?\]', re.VERBOSE)
+
+
+    def reset(self):
+        BaseHTMLProcessor.reset(self)
+        self.traversed = {}
+   
     def isAbsoluteURL(self, link) :
         """ Returns true if the link is a complete URL. 
             
@@ -126,11 +168,80 @@
             might point to a local object.
         """
         
-        for prefix in 'http:', 'ftp:', 'https:', 'mailto:' :
+        for prefix in self.absolute_prefixes  :
             if link.startswith(prefix) :
                 return True
         return False
 
+    def onRelativeLink(self, link) :
+        """ Event handler that can be specialized. """
+        return link
+        
+    def onWikiTextLink(self, link) :
+        """ Event handler that can be specialized. """
+        return link
+
+    def onAbsoluteLink(self, link) :
+        """ Event handler that can be specialized. """
+        return link
+
+    def traverseLink(self, node, link) :
+        """ Helper method that follows a relative link from a context node. """
+        remaining = urllib.unquote(link)
+        if link in self.traversed :
+            return self.traversed[link]
+        path = [x for x in remaining.split("/") if x]        
+        while path :         
+            try :
+                name = path[0]
+                name = unicode(name, encoding='utf-8')
+                node = zapi.traverseName(node, name)
+                name = path.pop(0)
+            except (TraversalError, UnicodeEncodeError) :
+                break
+        self.traversed[link] = node, path
+        return node, path
+
+    def handle_data(self, text) :
+        """ Called for each text block. Extracts wiki text links. """
+        
+        text = re.sub(self.url_link, r'''<a href="\1">\1</a>''', text)
+        text = re.sub(self.email_link, r'''<a href="mailto:\1">\1</a>''', text)
+        
+        result = ""
+        end = 0
+        for m in self.text_link.finditer(text):
+            
+            start = m.start()
+            result += text[end:start]
+            end = m.end()
+            between = text[start+1:end-1]
+            result += self.onWikiTextLink(between)           
+        result += text[end:]       
+        
+        self.pieces.append(result)
+        
+        
+    def unknown_starttag(self, tag, attrs):
+        """ Called for each tag. Calls link event handlers. """
+        
+        if tag in self.link_refs :
+            result = []
+            for key, value in attrs :
+                if key == self.link_refs[tag] :
+                    if self.isAbsoluteURL(value) :
+                        value = self.onAbsoluteLink(value)
+                    else :
+                        value = self.onRelativeLink(value)
+                result.append((key, value))
+           
+            BaseHTMLProcessor.unknown_starttag(self, tag, result) 
+            return True
+     
+        BaseHTMLProcessor.unknown_starttag(self, tag, attrs)               
+
+
+
 class RelativeLinkProcessor(BaseLinkProcessor) :
     """ Implements a processor that converts all relative links
         into absolute ones. 
@@ -150,25 +261,10 @@
     def __init__(self, base_url) :
         BaseHTMLProcessor.__init__(self)
         self.base_url = base_url
-
-    def unknown_starttag(self, tag, attrs):
-        """ Called for each tag. Wikifies links. """
         
-        if tag == "a" :
-            href = ""
-            result = []
-            for key, value in attrs :
-                if key == "href" :
-                    if value and not self.isAbsoluteURL(value) :
-                        value = "%s/%s" % (self.base_url, value)
-                result.append((key, value))
-           
-            BaseHTMLProcessor.unknown_starttag(self, tag, result) 
-            return True
-     
-        BaseHTMLProcessor.unknown_starttag(self, tag, attrs)               
-
-       
+    def onRelativeLink(self, link) :
+        """ Event handler that can be specialized. """
+        return "%s/%s" % (self.base_url, link)
         
         
 class WikiLinkProcessor(BaseLinkProcessor) :
@@ -182,41 +278,8 @@
     adapts(IWikiPage)
         
     command = None
-    
-    _url = r'''(?=[a-zA-Z0-9./#])    # Must start correctly
-                  ((?:              # Match the leading part
-                      (?:ftp|https?|telnet|nntp) #     protocol
-                      ://                        #     ://
-                      (?:                       # Optional 'username:password@'
-                          \w+                   #         username
-                          (?::\w+)?             #         optional :password
-                          @                     #         @
-                      )?                        # 
-                      [-\w]+(?:\.\w[-\w]*)+     #  hostname (sub.example.com)
-                  )                             #
-                  (?::\d+)?                     # Optional port number
-                  (?:                           # Rest of the URL, optional
-                      /?                                # Start with '/'
-                      [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]* # Can't start with these
-                      (?:                               #
-                          [.!,?;:]+                     #  One or more of these
-                          [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]+  # Can't finish
-                          #'"                           #  # or ' or "
-                      )*                                #
-                  )?)                                   #
-               '''
+   
 
-    _email = r'''(?:mailto:)?            # Optional mailto:
-                    ([-\+\w]+               # username
-                    \@                      # at
-                    [-\w]+(?:\.\w[-\w]*)+)  # hostname
-                 '''
-    
-    url_link = re.compile(_url, re.VERBOSE)
-    email_link = re.compile(_email, re.VERBOSE)
-    text_link = re.compile('\[.*?\]', re.VERBOSE)
-
-
     def __init__(self, page) :
         BaseHTMLProcessor.__init__(self)
         self.page = page
@@ -312,20 +375,6 @@
                                 
         return False, self.absoluteWikiLink(node)
 
-    
-    def traverseLink(self, node, link) :
-        remaining = urllib.unquote(link)
-        path = [x for x in remaining.split("/") if x]        
-        while path :         
-            try :
-                name = path[0]
-                name = unicode(name, encoding='utf-8')
-                node = zapi.traverseName(node, name)
-                name = path.pop(0)
-            except (TraversalError, UnicodeEncodeError) :
-                break
-        return node, path
-
         
     def absoluteWikiLink(self, node) :
         return zapi.absoluteURL(node, self.page.request) + self.page.action
@@ -418,29 +467,14 @@
             self.pieces.append(text)
             return
             
+        BaseLinkProcessor.handle_data(self, text)
+             
+    def onWikiTextLink(self, label) :
+        name = label.replace(" ", "")
+        placeholder = self.createPlaceholder(label, name)
+        self.placeholder = None
+        return placeholder.textLink()
         
-        text = re.sub(self.url_link, r'''<a href="\1">\1</a>''', text)
-        text = re.sub(self.email_link, r'''<a href="mailto:\1">\1</a>''', text)
-        
-        result = ""
-        end = 0
-        for m in self.text_link.finditer(text):
-            
-            start = m.start()
-            result += text[end:start]
-            end = m.end()
-            between = text[start+1:end-1]
-            
-            name = between.replace(" ", "")
-            placeholder = self.createPlaceholder(between, name)
-            result += placeholder.textLink()
-            self.placeholder = None
-            
-        result += text[end:]       
-        
-        self.pieces.append(result)
-        
-        
     def output(self) :
         """ Returns the processing result.
         
@@ -538,6 +572,7 @@
     def startTag(self, attrs) :
         """ Called when a starttag for a placeholder is detected. """
         pattern = '<a href="%s"%s>'
+        print "Saving", attrs
         return pattern % (self.link, self._tagAttrs(attrs))
 
 

Modified: zope3org/trunk/src/zorg/wikification/browser/wikipage.py
===================================================================
--- zope3org/trunk/src/zorg/wikification/browser/wikipage.py	2006-04-11 22:20:36 UTC (rev 66868)
+++ zope3org/trunk/src/zorg/wikification/browser/wikipage.py	2006-04-11 22:23:45 UTC (rev 66869)
@@ -99,8 +99,11 @@
         self.dc = dc
         
         self.title = dc.title or self.untitled
-        self.language = dc.Language()        
+        self.language = dc.Language()
         
+    def message(self, msg) :
+        return '<div class="system-message">%s</div>' % msg
+        
     def verb(self) :
         """ Returns a descriptive verb. """
         return _('View')
@@ -180,6 +183,8 @@
         
         if current_stamp is None :
             current_stamp = self.getModificationStamp()
+        if placeholder is None :
+            return self.message('Invalid link &quot;%s&quot;' % menu_id)
         placeholder.outdated = modification_stamp != current_stamp
         return placeholder._menu()
       

Modified: zope3org/trunk/src/zorg/wikification/parser.py
===================================================================
--- zope3org/trunk/src/zorg/wikification/parser.py	2006-04-11 22:20:36 UTC (rev 66868)
+++ zope3org/trunk/src/zorg/wikification/parser.py	2006-04-11 22:23:45 UTC (rev 66869)
@@ -20,8 +20,12 @@
 import htmlentitydefs
 
 class BaseHTMLProcessor(SGMLParser):
+
+    unicode_html = False
+    
     def reset(self):
         # extend (called by SGMLParser.__init__)
+        self.unicode_html = False
         self.pieces = []
         SGMLParser.reset(self)
         
@@ -84,10 +88,22 @@
         # Reconstruct original DOCTYPE
         self.pieces.append("<!%(text)s>" % locals())
         
-    def output(self):
-        """Return processed HTML as a single string"""
-        return "".join(self.pieces)
+        
+    def feed(self, html) :
+        """ Specialization that remembers whether we process unicode or not. """
+        if isinstance(html, unicode) :
+            html = html.encode('utf-8')
+            self.unicode_html = True
+        SGMLParser.feed(self, html)
+        
+    def output(self) :
+        """ Returns unicode if the processor was fed unicode. """
+        out = "".join(self.pieces)
+        if self.unicode_html :
+            out = unicode(out, encoding='utf-8')
+        return out
 
+
 if __name__ == "__main__":
     for k, v in globals().items():
         print k, "=", v



More information about the Checkins mailing list