[Checkins] SVN: z3c.pypimirror/trunk/src/z3c/pypimirror/ - Corrected the index pages for packages (have full urls now)

Andreas Jung andreas at andreas-jung.com
Wed Aug 27 09:08:56 EDT 2008


Log message for revision 90448:
  - Corrected the index pages for packages (links now use full URLs)
  - Added a full.html listing all mirrored package files (you had better gzip it ;) )
  - Removed HTML page download for external sites
  
  

Changed:
  U   z3c.pypimirror/trunk/src/z3c/pypimirror/mirror.py
  U   z3c.pypimirror/trunk/src/z3c/pypimirror/pypimirror.cfg.sample

-=-
Modified: z3c.pypimirror/trunk/src/z3c/pypimirror/mirror.py
===================================================================
--- z3c.pypimirror/trunk/src/z3c/pypimirror/mirror.py	2008-08-27 13:08:34 UTC (rev 90447)
+++ z3c.pypimirror/trunk/src/z3c/pypimirror/mirror.py	2008-08-27 13:08:55 UTC (rev 90448)
@@ -157,6 +157,8 @@
                     continue
                 # fetch what is behind the link and see if it's html.
                 # If it is html, download anything from there.
+                # This is extremely unreliable and therefore commented out.
+                """
                 site = urllib2.urlopen(link)
                 if site.headers.type != "text/html":
                     continue
@@ -170,8 +172,8 @@
                     real_download_link = urllib.basejoin(site.url, real_download_link)
                     if not filename_matches or self.matches(real_download_link, filename_matches):
                         yield(real_download_link)
+                """
 
-
     def _links(self, filename_matches=None, external_links=False):
         """ This is an iterator which returns useful links on files for
             mirroring
@@ -206,7 +208,7 @@
         return False
 
     def ls(self, filename_matches=None, external_links=False):
-        links = self._links(filename_matches=filename_matches)
+        links = self._links(filename_matches=filename_matches, external_links=external_links)
         return [(link[0], os.path.basename(link[0]), link[1]) for link in links]
 
     def _get(self, url, filename, md5_hex=None):
@@ -285,7 +287,7 @@
         return filenames
 
     def _html_link(self, filename):
-        return "<a href='%s'>%s</a>" % (filename, filename)
+        return "<a href='%s/'>%s</a>" % (filename, filename)
 
     def _index_html(self):
         header = "<html><body><h1>PyPi Mirror</h1><h2>Last update: " + \
@@ -298,8 +300,12 @@
         content = self._index_html()
         open(os.path.join(self.base_path, "index.html"), "wb").write(content)
 
-    def mirror(self, package_list, filename_matches, verbose, cleanup, create_indexes, external_links):
+    def full_html(self, full_list):
+        open(os.path.join(self.base_path, "full.html"), "wb").write("<br />\n".join(full_list))
+
+    def mirror(self, package_list, filename_matches, verbose, cleanup, create_indexes, external_links, base_url):
         stats = Stats()
+        full_list = []
         for package_name in package_list:
             try:
                 package = Package(package_name)
@@ -335,6 +341,10 @@
                         print "Invalid URL: %s" % v
                         continue
 
+                    # XXX TODO: We should use the filename coming from
+                    # urllib2 when the thing was downloaded, as it might
+                    # follow redirects.
+                    # Example: downman.py?file=configobj-4.3.0.zip
                     mirror_package.write(filename, data, md5_hash)
                     stats.stored(filename)
                     if verbose: 
@@ -343,14 +353,16 @@
                     stats.found(filename)
                     if verbose: 
                         print "Found: %s" % filename
+                full_list.append(mirror_package._html_link(base_url, filename, md5_hash))
             if cleanup:
                 mirror_package.cleanup(links, verbose)
             if create_indexes:
-                mirror_package.index_html()
+                mirror_package.index_html(base_url)
         if cleanup:
             self.cleanup(package_list, verbose)
         if create_indexes:
             self.index_html()
+            self.full_html(full_list)
         print stats
 
 class MirrorPackage:
@@ -399,10 +411,10 @@
                 filenames.append(filename)
         return filenames
 
-    def _html_link(self, filename, md5_hash):
-        return "<a href='%s#md5=%s'>%s</a>" % (filename, md5_hash, filename)
+    def _html_link(self, base_url, filename, md5_hash):
+        return "<a href='%s%s/%s#md5=%s'>%s</a>" % (base_url, self.package_name, filename, md5_hash, filename)
 
-    def _index_html(self):
+    def _index_html(self, base_url):
         header = "<html><body>"
         footer = "</body></html>"
 
@@ -410,12 +422,12 @@
         for link in self.ls():
             file = MirrorFile(self, link)
             md5_hash = file.md5
-            link_list.append(self._html_link(link, md5_hash))
+            link_list.append(self._html_link(base_url, link, md5_hash))
         links = "<br />\n".join(link_list)
         return "%s%s%s" % (header, links, footer)
 
-    def index_html(self):
-        content = self._index_html()
+    def index_html(self, base_url):
+        content = self._index_html(base_url)
         self.write("index.html", content)
 
     def cleanup(self, original_file_list, verbose=False):
@@ -482,6 +494,7 @@
 ################# Config file parser
 
 config_defaults = {
+    'base_url': 'http://your-host.com/index/',
     'mirror_file_path': '/tmp/mirror',
     'lock_file_name': 'pypi-poll-access.lock',
     'filename_matches': '*.zip *.tgz *.egg *.tar.gz *.tar.bz2', # may be "" for *
@@ -556,6 +569,6 @@
     package_list = PypiPackageList().list(package_matches)
     mirror = Mirror(config["mirror_file_path"])
     lock = zc.lockfile.LockFile(os.path.join(config["mirror_file_path"], config["lock_file_name"]))
-    mirror.mirror(package_list, filename_matches, verbose, cleanup, create_indexes, external_links)
+    mirror.mirror(package_list, filename_matches, verbose, cleanup, create_indexes, external_links, config["base_url"])
 
 

Modified: z3c.pypimirror/trunk/src/z3c/pypimirror/pypimirror.cfg.sample
===================================================================
--- z3c.pypimirror/trunk/src/z3c/pypimirror/pypimirror.cfg.sample	2008-08-27 13:08:34 UTC (rev 90447)
+++ z3c.pypimirror/trunk/src/z3c/pypimirror/pypimirror.cfg.sample	2008-08-27 13:08:55 UTC (rev 90448)
@@ -3,6 +3,9 @@
 # if necessary it will be created for you
 mirror_file_path = /tmp/mirror
 
+# public base URL of your mirror; keep the trailing slash (links are base_url + package/file)
+base_url = http://your-host.com/
+
 # ???
 lock_file_name = pypi-poll-access.lock
 



More information about the Checkins mailing list