[Checkins] SVN: z3c.pypimirror/trunk/ finalized experimental follow_external_index_pages
Andreas Jung
andreas at andreas-jung.com
Wed Sep 3 02:24:37 EDT 2008
Log message for revision 90725:
finalized experimental follow_external_index_pages
Changed:
U z3c.pypimirror/trunk/CHANGES.txt
U z3c.pypimirror/trunk/setup.py
U z3c.pypimirror/trunk/src/z3c/pypimirror/mirror.py
-=-
Modified: z3c.pypimirror/trunk/CHANGES.txt
===================================================================
--- z3c.pypimirror/trunk/CHANGES.txt 2008-09-02 22:24:35 UTC (rev 90724)
+++ z3c.pypimirror/trunk/CHANGES.txt 2008-09-03 06:24:35 UTC (rev 90725)
@@ -1,6 +1,12 @@
Change history
~~~~~~~~~~~~~~
+0.2.8 (2008-09-03)
+------------------
+
+- added highly experimental follow_external_index_pages option
+ in order to deal with external links in a better way
+
0.2.7 (2008-09-02)
------------------
Modified: z3c.pypimirror/trunk/setup.py
===================================================================
--- z3c.pypimirror/trunk/setup.py 2008-09-02 22:24:35 UTC (rev 90724)
+++ z3c.pypimirror/trunk/setup.py 2008-09-03 06:24:35 UTC (rev 90725)
@@ -15,8 +15,8 @@
setup(
name='z3c.pypimirror',
- version='0.2.7',
- author='Daniel Kraft et al.',
+ version='0.2.8',
+ author='Daniel Kraft, Andreas Jung et al.',
author_email='dk at d9t.de',
description='A module for building a complete or a partial PyPI mirror',
long_description=long_description,
Modified: z3c.pypimirror/trunk/src/z3c/pypimirror/mirror.py
===================================================================
--- z3c.pypimirror/trunk/src/z3c/pypimirror/mirror.py 2008-09-02 22:24:35 UTC (rev 90724)
+++ z3c.pypimirror/trunk/src/z3c/pypimirror/mirror.py 2008-09-03 06:24:35 UTC (rev 90725)
@@ -143,7 +143,7 @@
links.append(href)
return links
- def _links_external(self, html, filename_matches=None):
+ def _links_external(self, html, filename_matches=None, follow_external_index_pages=False):
""" pypi has external "download_url"s. We try to get anything
from there too. This is really ugly and I'm not sure if there's
a sane way.
@@ -157,13 +157,14 @@
if link.renderContents().endswith("download_url"):
# we have a download_url!! Yeah.
url = link.get("href")
- if not url:
+ if not url:
continue
download_links.add(url)
for link in download_links:
# check if the link points directly to a file
# and get it if it matches filename_matches
+
if filename_matches:
if self.matches(link, filename_matches):
yield link
@@ -171,7 +172,7 @@
# fetch what is behind the link and see if it's html.
# If it is html, download anything from there.
# This is extremely unreliable and therefore commented out.
- """
+ import pdb; pdb.set_trace()
site = urllib2.urlopen(link)
if site.headers.type != "text/html":
continue
@@ -185,9 +186,9 @@
real_download_link = urllib.basejoin(site.url, real_download_link)
if not filename_matches or self.matches(real_download_link, filename_matches):
yield(real_download_link)
- """
+
- def _links(self, filename_matches=None, external_links=False):
+ def _links(self, filename_matches=None, external_links=False, follow_external_index_pages=False):
""" This is an iterator which returns useful links on files for
mirroring
"""
@@ -211,7 +212,7 @@
yield (url, hash)
if external_links:
- for link in self._links_external(remote_index_html, filename_matches):
+ for link in self._links_external(remote_index_html, filename_matches, follow_external_index_pages):
yield (link, None)
def matches(self, filename, filename_matches):
@@ -220,8 +221,10 @@
return True
return False
- def ls(self, filename_matches=None, external_links=False):
- links = self._links(filename_matches=filename_matches, external_links=external_links)
+ def ls(self, filename_matches=None, external_links=False, follow_external_index_pages=False):
+ links = self._links(filename_matches=filename_matches,
+ external_links=external_links,
+ follow_external_index_pages=follow_external_index_pages)
return [(link[0], os.path.basename(link[0]), link[1]) for link in links]
def _get(self, url, filename, md5_hex=None):
@@ -343,7 +346,7 @@
continue
try:
- links = package.ls(filename_matches, external_links)
+ links = package.ls(filename_matches, external_links, follow_external_index_pages)
except PackageError, v:
stats.error_404(package_name)
LOG.debug("Package not available: %s" % v)
@@ -535,7 +538,7 @@
'verbose': True, # log output
'log_filename': default_logfile,
'external_links': False, # experimental external link resolve and download
- 'follow_external_index_pages' : False, # experimental
+ 'follow_external_index_pages' : False, # experimental, scan index pages for links
}
More information about the Checkins
mailing list