[Checkins] SVN: van.reposync/trunk/ * Use the tarfile module to extract only the data we need from the tarball.

Brian Sutherland jinty at web.de
Tue Jun 16 04:42:10 EDT 2009


Log message for revision 101045:
  * Use the tarfile module to extract only the data we need from the tarball. 
    Probably a big optimization if the tarballs are large.
  * Use Phillip Eby's suggestion to parse the PKG-INFO file in the tarball rather
    than the .egg-info files. This means we can also work with plain distutils
    distributions (however, we still ignore all source packages that don't
    build-depend on setuptools).
  

Changed:
  U   van.reposync/trunk/CHANGES.txt
  U   van.reposync/trunk/setup.py
  U   van.reposync/trunk/van/reposync/__init__.py

-=-
Modified: van.reposync/trunk/CHANGES.txt
===================================================================
--- van.reposync/trunk/CHANGES.txt	2009-06-16 08:41:02 UTC (rev 101044)
+++ van.reposync/trunk/CHANGES.txt	2009-06-16 08:42:10 UTC (rev 101045)
@@ -1,10 +1,15 @@
 Changes
 =======
 
-1.0.2 (unreleased)
+1.1.0 (unreleased)
 ------------------
 
-* None yet
+* Use the tarfile module to extract only the data we need from the tarball.
+  Probably a big optimization if the tarballs are large.
+* Use Phillip Eby's suggestion to parse the PKG-INFO file in the tarball rather
+  than the .egg-info files. This means we can also work with plain distutils
+  distributions (however we still ignore all source packages that don't build-dep
+  on setuptools).
 
 1.0.1 (2009-06-15)
 ------------------

Modified: van.reposync/trunk/setup.py
===================================================================
--- van.reposync/trunk/setup.py	2009-06-16 08:41:02 UTC (rev 101044)
+++ van.reposync/trunk/setup.py	2009-06-16 08:42:10 UTC (rev 101045)
@@ -23,7 +23,7 @@
       long_description=long_description,
       author="Vanguardistas",
       url='http://pypi.python.org/pypi/van.reposync',
-      version='1.0.2dev',
+      version='1.1.0dev',
       license = 'ZPL 2.1',
       packages=find_packages(),
       entry_points = {'console_scripts': ['van-reposync = van.reposync:main',]},
@@ -31,7 +31,6 @@
       install_requires=[
           'setuptools',
           'zc.lockfile',
-          'van.pydeb',
           # 'apt >= 0.7.91', XXX not on pypi!
           ],
       classifiers=['Development Status :: 4 - Beta',

Modified: van.reposync/trunk/van/reposync/__init__.py
===================================================================
--- van.reposync/trunk/van/reposync/__init__.py	2009-06-16 08:41:02 UTC (rev 101044)
+++ van.reposync/trunk/van/reposync/__init__.py	2009-06-16 08:42:10 UTC (rev 101045)
@@ -13,16 +13,13 @@
 ##############################################################################
 import os
 import sys
-import tempfile
+import tarfile
 from os.path import exists, join
 from subprocess import Popen, PIPE, call
 import logging
 import optparse
-import shutil
 
-from van import pydeb
-from setuptools import sandbox
-from pkg_resources import to_filename, PathMetadata, Distribution
+from pkg_resources import to_filename
 import apt
 import apt_pkg
 from zc.lockfile import LockFile
@@ -127,7 +124,7 @@
             if exists(pool_metadata_filename):
                 continue # we already introspected this file, but for another package name
             owned_files.add(pool_metadata_filename)
-            py_data = _get_setuptools_data(pool_file, pydeb.bin_to_py(bin_package_name))
+            py_data = _get_setuptools_data(pool_file)
             if py_data is None:
                 cant_introspect.add(pool_file)
                 continue
@@ -213,53 +210,39 @@
         raise Exception('oops')
     return stdout.splitlines()
 
-def _query_setuptools_dist(tarball, tmpdir, py_package_name):
-    py_package_filename = to_filename(py_package_name)
-    logger.debug("Introspecting egg tarball at %s" % tarball)
-    oldcwd = os.getcwd()
-    os.chdir(tmpdir)
-    try:
-        retcode = call(['tar', '-xzf', tarball])
-        if retcode != 0:
-            logger.error("Failed to unpack egg at %s" % tarball)
-            return None
-        # find the .egg-info and load it
-        found = _find(tmpdir, '%s.egg-info' % py_package_filename)
-        if not found:
-            logging.warning("Couldn't find %s.egg-info in %s, falling back to looking for *.egg-info" % (py_package_filename, tarball))
-            found = _find(tmpdir, '*.egg-info')
-        if len(found) != 1:
-            logging.error("Found %s egg-info directories, expected 1 (in %s)" % (len(found), tarball))
-            return None
-        egg_info = found[0]
-        basedir = os.path.dirname(egg_info)
-        metadata = PathMetadata(basedir, egg_info)
-        dist_name = os.path.splitext(os.path.basename(egg_info))[0]
-        dist = Distribution(basedir, project_name=dist_name,metadata=metadata)
-        return dist
-    finally:
-        os.chdir(oldcwd)
-
-def _get_setuptools_data(filename, py_package_name):
+def _get_setuptools_data(filename):
     """Returns a 3 part tuple:
 
     (name, version, filename)
 
     where filename is the setuptools name on the filesystem."""
-    tmpdir = tempfile.mkdtemp()
+    tarball = filename
+    logger.debug("Introspecting egg tarball at %s" % tarball)
+    tar = tarfile.open(tarball, 'r:*')
+    random_tar_member = tar.next() # is it right that the top level dir will always be first?
+    top_level_dir = random_tar_member.name.split('/')[0]
+    pkg_info_tar_path = '/'.join([top_level_dir, 'PKG-INFO'])
     try:
-        try:
-            dist = _query_setuptools_dist(filename, tmpdir, py_package_name)
-        except:
-            logging.exception("Error on introspecting %s, ignoring and continuing anyway." % filename)
-            return None
-        if dist is None:
-            return None
-        # I think this is the right way of quoting
-        # see: http://mail.python.org/pipermail/distutils-sig/2009-May/011877.html
-        return (dist.project_name, dist.version, '%s-%s.tar.gz' % (to_filename(dist.project_name), to_filename(dist.version)))
-    finally:
-       shutil.rmtree(tmpdir)
+        pkg_info_tar = tar.getmember(pkg_info_tar_path)
+    except KeyError:
+        logger.warn("Could not fing PKG-INFO in tarball")
+        return None
+    pkg_config = tar.extractfile(pkg_info_tar).read()
+    project_name = None
+    version = None
+    for line in pkg_config.splitlines():
+        if line.startswith('Version: '):
+            version = line[9:]
+        if line.startswith('Name: '):
+            project_name = line[6:]
+        if project_name is not None and version is not None:
+            break
+    else:
+        logger.error("Invalid PKG-INFO file")
+        return None
+    # I think this is the right way of quoting
+    # see: http://mail.python.org/pipermail/distutils-sig/2009-May/011877.html
+    return (project_name, version, '%s-%s.tar.gz' % (to_filename(project_name), to_filename(version)))
 
 class _ExecutionContext(object):
     """Contains the global configuration for what we are doing"""



More information about the Checkins mailing list