[Checkins] SVN: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/ refactoring of fetch

Mon Aug 6 15:40:22 EDT 2007

Log message for revision 78639:
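  Refactoring of fetch: renamed source_amazon.py and source_amazon_config.py
  to amazonsource.py and amazonsource_config.py, made fetch.py skip cover
  images that already exist on disk, and fixed the port number formatting in
  the dummy server's startup message.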
  

Changed:
  _U  Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/
  _U  Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazon.com/
  A   Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazonsource.py
  A   Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazonsource_config.py
  U   Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/fetch.py
  D   Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon.py
  D   Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon_config.py
  U   Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/tests/dummy_server.py

-=-

Property changes on: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch
___________________________________________________________________
Name: svn:ignore
   - *.kpf
*.pyc
amazon_config.py


   + *.kpf
*.pyc
amazon.com




Property changes on: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazon.com
___________________________________________________________________
Name: svn:ignore
   + *.xml


Copied: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazonsource.py (from rev 78635, Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon.py)
===================================================================

--- Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazonsource.py	                        (rev 0)
+++ Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazonsource.py	2007-08-06 19:40:22 UTC (rev 78639)
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from zope.interface import implements
+from interfaces import IMetadataSource
+
+from lxml import etree
+
+from urllib import quote
+import sys
+from StringIO import StringIO
+
+from amazonsource_config import ACCESS_KEY_ID, ASSOCIATE_TAG
+
+"""
+Structure of the AmazonECS XML response:
+
+ItemLookupResponse
+    OperationRequest
+        (...)
+    Items
+        Request
+            IsValid
+            ItemLookupRequest
+                ItemId
+                ResponseGroup
+            (Errors)
+                (Error)
+                    (Code)
+                    (Message)
+        (Item)
+            (ItemAttributes)
+                (Author)
+                (Creator Role=...)
+
+Notes:
+- Errors element occurs when ISBN is non-existent;
+        in that case, Code contains the string "AWS.InvalidParameterValue"
+- Author element is not always present
+- Author element may be duplicated with the same content,
+        except for whitespace; for example: ISBN=0141000511
+"""
+
+FIELD_MAP = [
+    # Book schema -> Amazon ECS element path, relative to Item element
+    ('title', 'ItemAttributes/Title'),
+    ('isbn13', 'ItemAttributes/EAN'),
+    ('edition', 'ItemAttributes/Edition'),
+    ('publisher', 'ItemAttributes/Publisher'),
+    ('issued', 'ItemAttributes/PublicationDate'),
+    ('subjects', 'ItemAttributes/DeweyDecimalNumber'),
+    ('image_url', 'LargeImage/URL'),
+    ('source_url', 'DetailPageURL'),
+    ('source_item_id', 'ASIN'),
+    ]
+
+CREATOR_TAGS = ['ItemAttributes/Author', 'ItemAttributes/Creator']
+
+AMAZON_CODE_NO_MATCH = 'AWS.ECommerceService.NoExactMatches'
+
+class AmazonSource(object):
+    implements(IMetadataSource)
+    
+    name = 'amazon.com'
+    max_ids_per_request = 3
+
+
+    base_url = """http://ecs.amazonaws.com/onca/xml"""
+
+    def __init__(self):
+        self.base_params = { 'Service':'AWSECommerceService',
+                             'AWSAccessKeyId':ACCESS_KEY_ID,
+                             'AssociateTag': ASSOCIATE_TAG
+                           }
+        self.xml = ''
+        self.http_response = {}
+
+    def buildURL(self, **kw):
+        query = []
+        kw.update(self.base_params)
+        for key, val in kw.items():
+            query.append('%s=%s' % (key,quote(val)))
+        return self.base_url + '?' + '&'.join(query)
+
+    def buildItemLookupURL(self,itemId,response='ItemAttributes'):
+        params = {  'Operation':'ItemLookup',
+                    'ItemId':itemId,
+                    'ResponseGroup':response
+                 }
+        return self.buildURL(**params)
+
+    def buildItemSearchURL(self,query,response='ItemAttributes,Images'):
+        params = {  'Operation':'ItemSearch',
+                    'SearchIndex':'Books',
+                    'Power':query,
+                    'ResponseGroup':response
+                 }
+        return self.buildURL(**params)
+    
+    def buildMultipleBookDetailsURL(self, isbns):
+        query = 'isbn:' + ' or '.join(isbns)
+        return self.buildItemSearchURL(query)
+
+    def nsPath(self, *paths):
+        """Prepend namespace to each part of the path."""
+        parts = []
+        for path in paths:
+            parts.extend(path.split('/'))
+        return '/'.join([self.ns+part for part in parts])
+    
+    def parseMultipleBookDetails(self, xml):
+        xml = StringIO(xml)
+        tree = etree.parse(xml)
+        root = tree.getroot()
+        # get the XML namespace from the root tag
+        self.ns = root.tag.split('}')[0] + '}'
+        request = root.find(self.nsPath('Items/Request'))
+        error_code = request.findtext(self.nsPath('Errors/Error/Code'))
+        if error_code is None:
+            book_list = []
+            for item in root.findall(self.nsPath('Items/Item')):
+                book_dic = {}
+                for field, tag in FIELD_MAP:
+                    elem = item.find(self.nsPath(tag))
+                    if elem is not None:
+                        book_dic[field] = elem.text
+                creators = []
+                for tag in CREATOR_TAGS:
+                    for elem in item.findall(self.nsPath(tag)):
+                        if elem is None: continue
+                        role = elem.attrib.get('Role')
+                        if role:
+                            creator = '%s (%s)' % (elem.text, role)
+                        else:
+                            creator = elem.text
+                        creators.append(creator)
+                if creators:
+                    book_dic['creators'] = creators
+                if book_dic.get('subjects'):
+                    # subjects is a Tuple field
+                    book_dic['subjects'] = (book_dic['subjects'],)
+                book_dic['source'] = self.name
+
+                book_list.append(book_dic)
+            return book_list
+    
+        elif error_code == AMAZON_CODE_NO_MATCH:
+            return []
+        else:
+            raise EnvironmentError, error_code
+        

Copied: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazonsource_config.py (from rev 78634, Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon_config.py)
===================================================================
--- Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazonsource_config.py	                        (rev 0)
+++ Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/amazonsource_config.py	2007-08-06 19:40:22 UTC (rev 78639)
@@ -0,0 +1,9 @@
+ASSOCIATE_TAG = 'circulante-20'
+ACCESS_KEY_ID = '13W2MMDG65QJJK9GG402'
+
+#UK
+#ASSOCIATE_TAG = 'circulante-21'
+#DE
+#ASSOCIATE_TAG = 'circulante0f-21'
+#FR
+#ASSOCIATE_TAG = 'circulante07-21'

Modified: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/fetch.py
===================================================================
--- Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/fetch.py	2007-08-06 19:06:58 UTC (rev 78638)
+++ Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/fetch.py	2007-08-06 19:40:22 UTC (rev 78639)
@@ -6,7 +6,7 @@
 from twisted.web import xmlrpc, client
 from os import path
 
-import source_amazon as amazon
+from amazonsource import AmazonSource
 
 from pprint import pprint
 
@@ -58,9 +58,17 @@
             if url:
                 filename = book.get('isbn13',book['source_item_id'])
                 filename += '.' + url.split('.')[-1]
-                deferred = client.getPage(url)
-                deferred.addCallback(self.downloadedImage, filename)
-                deferred.addErrback(self.downloadError, url)
+                # XXX: find a proper way to calculate the static image dir
+                filepath = '../../..'
+                filepath = path.join(filepath,'src','kirbi','static',
+                                    'covers','large',filename)
+                # avoid duplicate downloads
+                if not path.exists(filepath): 
+                    deferred = client.getPage(url)
+                    deferred.addCallback(self.downloadedImage, filepath)
+                    deferred.addErrback(self.downloadError, url)
+                else:
+                    print 'skipping existing:', filepath
                 
         if KEEP_FILES:
             filename = '_'.join(isbns)+'.xml'
@@ -69,13 +77,9 @@
             out.close()
 
     
-    def downloadedImage(self, bytes, filename):
-        # XXX: find a proper way to calculate the static image dir
-        dest = '../../..'
-        dest = path.join(dest,'src','kirbi','static','covers','large'
-                            ,filename)
-        print 'saving: ', dest
-        out = file(dest, 'wb')
+    def downloadedImage(self, bytes, filepath):
+        print 'saving:', filepath
+        out = file(filepath, 'wb')
         out.write(bytes)
         out.close()
 
@@ -93,7 +97,7 @@
     xmlrpc_url = 'http://localhost:8080/RPC2'
     poll_method = 'dump_pending_isbns'
     callback = 'add_books'
-    fetcher = Fetch(xmlrpc_url, poll_method, callback, amazon.Source())
+    fetcher = Fetch(xmlrpc_url, poll_method, callback, AmazonSource())
     reactor.callLater(0, fetcher.poll)
     print 'reactor start'
     reactor.run()

Deleted: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon.py
===================================================================
--- Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon.py	2007-08-06 19:06:58 UTC (rev 78638)
+++ Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon.py	2007-08-06 19:40:22 UTC (rev 78639)
@@ -1,155 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-from zope.interface import implements
-from interfaces import IMetadataSource
-
-from lxml import etree
-from twisted.internet import reactor
-from twisted.web import xmlrpc, client
-
-from urllib import quote
-from time import sleep
-import sys
-from StringIO import StringIO
-
-
-from source_amazon_config import ACCESS_KEY_ID, ASSOCIATE_TAG
-
-"""
-Structure of the AmazonECS XML response:
-
-ItemLookupResponse
-    OperationRequest
-        (...)
-    Items
-        Request
-            IsValid
-            ItemLookupRequest
-                ItemId
-                ResponseGroup
-            (Errors)
-                (Error)
-                    (Code)
-                    (Message)
-        (Item)
-            (ItemAttributes)
-                (Author)
-                (Creator Role=...)
-
-Notes:
-- Errors element occurs when ISBN is non-existent;
-        in that case, Code contains the string "AWS.InvalidParameterValue"
-- Author element is not always present
-- Author element may be duplicated with the same content,
-        except for whitespace; for example: ISBN=0141000511
-"""
-
-FIELD_MAP = [
-    # Book schema -> Amazon ECS element path, relative to Item element
-    ('title', 'ItemAttributes/Title'),
-    ('isbn13', 'ItemAttributes/EAN'),
-    ('edition', 'ItemAttributes/Edition'),
-    ('publisher', 'ItemAttributes/Publisher'),
-    ('issued', 'ItemAttributes/PublicationDate'),
-    ('subjects', 'ItemAttributes/DeweyDecimalNumber'),
-    ('image_url', 'LargeImage/URL'),
-    ('source_url', 'DetailPageURL'),
-    ('source_item_id', 'ASIN'),
-    ]
-
-CREATOR_TAGS = ['ItemAttributes/Author', 'ItemAttributes/Creator']
-
-AMAZON_CODE_NO_MATCH = 'AWS.ECommerceService.NoExactMatches'
-
-class AmazonSource(object):
-    implements(IMetadataSource)
-    
-    name = 'amazon.com'
-    max_ids_per_request = 3
-
-
-    base_url = """http://ecs.amazonaws.com/onca/xml"""
-
-    def __init__(self):
-        self.base_params = { 'Service':'AWSECommerceService',
-                             'AWSAccessKeyId':ACCESS_KEY_ID,
-                             'AssociateTag': ASSOCIATE_TAG
-                           }
-        self.xml = ''
-        self.http_response = {}
-
-    def buildURL(self, **kw):
-        query = []
-        kw.update(self.base_params)
-        for key, val in kw.items():
-            query.append('%s=%s' % (key,quote(val)))
-        return self.base_url + '?' + '&'.join(query)
-
-    def buildItemLookupURL(self,itemId,response='ItemAttributes'):
-        params = {  'Operation':'ItemLookup',
-                    'ItemId':itemId,
-                    'ResponseGroup':response
-                 }
-        return self.buildURL(**params)
-
-    def buildItemSearchURL(self,query,response='ItemAttributes,Images'):
-        params = {  'Operation':'ItemSearch',
-                    'SearchIndex':'Books',
-                    'Power':query,
-                    'ResponseGroup':response
-                 }
-        return self.buildURL(**params)
-    
-    def buildMultipleBookDetailsURL(self, isbns):
-        query = 'isbn:' + ' or '.join(isbns)
-        return self.buildItemSearchURL(query)
-
-    def nsPath(self, *paths):
-        """Prepend namespace to each part of the path."""
-        parts = []
-        for path in paths:
-            parts.extend(path.split('/'))
-        return '/'.join([self.ns+part for part in parts])
-    
-    def parseMultipleBookDetails(self, xml):
-        xml = StringIO(xml)
-        tree = etree.parse(xml)
-        root = tree.getroot()
-        # get the XML namespace from the root tag
-        self.ns = root.tag.split('}')[0] + '}'
-        request = root.find(self.nsPath('Items/Request'))
-        error_code = request.findtext(self.nsPath('Errors/Error/Code'))
-        if error_code is None:
-            book_list = []
-            for item in root.findall(self.nsPath('Items/Item')):
-                book_dic = {}
-                for field, tag in FIELD_MAP:
-                    elem = item.find(self.nsPath(tag))
-                    if elem is not None:
-                        book_dic[field] = elem.text
-                creators = []
-                for tag in CREATOR_TAGS:
-                    for elem in item.findall(self.nsPath(tag)):
-                        if elem is None: continue
-                        role = elem.attrib.get('Role')
-                        if role:
-                            creator = '%s (%s)' % (elem.text, role)
-                        else:
-                            creator = elem.text
-                        creators.append(creator)
-                if creators:
-                    book_dic['creators'] = creators
-                if book_dic.get('subjects'):
-                    # subjects is a Tuple field
-                    book_dic['subjects'] = (book_dic['subjects'],)
-                book_dic['source'] = self.name
-
-                book_list.append(book_dic)
-            return book_list
-    
-        elif error_code == AMAZON_CODE_NO_MATCH:
-            return []
-        else:
-            raise EnvironmentError, error_code
-        

Deleted: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon_config.py
===================================================================
--- Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon_config.py	2007-08-06 19:06:58 UTC (rev 78638)
+++ Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/source_amazon_config.py	2007-08-06 19:40:22 UTC (rev 78639)
@@ -1,9 +0,0 @@
-ASSOCIATE_TAG = 'circulante-20'
-ACCESS_KEY_ID = '13W2MMDG65QJJK9GG402'
-
-#UK
-#ASSOCIATE_TAG = 'circulante-21'
-#DE
-#ASSOCIATE_TAG = 'circulante0f-21'
-#FR
-#ASSOCIATE_TAG = 'circulante07-21'

Modified: Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/tests/dummy_server.py
===================================================================
--- Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/tests/dummy_server.py	2007-08-06 19:06:58 UTC (rev 78638)
+++ Sandbox/luciano/kirbi/kirbifetch/src/kirbifetch/tests/dummy_server.py	2007-08-06 19:40:22 UTC (rev 78639)
@@ -67,7 +67,7 @@
 
     xmlrpc.register_instance(srv)
 
-    print 'SimpleXMLRPCServer running on port %s...', PORT    
+    print 'SimpleXMLRPCServer running on port %s...' % PORT    
     xmlrpc.serve_forever()