[Checkins] SVN: relstorage/branches/postgres_blob_oid/relstorage/tests/ Complete large blob file testing.

Martijn Pieters mj at zopatista.com
Wed Jun 15 10:12:28 EDT 2011


Log message for revision 121949:
  Complete large blob file testing.
  
  This test only runs at test level 2. On my lowly MacBook, one iteration of this test against a local PostgreSQL takes 16 minutes. To run these tests: bin/test -a 2 -s relstorage -t LargeBlobTest.
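
  For context, the level gate relies on the test runner's per-test "level"
  attribute: zope.testrunner only collects tests whose level is at or below
  the --at-level (-a) value, which defaults to 1, unless --all is given.
  A minimal sketch of that mechanism, separate from the RelStorage suite
  (the class and test names below are made up for illustration):

      import unittest

      class ExpensiveTest(unittest.TestCase):
          # Skipped by a plain test run; selected by bin/test -a 2 or --all.
          level = 2

          def test_slow_path(self):
              self.assertTrue(True)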

Changed:
  U   relstorage/branches/postgres_blob_oid/relstorage/tests/blob/testblob.py
  U   relstorage/branches/postgres_blob_oid/relstorage/tests/testmysql.py
  U   relstorage/branches/postgres_blob_oid/relstorage/tests/testoracle.py
  U   relstorage/branches/postgres_blob_oid/relstorage/tests/testpostgresql.py

-=-
Modified: relstorage/branches/postgres_blob_oid/relstorage/tests/blob/testblob.py
===================================================================
--- relstorage/branches/postgres_blob_oid/relstorage/tests/blob/testblob.py	2011-06-15 12:11:50 UTC (rev 121948)
+++ relstorage/branches/postgres_blob_oid/relstorage/tests/blob/testblob.py	2011-06-15 14:12:27 UTC (rev 121949)
@@ -18,6 +18,7 @@
 
 import atexit
 import collections
+import datetime
 import os
 import random
 import re
@@ -56,8 +57,8 @@
     return new_time
 
 
-def random_file(size, filename):
-    """Create a random data file of at least the given size.
+def random_file(size, fd):
+    """Create a random data of at least the given size, writing to fd.
 
     See http://jessenoller.com/2008/05/30/making-re-creatable-random-data-files-really-fast-in-python/
     for the technique used.
@@ -76,18 +77,31 @@
             a.rotate(int(b[0]))
             b.rotate(1)
     datagen = fdata()
-    output = open(filename, 'wb')
     bytes = 0
     md5sum = md5()
     while bytes < size:
         data = datagen.next()
         md5sum.update(data)
-        output.write(data)
+        fd.write(data)
         bytes += len(data)
-    output.close()
     return md5sum.hexdigest()
 
 
+def md5sum(fd):
+    md5sum = md5()
+    blocksize = md5sum.block_size << 8
+    for data in iter(lambda: fd.read(blocksize), ''):
+        md5sum.update(data)
+    return md5sum.hexdigest()
+
+
+def sizeof_fmt(num):
+    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
+        if num < 1024.0:
+            return "%3.1f%s" % (num, x)
+        num /= 1024.0
+
+
 class BlobTestBase(ZODB.tests.StorageTestBase.StorageTestBase):
 
     def setUp(self):
@@ -249,6 +263,47 @@
         self.compare(self._storage, self._dst)
     
 
+class LargeBlobTest(BlobTestBase):
+    """Test large blob upload and download.
+
+    Note that this test exercises the blob storage and only makes sense
+    when shared_blob_support=False.
+
+    """
+    level = 2 # Only run when selecting -a 2 or higher, or --all
+    testsize = 0 # Overridden on the auto-generated test class
+
+    def _log(self, msg):
+        print '%s [%s]: %s' % (
+            datetime.datetime.now().isoformat(' '),
+            self.__class__.__name__, msg)
+
+    def testLargeBlob(self):
+        # Large blobs are chunked into multiple pieces; we want to know
+        # if they come out the same way they went in.
+        db = DB(self._storage)
+        conn = db.open()
+        blob = conn.root()[1] = ZODB.blob.Blob()
+        size = sizeof_fmt(self.testsize)
+        self._log('Creating %s blob file' % size)
+        signature = random_file(self.testsize, blob.open('w'))
+        self._log('Committing %s blob file' % size)
+        transaction.commit()
+
+        # Clear the cache
+        for base, dir, files in os.walk('.'):
+            for f in files:
+                if f.endswith('.blob'):
+                    ZODB.blob.remove_committed(os.path.join(base, f))
+
+        # Re-download blob
+        self._log('Caching %s blob file' % size)
+        conn = db.open()
+        blob = conn.root()[1].open('r')
+        self._log('Creating signature for %s blob cache' % size)
+        self.assertEqual(md5sum(blob), signature)
+
+
 def packing_with_uncommitted_data_non_undoing():
     """
     This covers regression for bug #130459.
@@ -534,6 +589,7 @@
                            keep_history=True,
                            pack_test_name='blob_packing.txt',
                            test_blob_cache=False,
+                           large_blob_size=None
                            ):
     """Return a test suite for a generic IBlobStorage.
 
@@ -578,10 +634,11 @@
             blob_dir = '%s.bobs' % name
         return factory(name, blob_dir, **kw)
 
-    def add_test_based_on_test_class(class_):
+    def add_test_based_on_test_class(class_, **attr):
+        attr.update(create_storage=create_storage)
         new_class = class_.__class__(
             prefix+class_.__name__, (class_, ),
-            dict(create_storage=create_storage),
+            attr,
             )
         suite.addTest(unittest.makeSuite(new_class))
 
@@ -589,6 +646,8 @@
         add_test_based_on_test_class(RecoveryBlobStorage)
     if test_undo:
         add_test_based_on_test_class(BlobUndoTests)
+    if large_blob_size:
+        add_test_based_on_test_class(LargeBlobTest, testsize=large_blob_size)
 
     suite.layer = MinimalTestLayer(prefix+'BlobTests')
 

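The new helpers in testblob.py split generation from verification: random_file
now writes its pseudo-random stream to an already-open file object while
hashing it, and md5sum re-reads any file-like object in blocks to produce the
digest to compare against. A self-contained sketch of the same round trip
(using os.urandom and a temporary file here rather than the re-creatable
generator from the test module):

    import hashlib
    import os
    import tempfile

    def write_random(fd, size):
        # Write size bytes of random data to fd, returning the MD5 hex
        # digest of what was written.
        digest = hashlib.md5()
        written = 0
        while written < size:
            chunk = os.urandom(min(1 << 16, size - written))
            digest.update(chunk)
            fd.write(chunk)
            written += len(chunk)
        return digest.hexdigest()

    def md5_of(fd, blocksize=1 << 16):
        # Hash a readable file object in fixed-size blocks.
        digest = hashlib.md5()
        for data in iter(lambda: fd.read(blocksize), b''):
            digest.update(data)
        return digest.hexdigest()

    tmp = tempfile.NamedTemporaryFile(delete=False)
    signature = write_random(tmp, 1 << 20)   # 1MB is plenty for a sketch
    tmp.close()
    verify = open(tmp.name, 'rb')
    assert md5_of(verify) == signature
    verify.close()
    os.unlink(tmp.name)
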
Modified: relstorage/branches/postgres_blob_oid/relstorage/tests/testmysql.py
===================================================================
--- relstorage/branches/postgres_blob_oid/relstorage/tests/testmysql.py	2011-06-15 12:11:50 UTC (rev 121948)
+++ relstorage/branches/postgres_blob_oid/relstorage/tests/testmysql.py	2011-06-15 14:12:27 UTC (rev 121949)
@@ -202,6 +202,10 @@
                 else:
                     pack_test_name = 'blob_packing_history_free.txt'
 
+                # MySQL is limited to the blob_chunk_size as there is no
+                # native blob streaming support.
+                blob_size = Options().blob_chunk_size
+
                 suite.addTest(storage_reusable_suite(
                     prefix, create_storage,
                     test_blob_storage_recovery=True,
@@ -209,6 +213,7 @@
                     test_undo=keep_history,
                     pack_test_name=pack_test_name,
                     test_blob_cache=(not shared_blob_dir),
+                    large_blob_size=(not shared_blob_dir) and blob_size + 100
                 ))
 
     return suite

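The blob_chunk_size + 100 figure is meant to push the MySQL test blob just
past a single chunk, so both the chunked upload and the chunked re-assembly
on download get exercised without the run taking longer than needed. The
PostgreSQL suite below applies the same idea against its 2GB chunk ceiling
((1 << 31) + 100), and the Oracle suite against its platform-dependent limit.
A toy illustration of the arithmetic (the 1MB chunk size is an assumed
example value, not something this change sets):

    chunk_size = 1 << 20               # hypothetical blob_chunk_size
    size = chunk_size + 100
    chunks = -(-size // chunk_size)    # ceiling division
    assert chunks == 2                 # one full chunk plus a 100-byte tail
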
Modified: relstorage/branches/postgres_blob_oid/relstorage/tests/testoracle.py
===================================================================
--- relstorage/branches/postgres_blob_oid/relstorage/tests/testoracle.py	2011-06-15 12:11:50 UTC (rev 121948)
+++ relstorage/branches/postgres_blob_oid/relstorage/tests/testoracle.py	2011-06-15 14:12:27 UTC (rev 121949)
@@ -22,6 +22,7 @@
 from relstorage.tests.hptestbase import HistoryPreservingToFileStorage
 import logging
 import os
+import sys
 import unittest
 
 
@@ -211,6 +212,10 @@
                 else:
                     pack_test_name = 'blob_packing_history_free.txt'
 
+                # cx_Oracle blob support can only address up to sys.maxint on
+                # 32-bit systems, 4GB otherwise.
+                blob_size = min(sys.maxint, 1<<32)
+
                 suite.addTest(storage_reusable_suite(
                     prefix, create_storage,
                     test_blob_storage_recovery=True,
@@ -218,6 +223,7 @@
                     test_undo=keep_history,
                     pack_test_name=pack_test_name,
                     test_blob_cache=(not shared_blob_dir),
+                    large_blob_size=(not shared_blob_dir) and blob_size + 100,
                 ))
 
     return suite

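The Oracle size is platform dependent: on a 32-bit Python, sys.maxint is
2**31 - 1, so min() keeps the blob addressable by cx_Oracle; on a 64-bit
build sys.maxint is far larger and the 4GB cap wins. A quick sketch of the
arithmetic (Python 2 only, since sys.maxint no longer exists in Python 3):

    import sys
    blob_size = min(sys.maxint, 1 << 32)
    # 32-bit build: blob_size == sys.maxint == 2**31 - 1
    # 64-bit build: blob_size == 1 << 32, i.e. 4GB
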
Modified: relstorage/branches/postgres_blob_oid/relstorage/tests/testpostgresql.py
===================================================================
--- relstorage/branches/postgres_blob_oid/relstorage/tests/testpostgresql.py	2011-06-15 12:11:50 UTC (rev 121948)
+++ relstorage/branches/postgres_blob_oid/relstorage/tests/testpostgresql.py	2011-06-15 14:12:27 UTC (rev 121949)
@@ -203,6 +203,8 @@
                     test_undo=keep_history,
                     pack_test_name=pack_test_name,
                     test_blob_cache=(not shared_blob_dir),
+                    # PostgreSQL blob chunks are max 2GB in size
+                    large_blob_size=(not shared_blob_dir) and (1<<31) + 100,
                 ))
 
     return suite


