[Checkins] SVN: relstorage/trunk/ Move Oracle blob support to use 1 chunk only per blob.

Martijn Pieters mj at zopatista.com
Fri Jun 10 10:53:04 EDT 2011


Log message for revision 121905:
  Move Oracle blob support to use 1 chunk only per blob.
  
  This maximizes read and write performance on Oracle systems, especially on Oracle 11g databases. Note that due to limitations in cx_Oracle, the maximum BLOB size is 2GB on 32-bit systems and 4GB on 64-bit systems.
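
  For illustration, here is a condensed, hypothetical sketch of the new
  single-chunk upload path (the download side in mover.py mirrors it): insert
  a row holding empty_blob(), fetch the LOB locator back, and stream the file
  into it with cx_Oracle's LOB API in multiples of the LOB's natural chunk
  size. The function name upload_blob_one_chunk and the 1MB default for
  blob_chunk_size are illustrative only; see the mover.py diff below for the
  committed code.

    # Hypothetical sketch, not the committed implementation.
    import sys

    def upload_blob_one_chunk(cursor, oid, tid, filename,
                              blob_chunk_size=1 << 20):
        # cx_Oracle cannot address LOB offsets past sys.maxint, and this
        # scheme caps them at 4GB, whichever limit is reached first.
        maxsize = min(sys.maxint, 1 << 32)

        # Create the row with an empty LOB locator, then fetch it back.
        cursor.execute(
            "INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk) "
            "VALUES (:oid, :tid, 0, empty_blob())", oid=oid, tid=tid)
        cursor.execute(
            "SELECT chunk FROM blob_chunk WHERE zoid = :oid AND tid = :tid",
            oid=oid, tid=tid)
        blob, = cursor.fetchone()

        blob.open()
        f = open(filename, 'rb')
        try:
            # Write in multiples of the LOB's natural chunk size for best
            # throughput; Oracle LOB offsets are 1-based.
            chunk_size = (max(blob_chunk_size // blob.getchunksize(), 1)
                          * blob.getchunksize())
            offset = 1
            while True:
                data = f.read(chunk_size)
                if not data:
                    break
                blob.write(data, offset)
                offset += len(data)
                assert offset <= maxsize, "blob too large for cx_Oracle"
        finally:
            f.close()
            if blob.isopen():
                blob.close()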

Changed:
  U   relstorage/trunk/CHANGES.txt
  U   relstorage/trunk/README.txt
  U   relstorage/trunk/relstorage/adapters/mover.py

-=-
Modified: relstorage/trunk/CHANGES.txt
===================================================================
--- relstorage/trunk/CHANGES.txt	2011-06-09 15:38:03 UTC (rev 121904)
+++ relstorage/trunk/CHANGES.txt	2011-06-10 14:53:03 UTC (rev 121905)
@@ -11,6 +11,13 @@
 - Fix object reference downloading performance for large Oracle RelStorage
   database during the garbage collection phase of a pack.
 
+- Switch to storing the whole ZODB blob in one chunk on Oracle to maximize
+  blob reading and writing performance. Databases that already store blobs
+  in Oracle with 1.5.0b2 or earlier are still supported.
+
+  Note that due to limitations in the current release of cx_Oracle, the
+  maximum BLOB size is 2GB on 32-bit systems and 4GB on 64-bit systems.
+
 1.5.0b2 (2011-03-02)
 --------------------
 

Modified: relstorage/trunk/README.txt
===================================================================
--- relstorage/trunk/README.txt	2011-06-09 15:38:03 UTC (rev 121904)
+++ relstorage/trunk/README.txt	2011-06-10 14:53:03 UTC (rev 121905)
@@ -126,6 +126,8 @@
     GRANT CONNECT, RESOURCE, CREATE TABLE, CREATE SEQUENCE TO zodb;
     GRANT EXECUTE ON DBMS_LOCK TO zodb;
 
+Note that due to limitations in cx_Oracle, the maximum ZODB blob size is
+sys.maxint (2GB - 1 byte) on 32-bit systems, or 4GB on 64-bit systems.
 
 Configuring Plone
 -----------------

Modified: relstorage/trunk/relstorage/adapters/mover.py
===================================================================
--- relstorage/trunk/relstorage/adapters/mover.py	2011-06-09 15:38:03 UTC (rev 121904)
+++ relstorage/trunk/relstorage/adapters/mover.py	2011-06-10 14:53:03 UTC (rev 121905)
@@ -22,6 +22,7 @@
 from relstorage.adapters.batch import PostgreSQLRowBatcher
 from zope.interface import implements
 import os
+import sys
 
 try:
     from hashlib import md5
@@ -1006,32 +1007,18 @@
             use_base64 = True
             stmt = stmt.replace(
                 "SELECT chunk", "SELECT encode(chunk, 'base64')")
-        elif self.database_name == 'oracle':
-            for n in (1, 2, 3):
-                stmt = stmt.replace('%s', ':%d' % n, 1)
 
         f = None
         bytes = 0
         try:
             chunk_num = 0
             while True:
-                if self.database_name == 'oracle':
-                    row = self.runner.run_lob_stmt(
-                        cursor, stmt, (oid, tid, chunk_num))
-                    if row is not None:
-                        chunk = row[0] or ''
-                    else:
-                        chunk = None
+                cursor.execute(stmt, (oid, tid, chunk_num))
+                rows = list(cursor)
+                if rows:
+                    assert len(rows) == 1
+                    chunk = rows[0][0]
                 else:
-                    cursor.execute(stmt, (oid, tid, chunk_num))
-                    rows = list(cursor)
-                    if rows:
-                        assert len(rows) == 1
-                        chunk = rows[0][0]
-                    else:
-                        chunk = None
-
-                if chunk is None:
                     # No more chunks.  Note: if there are no chunks at
                     # all, then this method should not write a file.
                     break
@@ -1055,11 +1042,71 @@
 
     mysql_download_blob = generic_download_blob
     postgresql_download_blob = generic_download_blob
-    oracle_download_blob = generic_download_blob
 
+    def oracle_download_blob(self, cursor, oid, tid, filename):
+        """Download a blob into a file."""
+        stmt = """
+        SELECT chunk
+        FROM blob_chunk
+        WHERE zoid = :1
+            AND tid = :2
+            AND chunk_num = :3
+        """
 
+        f = None
+        bytes = 0
+        chunk_num = 0
+        # XXX Current versions of cx_Oracle only support offsets up
+        # to sys.maxint or 4GB, whichever comes first.
+        maxsize = min(sys.maxint, 1<<32)
+        try:
+            # We still support the 1.5.0b2 case where blobs were stored in
+            # multiple chunks in the blob_chunk table; later versions store
+            # all of the data in a single BLOB in one row.
+            while True:
+                cursor.execute(stmt, (oid, tid, chunk_num))
+                try:
+                    blob, = cursor.fetchone()
+                except TypeError:
+                    # No more chunks.  Note: if there are no chunks at
+                    # all, then this method should not write a file.
+                    break 
 
+                if f is None:
+                    f = open(filename, 'wb')
+                # round off the chunk-size to be a multiple of the oracle
+                # blob chunk size to maximize performance
+                chunk_size = int(max(round(
+                    1.0 * self.blob_chunk_size / blob.getchunksize()), 1) * 
+                    blob.getchunksize())
+                offset = 1 # Oracle still uses 1-based indexing.
+                while True:
+                    chunk = blob.read(offset, chunk_size)
+                    if chunk:
+                        f.write(chunk)
+                        bytes += len(chunk)
+                        offset += len(chunk)
+                        if offset > maxsize:
+                            # We have already read the maximum we can store
+                            # so we can assume we are done. If we do not break
+                            # off here, cx_Oracle will throw an overflow
+                            # exception anyway.
+                            break
+                    else:
+                        break
+                chunk_num += 1
+        except:
+            if f is not None:
+                f.close()
+                os.remove(filename)
+            raise
 
+        if f is not None:
+            f.close()
+        return bytes
+
+
+
     def generic_upload_blob(self, cursor, oid, tid, filename):
         """Upload a blob from a file.
 
@@ -1141,8 +1188,11 @@
             use_tid = True
             insert_stmt = """
             INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk)
-            VALUES (:oid, :tid, :chunk_num, :blobdata)
+            VALUES (:oid, :tid, 0, empty_blob())
             """
+            select_stmt = """
+            SELECT chunk FROM blob_chunk WHERE zoid=:oid AND tid=:tid
+            """
 
         else:
             use_tid = False
@@ -1151,27 +1201,37 @@
 
             insert_stmt = """
             INSERT INTO temp_blob_chunk (zoid, chunk_num, chunk)
-            VALUES (:oid, :chunk_num, :blobdata)
+            VALUES (:oid, 0, empty_blob())
             """
+            select_stmt = """
+            SELECT chunk FROM temp_blob_chunk WHERE zoid=:oid
+            """
 
         f = open(filename, 'rb')
+        blob = None
+        offset = 1 # Oracle still uses 1-based indexing.
+        # XXX Current versions of cx_Oracle only support offsets up
+        # to sys.maxint or 4GB, whichever comes first.
+        maxsize = min(sys.maxint, 1<<32)
         try:
-            chunk_num = 0
+            params = dict(oid=oid)
+            if use_tid:
+                params['tid'] = tid
+            cursor.execute(insert_stmt, params)
+            cursor.execute(select_stmt, params)
+            blob, = cursor.fetchone()
+            blob.open()
+            chunk_size = int(max(round(
+                1.0 * self.blob_chunk_size / blob.getchunksize()), 1) * 
+                blob.getchunksize())
             while True:
-                chunk = f.read(self.blob_chunk_size)
-                if not chunk and chunk_num > 0:
-                    # EOF.  Note that we always write at least one
-                    # chunk, even if the blob file is empty.
+                chunk = f.read(chunk_size)
+                if not blob.write(chunk, offset):
+                    # EOF.
                     break
-                params = {
-                    'oid': oid,
-                    'chunk_num': chunk_num,
-                    'blobdata': chunk,
-                }
-                if use_tid:
-                    params['tid'] = tid
-                cursor.setinputsizes(blobdata=self.inputsizes['blobdata'])
-                cursor.execute(insert_stmt, params)
-                chunk_num += 1
+                offset += len(chunk)
+                assert offset <= maxsize, "File too large"
         finally:
             f.close()
+            if blob is not None and blob.isopen():
+                blob.close()


