[Checkins] SVN: relstorage/trunk/ 1. Valery Suhomlinov discovered a problem with non-ASCII data in transaction

Shane Hathaway shane at hathawaymix.org
Fri Feb 22 02:35:46 EST 2008


Log message for revision 84129:
  1. Valery Suhomlinov discovered a problem with non-ASCII data in transaction
     metadata.  The problem has been fixed for all supported databases.
  
  2. Made it possible to disable garbage collection during packing.
     Exposed the option in zope.conf.
  

Changed:
  U   relstorage/trunk/CHANGELOG.txt
  U   relstorage/trunk/notes/oracle_notes.txt
  U   relstorage/trunk/relstorage/adapters/common.py
  U   relstorage/trunk/relstorage/adapters/mysql.py
  U   relstorage/trunk/relstorage/adapters/oracle.py
  U   relstorage/trunk/relstorage/adapters/postgresql.py
  U   relstorage/trunk/relstorage/component.xml
  U   relstorage/trunk/relstorage/config.py
  U   relstorage/trunk/relstorage/relstorage.py
  U   relstorage/trunk/relstorage/tests/reltestbase.py

-=-
Modified: relstorage/trunk/CHANGELOG.txt
===================================================================
--- relstorage/trunk/CHANGELOG.txt	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/CHANGELOG.txt	2008-02-22 07:35:44 UTC (rev 84129)
@@ -1,5 +1,5 @@
 
-relstorage 0.9
+relstorage 1.0 beta
 
 - Renamed to reflect expanding database support.
 
@@ -41,10 +41,18 @@
 - Implemented the storage iterator protocol, making it possible to copy
   transactions to and from FileStorage and other RelStorage instances.
 
-- Fixed a bug that caused OIDs to be reused after importing objects.
+- Fixed a bug that caused OIDs to be reused after importing transactions.
   Added a corresponding test.
 
+- Made it possible to disable garbage collection during packing.
+  Exposed the option in zope.conf.
 
+- Valery Suhomlinov discovered a problem with non-ASCII data in transaction
+  metadata.  The problem has been fixed for all supported databases.
+
+
+
+
 PGStorage 0.4
 
 - Began using the PostgreSQL LISTEN and NOTIFY statements as a shortcut

Modified: relstorage/trunk/notes/oracle_notes.txt
===================================================================
--- relstorage/trunk/notes/oracle_notes.txt	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/notes/oracle_notes.txt	2008-02-22 07:35:44 UTC (rev 84129)
@@ -1,4 +1,8 @@
 
+If you are using Oracle 10g XE, use the "universal" version,
+since the smaller Western European version has minimal support
+for Unicode and will not pass all of the RelStorage tests.
+
 Docs:
     http://www.oracle.com/pls/db102/homepage
 
@@ -6,11 +10,18 @@
     http://www.davidpashley.com/articles/oracle-install.html
 
 Work around session limit (fixes ORA-12520):
-    ALTER SYSTEM SET PROCESSES=150 SCOPE=SPFILE
-    ALTER SYSTEM SET SESSIONS=150 SCOPE=SPFILE
+    ALTER SYSTEM SET PROCESSES=150 SCOPE=SPFILE;
+    ALTER SYSTEM SET SESSIONS=150 SCOPE=SPFILE;
     (then restart Oracle)
 
 Manually rollback an in-dispute transaction:
     select local_tran_id, state from DBA_2PC_PENDING;
     rollback force '$local_tran_id';
 
+It might be necessary to add the following lines to adapters/oracle.py,
+before all imports, to solve Oracle encoding issues.  (Let me know
+if you have to do this!)
+
+    import os
+    os.environ["NLS_LANG"] = ".AL32UTF8"
+

Modified: relstorage/trunk/relstorage/adapters/common.py
===================================================================
--- relstorage/trunk/relstorage/adapters/common.py	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/adapters/common.py	2008-02-22 07:35:44 UTC (rev 84129)
@@ -20,6 +20,14 @@
 log = logging.getLogger("relstorage.adapters.common")
 
 
+# Notes about adapters:
+#
+# An adapter must not hold a connection, cursor, or database state, because
+# RelStorage opens multiple concurrent connections using a single adapter
+# instance.
+# Within the context of an adapter, all OID and TID values are integers,
+# not binary strings, except as noted.
+
 class Adapter(object):
     """Common code for a database adapter.
 
@@ -122,6 +130,18 @@
             self._run_script_stmt(cursor, stmt, params)
 
 
+    def _transaction_iterator(self, cursor):
+        """Iterate over a list of transactions returned from the database.
+
+        Each row begins with (tid, username, description, extension)
+        and may have other columns.
+
+        The default implementation returns the cursor unmodified.
+        Subclasses can override this.
+        """
+        return cursor
+
+
     def iter_transactions(self, cursor):
         """Iterate over the transaction log, newest first.
 
@@ -136,7 +156,7 @@
         ORDER BY tid DESC
         """
         self._run_script_stmt(cursor, stmt)
-        return iter(cursor)
+        return self._transaction_iterator(cursor)
 
 
     def iter_transactions_range(self, cursor, start=None, stop=None):
@@ -144,12 +164,11 @@
 
         Includes packed transactions.
         Yields (tid, packed, username, description, extension)
-            for each transaction.
+        for each transaction.
         """
         stmt = """
-        SELECT tid,
-            CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END,
-            username, description, extension
+        SELECT tid, username, description, extension,
+            CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END
         FROM transaction
         WHERE tid >= 0
         """
@@ -160,7 +179,7 @@
         stmt += " ORDER BY tid"
         self._run_script_stmt(cursor, stmt,
             {'min_tid': start, 'max_tid': stop})
-        return iter(cursor)
+        return self._transaction_iterator(cursor)
 
 
     def iter_object_history(self, cursor, oid):
@@ -186,7 +205,7 @@
         ORDER BY tid DESC
         """
         self._run_script_stmt(cursor, stmt, {'oid': oid})
-        return iter(cursor)
+        return self._transaction_iterator(cursor)
 
 
     def iter_objects(self, cursor, tid):
@@ -332,7 +351,7 @@
             self.close(conn, cursor)
 
 
-    def pre_pack(self, pack_tid, get_references, gc=True):
+    def pre_pack(self, pack_tid, get_references, gc):
         """Decide what to pack.
 
         Subclasses may override this.

Modified: relstorage/trunk/relstorage/adapters/mysql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/mysql.py	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/adapters/mysql.py	2008-02-22 07:35:44 UTC (rev 84129)
@@ -64,7 +64,9 @@
     """MySQL adapter for RelStorage."""
 
     def __init__(self, **params):
-        self._params = params
+        self._params = params.copy()
+        self._params['use_unicode'] = True
+        self._params['charset'] = 'utf8'
 
     def create_schema(self, cursor):
         """Create the database tables."""
@@ -76,7 +78,7 @@
             username    VARCHAR(255) NOT NULL,
             description TEXT NOT NULL,
             extension   BLOB
-        ) ENGINE = InnoDB;
+        ) ENGINE = InnoDB CHARACTER SET utf8;
 
         -- Create a special transaction to represent object creation.  This
         -- row is often referenced by object_state.prev_tid, but never by
@@ -97,7 +99,7 @@
             tid         BIGINT NOT NULL REFERENCES transaction,
             PRIMARY KEY (zoid, tid),
             prev_tid    BIGINT NOT NULL REFERENCES transaction,
-            md5         CHAR(32),
+            md5         CHAR(32) CHARACTER SET ascii,
             state       LONGBLOB,
             CHECK (tid > 0)
         ) ENGINE = InnoDB;
@@ -537,7 +539,7 @@
         cursor.execute(stmt)
 
 
-    def pre_pack(self, pack_tid, get_references, gc=True):
+    def pre_pack(self, pack_tid, get_references, gc):
         """Decide what to pack.
 
         This overrides the method by the same name in common.Adapter.

Modified: relstorage/trunk/relstorage/adapters/oracle.py
===================================================================
--- relstorage/trunk/relstorage/adapters/oracle.py	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/adapters/oracle.py	2008-02-22 07:35:44 UTC (rev 84129)
@@ -105,8 +105,8 @@
         CREATE TABLE transaction (
             tid         NUMBER(20) NOT NULL PRIMARY KEY,
             packed      CHAR DEFAULT 'N' CHECK (packed IN ('N', 'Y')),
-            username    VARCHAR2(255),
-            description VARCHAR2(4000),
+            username    NVARCHAR2(255),
+            description NVARCHAR2(2000),
             extension   RAW(2000)
         );
 
@@ -512,9 +512,10 @@
             (tid, packed, username, description, extension)
         VALUES (:1, :2, :3, :4, :5)
         """
+        encoding = cursor.connection.encoding
         cursor.execute(stmt, (
-            tid, packed and 'Y' or 'N', username, description,
-            cx_Oracle.Binary(extension)))
+            tid, packed and 'Y' or 'N', username.encode(encoding),
+            description.encode(encoding), cx_Oracle.Binary(extension)))
 
     def detect_conflict(self, cursor):
         """Find one conflict in the data about to be committed.
@@ -618,6 +619,7 @@
         """Abort the commit.  If txn is not None, phase 1 is also aborted."""
         cursor.connection.rollback()
 
+
     def new_oid(self, cursor):
         """Return a new, unused OID."""
         stmt = "SELECT zoid_seq.nextval FROM DUAL"
@@ -625,6 +627,24 @@
         return cursor.fetchone()[0]
 
 
+    def _transaction_iterator(self, cursor):
+        """Iterate over a list of transactions returned from the database.
+
+        Each row begins with (tid, username, description, extension)
+        and may have other columns.
+
+        This overrides the default implementation.
+        """
+        encoding = cursor.connection.encoding
+        for row in cursor:
+            tid, username, description = row[:3]
+            if username is not None:
+                username = username.decode(encoding)
+            if description is not None:
+                description = description.decode(encoding)
+            yield (tid, username, description) + tuple(row[3:])
+
+
     def hold_pack_lock(self, cursor):
         """Try to acquire the pack lock.
 
@@ -720,4 +740,3 @@
     def __getitem__(self, key):
         self.used.add(key)
         return self.source[key]
-

Modified: relstorage/trunk/relstorage/adapters/postgresql.py
===================================================================
--- relstorage/trunk/relstorage/adapters/postgresql.py	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/adapters/postgresql.py	2008-02-22 07:35:44 UTC (rev 84129)
@@ -22,15 +22,9 @@
 
 log = logging.getLogger("relstorage.adapters.postgresql")
 
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
 
-# Notes about adapters:
-#
-# An adapter must not hold a connection, cursor, or database state, because
-# RelStorage opens multiple concurrent connections using a single adapter
-# instance.
-# All OID and TID values are integers, not binary strings, except as noted.
 
-
 class PostgreSQLAdapter(Adapter):
     """PostgreSQL adapter for RelStorage."""
 
@@ -179,6 +173,7 @@
         """Open a database connection and return (conn, cursor)."""
         try:
             conn = psycopg2.connect(self._dsn)
+            conn.set_client_encoding('UNICODE')
             conn.set_isolation_level(isolation)
             cursor = conn.cursor()
             cursor.arraysize = 64

Modified: relstorage/trunk/relstorage/component.xml
===================================================================
--- relstorage/trunk/relstorage/component.xml	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/component.xml	2008-02-22 07:35:44 UTC (rev 84129)
@@ -40,6 +40,21 @@
         the performance of servers with high write volume.
       </description>
     </key>
+    <key name="pack-gc" datatype="boolean" default="true">
+      <description>
+        If pack-gc is false, pack operations do not perform
+        garbage collection.  Garbage collection is enabled by default.
+
+        If garbage collection is disabled, pack operations keep at least one
+        revision of every object.  With garbage collection disabled, the
+        pack code does not need to follow object references, making
+        packing conceivably much faster.  However, some of that benefit
+        may be lost due to an ever increasing number of unused objects.
+
+        Disabling garbage collection is also a hack that ensures
+        inter-database references never break.
+      </description>
+    </key>
   </sectiontype>
 
   <sectiontype name="postgresql" implements="relstorage.adapter"

Modified: relstorage/trunk/relstorage/config.py
===================================================================
--- relstorage/trunk/relstorage/config.py	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/config.py	2008-02-22 07:35:44 UTC (rev 84129)
@@ -24,7 +24,8 @@
         config = self.config
         adapter = config.adapter.open()
         return RelStorage(adapter, name=config.name, create=config.create,
-            read_only=config.read_only, poll_interval=config.poll_interval)
+            read_only=config.read_only, poll_interval=config.poll_interval,
+            pack_gc=config.pack_gc)
 
 
 class PostgreSQLAdapterFactory(BaseConfig):

Modified: relstorage/trunk/relstorage/relstorage.py
===================================================================
--- relstorage/trunk/relstorage/relstorage.py	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/relstorage.py	2008-02-22 07:35:44 UTC (rev 84129)
@@ -43,7 +43,7 @@
     """Storage to a relational database, based on invalidation polling"""
 
     def __init__(self, adapter, name=None, create=True,
-            read_only=False, poll_interval=0):
+            read_only=False, poll_interval=0, pack_gc=True):
         if name is None:
             name = 'RelStorage on %s' % adapter.__class__.__name__
 
@@ -51,6 +51,7 @@
         self._name = name
         self._is_read_only = read_only
         self._poll_interval = poll_interval
+        self._pack_gc = pack_gc
 
         if create:
             self._adapter.prepare_schema()
@@ -667,6 +668,22 @@
             self._lock_release()
 
 
+    def set_pack_gc(self, pack_gc):
+        """Configures whether garbage collection during packing is enabled.
+
+        Garbage collection is enabled by default.  If GC is disabled,
+        packing keeps at least one revision of every object.
+        With GC disabled, the pack code does not need to follow object
+        references, making packing conceivably much faster.
+        However, some of that benefit may be lost due to an ever
+        increasing number of unused objects.
+
+        Disabling garbage collection is also a hack that ensures
+        inter-database references never break.
+        """
+        self._pack_gc = pack_gc
+
+
     def pack(self, t, referencesf):
         if self._is_read_only:
             raise POSException.ReadOnlyError()
@@ -700,7 +717,7 @@
                 # In pre_pack, the adapter fills tables with
                 # information about what to pack.  The adapter
                 # should not actually pack anything yet.
-                adapter.pre_pack(tid_int, get_references)
+                adapter.pre_pack(tid_int, get_references, self._pack_gc)
 
                 # Now pack.
                 adapter.pack(tid_int)
@@ -734,7 +751,7 @@
         # self._zodb_conn = zodb_conn
         RelStorage.__init__(self, adapter=parent._adapter, name=parent._name,
             create=False, read_only=parent._is_read_only,
-            poll_interval=parent._poll_interval)
+            poll_interval=parent._poll_interval, pack_gc=parent._pack_gc)
         # _prev_polled_tid contains the tid at the previous poll
         self._prev_polled_tid = None
         self._showed_disconnect = False
@@ -841,7 +858,7 @@
         else:
             stop_int = None
 
-        # _transactions: [(tid, packed, username, description, extension)]
+        # _transactions: [(tid, username, description, extension, packed)]
         self._transactions = list(adapter.iter_transactions_range(
             self._cursor, start_int, stop_int))
         self._index = 0
@@ -871,7 +888,7 @@
 
 class RecordIterator(object):
     """Iterate over the objects in a transaction."""
-    def __init__(self, trans_iter, tid_int, packed, user, desc, ext):
+    def __init__(self, trans_iter, tid_int, user, desc, ext, packed):
         self.tid = p64(tid_int)
         self.status = packed and 'p' or ' '
         self.user = user or ''

Modified: relstorage/trunk/relstorage/tests/reltestbase.py
===================================================================
--- relstorage/trunk/relstorage/tests/reltestbase.py	2008-02-22 01:53:13 UTC (rev 84128)
+++ relstorage/trunk/relstorage/tests/reltestbase.py	2008-02-22 07:35:44 UTC (rev 84129)
@@ -13,6 +13,7 @@
 ##############################################################################
 """A foundation for relstorage adapter tests"""
 
+import time
 import unittest
 from relstorage.relstorage import RelStorage
 
@@ -30,6 +31,7 @@
 
 from ZODB.tests.MinPO import MinPO
 from ZODB.tests.StorageTestBase import zodb_unpickle, zodb_pickle
+from ZODB.serialize import referencesf
 
 
 class BaseRelStorageTests(StorageTestBase.StorageTestBase):
@@ -371,7 +373,63 @@
 
         self.assertRaises(IndexError, iter.__getitem__, offset)
 
+    def checkNonASCIITransactionMetadata(self):
+        # Verify the database stores and retrieves non-ASCII text
+        # in transaction metadata.
+        db = DB(self._storage)
+        try:
+            c1 = db.open()
+            r1 = c1.root()
+            r1['alpha'] = 1
+            user = u"\u65e5\u672c\u8e86"
+            transaction.get().setUser(user)
+            transaction.commit()
+            r1['alpha'] = 2
+            desc = u"Japanese: \u65e5\u672c\u8e86"
+            transaction.get().note(desc)
+            transaction.commit()
 
+            info = self._storage.undoInfo()
+            self.assertEqual(info[0]['description'], desc)
+            self.assertEqual(info[1]['user_name'], '/ ' + user)
+        finally:
+            db.close()
+
+    def checkPackGC(self, gc_enabled=True):
+        db = DB(self._storage)
+        try:
+            c1 = db.open()
+            r1 = c1.root()
+            r1['alpha'] = PersistentMapping()
+            transaction.commit()
+
+            oid = r1['alpha']._p_oid
+            r1['alpha'] = None
+            transaction.commit()
+
+            # The object should still exist
+            self._storage.load(oid, '')
+
+            # Pack
+            now = packtime = time.time()
+            while packtime <= now:
+                packtime = time.time()
+            self._storage.pack(packtime, referencesf)
+
+            if gc_enabled:
+                # The object should now be gone
+                self.assertRaises(KeyError, self._storage.load, oid, '')
+            else:
+                # The object should still exist
+                self._storage.load(oid, '')
+        finally:
+            db.close()
+
+    def checkPackGCDisabled(self):
+        self._storage.set_pack_gc(False)
+        self.checkPackGC(gc_enabled=False)
+
+
 class IteratorDeepCompareUnordered:
     # Like IteratorDeepCompare, but compensates for OID order
     # differences in transactions.



More information about the Checkins mailing list