[Checkins] SVN: mongopersist/trunk/ - Feature: Whenever ``setattr()`` is called on a persistent object, it is

Stephen Richter cvs-admin at zope.org
Fri Mar 30 19:43:12 UTC 2012


Log message for revision 124823:
  - Feature: Whenever ``setattr()`` is called on a persistent object, it is
    marked as changed even if the new value equals the old one. To minimize
    writes to MongoDB, the latest database state is compared to the new state
    and the new state is only written when changes are detected. A flag called
    ``serialize.IGNORE_IDENTICAL_DOCUMENTS`` (default: ``True``) is used to
    control the feature. (Experimental)
  
  

Changed:
  U   mongopersist/trunk/CHANGES.txt
  U   mongopersist/trunk/src/mongopersist/conflict.py
  U   mongopersist/trunk/src/mongopersist/datamanager.py
  U   mongopersist/trunk/src/mongopersist/interfaces.py
  U   mongopersist/trunk/src/mongopersist/serialize.py
  U   mongopersist/trunk/src/mongopersist/tests/test_conflict.py
  U   mongopersist/trunk/src/mongopersist/tests/test_datamanager.py

-=-
Modified: mongopersist/trunk/CHANGES.txt
===================================================================
--- mongopersist/trunk/CHANGES.txt	2012-03-30 19:17:29 UTC (rev 124822)
+++ mongopersist/trunk/CHANGES.txt	2012-03-30 19:43:08 UTC (rev 124823)
@@ -5,8 +5,6 @@
 0.7.0 (2012-03-??)
 ------------------
 
-- Added transaction ID to LoggingDecorator
-
 - Feature: A new ``IConflictHandler`` interface now controls all aspects of
   conflict resolution. The following implementations are provided:
 
@@ -22,13 +20,20 @@
   * ``ResolvingSerialConflictHandler``: Another serial handler, but it has the
     ability to resolve a conflict. For this to happen, a persistent object
     must implement ``_p_resolveConflict(orig_state, cur_state, new_state)``,
-    which returns the new, merged state.
+    which returns the new, merged state. (Experimental)
 
   As a result, the ``detect_conflicts`` flag of the data manager was removed
   and replaced with the ``conflict_handler`` attribute. One can pass in the
   ``conflict_handler_factory`` to the data manager constructor. The factory
   needs to expect one argument, the data manager.
 
+- Feature: Whenever ``setattr()`` is called on a persistent object, it is
+  marked as changed even if the new value equals the old one. To minimize
+  writes to MongoDB, the latest database state is compared to the new state
+  and the new state is only written when changes are detected. A flag called
+  ``serialize.IGNORE_IDENTICAL_DOCUMENTS`` (default: ``True``) is used to
+  control the feature. (Experimental)
+
 - Feature: ``ConflictError`` has now a much more meaningful API. Instead of
   just referencing the object and different serials, it now actually has the
   original, current and new state documents.
@@ -40,6 +45,8 @@
 - Feature: Provide a flag to turn on MongoDB access logging. The flag is false
   by default, since access logging is very expensive.
 
+- Feature: Added transaction ID to LoggingDecorator.
+
 - Bug: We have seen several occasions in production where we suddenly lost
   some state in some documents, which prohibited the objects from being
   loadable again. The cause was that the ``_original_states`` attribute did not

Modified: mongopersist/trunk/src/mongopersist/conflict.py
===================================================================
--- mongopersist/trunk/src/mongopersist/conflict.py	2012-03-30 19:17:29 UTC (rev 124822)
+++ mongopersist/trunk/src/mongopersist/conflict.py	2012-03-30 19:43:08 UTC (rev 124823)
@@ -46,6 +46,9 @@
     def on_modified(self, obj):
         pass
 
+    def is_same(self, obj, orig_state, new_state):
+        return orig_state == new_state
+
     def has_conflicts(self, objs):
         return False
 
@@ -67,14 +70,25 @@
 
     def on_before_store(self, obj, state):
         state[self.field_name] = u64(getattr(obj, '_p_serial', 0)) + 1
-        obj._p_serial = p64(state[self.field_name])
+        # Do not set the object serial yet, since we might decide not to store
+        # after all.
 
     def on_after_store(self, obj, state):
-        pass
+        obj._p_serial = p64(state[self.field_name])
 
     def on_modified(self, obj):
         pass
 
+    def is_same(self, obj, orig_state, new_state):
+        if orig_state is None:
+            # This should never happen in a real running system.
+            return False
+        orig_state = orig_state.copy()
+        orig_state.pop(self.field_name)
+        new_state = new_state.copy()
+        new_state.pop(self.field_name)
+        return orig_state == new_state
+
     def resolve(self, obj, orig_doc, cur_doc, new_doc):
         raise NotImplementedError
 

Modified: mongopersist/trunk/src/mongopersist/datamanager.py
===================================================================
--- mongopersist/trunk/src/mongopersist/datamanager.py	2012-03-30 19:17:29 UTC (rev 124822)
+++ mongopersist/trunk/src/mongopersist/datamanager.py	2012-03-30 19:43:08 UTC (rev 124823)
@@ -185,7 +185,12 @@
         self._inserted_objects = []
         self._modified_objects = []
         self._removed_objects = []
+        # Keeps states as found at the beginning of the transaction.
         self._original_states = {}
+        # The latest states written to the database. This is different to the
+        # original states, since changes can be flushed to the database
+        # multiple times per transaction.
+        self._latest_states = {}
         self._needs_to_join = True
         self._object_cache = {}
         self.annotations = {}

Modified: mongopersist/trunk/src/mongopersist/interfaces.py
===================================================================
--- mongopersist/trunk/src/mongopersist/interfaces.py	2012-03-30 19:17:29 UTC (rev 124822)
+++ mongopersist/trunk/src/mongopersist/interfaces.py	2012-03-30 19:43:08 UTC (rev 124823)
@@ -81,6 +81,11 @@
     def on_modified(obj):
         """Method called when an object is registered as modified."""
 
+    def is_same(obj, orig_state, new_state):
+        """Compares two states of the object and determines whether they are
+        the same. It should only compare actual object fields and not any
+        meta-data fields."""
+
     def has_conflicts(objs):
         """Checks whether any of the passed in objects have conflicts.
 

Modified: mongopersist/trunk/src/mongopersist/serialize.py
===================================================================
--- mongopersist/trunk/src/mongopersist/serialize.py	2012-03-30 19:17:29 UTC (rev 124822)
+++ mongopersist/trunk/src/mongopersist/serialize.py	2012-03-30 19:43:08 UTC (rev 124823)
@@ -31,6 +31,8 @@
 SERIALIZERS = []
 OID_CLASS_LRU = lru.LRUCache(20000)
 
+IGNORE_IDENTICAL_DOCUMENTS = True
+
 def get_dotted_name(obj):
     return obj.__module__+'.'+obj.__name__
 
@@ -239,8 +241,10 @@
         # if needed.
         self._jar.conflict_handler.on_before_store(obj, doc)
 
+        stored = False
         if obj._p_oid is None:
             doc_id = coll.insert(doc)
+            stored = True
             obj._p_jar = self._jar
             obj._p_oid = pymongo.dbref.DBRef(coll_name, doc_id, db_name)
             # Make sure that any other code accessing this object in this
@@ -248,12 +252,24 @@
             self._jar._object_cache[doc_id] = obj
         else:
             doc['_id'] = obj._p_oid.id
-            coll.save(doc)
+            # We only want to store a new version of the document, if it is
+            # different. We have to delegate that task to the conflict
+            # handler, since it might know about meta-fields that need to be
+            # ignored.
+            orig_doc = self._jar._latest_states.get(obj._p_oid)
+            if (not IGNORE_IDENTICAL_DOCUMENTS or
+                not self._jar.conflict_handler.is_same(obj, orig_doc, doc)):
+                coll.save(doc)
+                stored = True
 
-        # A hook, so that the conflict handler can modify the object or state
-        # document after an object was stored.
-        self._jar.conflict_handler.on_after_store(obj, doc)
+        if stored:
+            # Make sure that the doc is added to the latest states.
+            self._jar._latest_states[obj._p_oid] = doc
 
+            # A hook, so that the conflict handler can modify the object or state
+            # document after an object was stored.
+            self._jar.conflict_handler.on_after_store(obj, doc)
+
         return obj._p_oid
 
 
@@ -406,9 +422,14 @@
         # Now store the original state. It is assumed that the state dict is
         # not modified later.
         # Make sure that we never set the original state multiple times, even
-        # if reassigning the state within the same transaction.
+        # if reassigning the state within the same transaction. Otherwise we
+        # can never fully undo a transaction.
         if obj._p_oid not in self._jar._original_states:
             self._jar._original_states[obj._p_oid] = doc
+            # Sometimes this method is called to update the object state
+            # before storage. Only update the latest states when the object is
+            # originally loaded.
+            self._jar._latest_states[obj._p_oid] = doc
         # Set the state.
         obj.__setstate__(state)
 

Modified: mongopersist/trunk/src/mongopersist/tests/test_conflict.py
===================================================================
--- mongopersist/trunk/src/mongopersist/tests/test_conflict.py	2012-03-30 19:17:29 UTC (rev 124822)
+++ mongopersist/trunk/src/mongopersist/tests/test_conflict.py	2012-03-30 19:43:08 UTC (rev 124823)
@@ -76,6 +76,14 @@
       >>> obj, state
       (<Foo 'one'>, {'name': 'one'})
 
+    There is a method that allows for comparing 2 states of a given
+    object. The method is used to detect whether objects really changed.
+
+      >>> handler.is_same(obj, {'name': 'one'}, {'name': 'one'})
+      True
+      >>> handler.is_same(obj, {'name': 'one'}, {'name': 'eins'})
+      False
+
     Let's check the conflict checking methods:
 
       >>> handler.has_conflicts([obj])
@@ -146,26 +154,50 @@
       >>> state
       {'name': 'one'}
 
-    Before the object state is stored in Mongo, we add the serial by taking
-    the current one and add 1 to it:
+    Before the object state is stored in Mongo, we add the serial to the
+    document by taking the current one and adding 1 to it. Note that the object's
+    serial is not changed yet, since storing the document might still be
+    cancelled (for example by detecting that the DB state equals the new
+    state):
 
       >>> state = {'name': 'one'}
       >>> handler.on_before_store(obj, state)
+      >>> obj._p_serial
+      '\x00\x00\x00\x00\x00\x00\x00\x05'
       >>> state
       {'_py_serial': 6, 'name': 'one'}
 
-    The event handlers after store and on modification do not need to do
-    anything:
+    After the document was stored, we can safely update the object as well.
 
-      >>> state = {'name': 'one'}
       >>> handler.on_after_store(obj, state)
-      >>> obj, state
-      (<Foo 'one'>, {'name': 'one'})
+      >>> obj._p_serial
+      '\x00\x00\x00\x00\x00\x00\x00\x06'
+      >>> state
+      {'_py_serial': 6, 'name': 'one'}
 
+    The event handler on modification does not need to do anything:
+
       >>> handler.on_modified(obj)
       >>> obj
       <Foo 'one'>
 
+    There is a method that allows for comparing 2 states of a given
+    object. The method is used to detect whether objects really changed.
+
+      >>> handler.is_same(
+      ...     obj,
+      ...     {'name': 'one', '_py_serial': 1},
+      ...     {'name': 'one', '_py_serial': 2})
+      True
+      >>> handler.is_same(
+      ...     obj,
+      ...     {'name': 'one', '_py_serial': 1},
+      ...     {'name': 'eins', '_py_serial': 2})
+      False
+
+    As you can see, the serial number is omitted from the comparison, because
+    it does not represent part of the object state, but is state meta-data.
+
     Let's check the conflict checking methods now. Initially, there are no
     conflicts:
 

Modified: mongopersist/trunk/src/mongopersist/tests/test_datamanager.py
===================================================================
--- mongopersist/trunk/src/mongopersist/tests/test_datamanager.py	2012-03-30 19:17:29 UTC (rev 124822)
+++ mongopersist/trunk/src/mongopersist/tests/test_datamanager.py	2012-03-30 19:43:08 UTC (rev 124823)
@@ -201,6 +201,70 @@
       >>> foo._p_oid = foo2._p_oid
     """
 
+def doctest_MongoDataManager_dump_only_on_real_change():
+    r"""MongoDataManager: dump(): dump on real change only.
+
+    The data manager only writes data when we actually have a difference in
+    state.
+
+    We have to use a serial conflict handler, otherwise it is hard to check
+    whether data was written.
+
+      >>> dm.conflict_handler = conflict.SimpleSerialConflictHandler(dm)
+
+    Let's now add an object:
+
+      >>> foo = Foo('foo')
+      >>> foo_ref = dm.insert(foo)
+      >>> dm.tpc_finish(None)
+
+      >>> coll = dm._get_collection_from_object(foo)
+      >>> coll.find_one({})
+      {u'_id': ObjectId('...'), u'_py_serial': 1, u'name': u'foo'}
+
+    So the original state is in. Let's now modify an object:
+
+      >>> foo = dm.load(foo_ref)
+      >>> foo.name = 'Foo'
+      >>> foo._p_changed
+      True
+      >>> dm.tpc_finish(None)
+
+      >>> coll.find_one({})
+      {u'_id': ObjectId('...'), u'_py_serial': 2, u'name': u'Foo'}
+
+    If we now modify the object again, but write the same value, the state
+    should not be written to Mongo.
+
+      >>> foo = dm.load(foo_ref)
+      >>> foo.name = 'Foo'
+      >>> foo._p_changed
+      True
+      >>> dm.tpc_finish(None)
+
+      >>> coll.find_one({})
+      {u'_id': ObjectId('...'), u'_py_serial': 2, u'name': u'Foo'}
+
+    Let's make sure everything also works when we flush the transaction in the
+    middle.
+
+      >>> foo = dm.load(foo_ref)
+      >>> foo.name = 'fuh'
+      >>> dm.flush()
+      >>> coll.find_one({})
+      {u'_id': ObjectId('...'), u'_py_serial': 3, u'name': u'fuh'}
+
+      >>> foo._p_changed
+      False
+      >>> foo.name = 'fuh'
+      >>> foo._p_changed
+      True
+
+      >>> dm.tpc_finish(None)
+      >>> coll.find_one({})
+      {u'_id': ObjectId('...'), u'_py_serial': 3, u'name': u'fuh'}
+    """
+
 def doctest_MongoDataManager_flush():
     r"""MongoDataManager: flush()
 



More information about the checkins mailing list