[Zodb-checkins] SVN: ZODB/trunk/ Collector #1208: Infinite loop in cPickleCache.

Tim Peters tim.one at comcast.net
Wed May 19 18:23:20 EDT 2004


Log message for revision 24837:
Collector #1208:  Infinite loop in cPickleCache.

This fixes it, based on an approach suggested by Toby
Dickenson.  The triggering condition wasn't entirely
sane, so was very rare:  a persistent object with a
__del__ method that referenced an attribute of self.

scan_gc_items():  Look at persistent objects accessed
while this is running at most once.

New test checkMinimizeTerminates():  This spawns a thread
that will in fact run forever if you don't recompile
cPickleCache.c.  The test suite will keep running, but
checkMinimizeTerminates will fail, and all future calls
to cacheMinimize() will be effectively ignored (so other
bad things may happen as a result).


-=-
Modified: ZODB/trunk/NEWS.txt
===================================================================
--- ZODB/trunk/NEWS.txt	2004-05-19 21:52:56 UTC (rev 24836)
+++ ZODB/trunk/NEWS.txt	2004-05-19 22:23:20 UTC (rev 24837)
@@ -12,7 +12,18 @@
 Collector #1309:  The reference counts reported by DB.cacheExtremeDetails()
 for ghosts were one too small.
 
+Collector #1208:  Infinite loop in cPickleCache.
+If a persistent object had a __del__ method (probably not a good idea
+regardless, but we don't prevent it) that referenced an attribute of
+self, the code to deactivate objects in the cache could get into an
+infinite loop:  ghostifying the object could lead to calling its __del__
+method, the latter would load the object into cache again to
+satsify the attribute reference, the cache would again decide that
+the object should be ghostified, and so on.  The infinite loop no longer
+occurs, but note that objects of this kind still aren't sensible (they're
+effectively immortal).
 
+
 What's new in ZODB3 3.3 alpha 3
 ===============================
 Release date: 16-Apr-2004

Modified: ZODB/trunk/src/ZODB/tests/testCache.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testCache.py	2004-05-19 21:52:56 UTC (rev 24836)
+++ ZODB/trunk/src/ZODB/tests/testCache.py	2004-05-19 22:23:20 UTC (rev 24837)
@@ -21,6 +21,7 @@
 import gc
 import time
 import unittest
+import threading
 
 from persistent.cPickleCache import PickleCache
 from persistent.mapping import PersistentMapping
@@ -70,6 +71,20 @@
             o.value += 1
         transaction.commit()
 
+
+
+# CantGetRidOfMe is used by checkMinimizeTerminates.
+class CantGetRidOfMe(MinPO):
+    def __init__(self, value):
+        MinPO.__init__(self, value)
+        self.an_attribute = 42
+
+    def __del__(self):
+        # Referencing an attribute of self causes self to be
+        # loaded into the cache again, which also resurrects
+        # self.
+        self.an_attribute
+
 class DBMethods(CacheTestBase):
 
     __super_setUp = CacheTestBase.setUp
@@ -105,6 +120,49 @@
         new_size = self.db.cacheSize()
         self.assert_(new_size < old_size, "%s < %s" % (old_size, new_size))
 
+    def checkMinimizeTerminates(self):
+        # This is tricky.  cPickleCache had a case where it could get into
+        # an infinite loop, but we don't want the test suite to hang
+        # if this bug reappears.  So this test spawns a thread to run the
+        # dangerous operation, and the main thread complains if the worker
+        # thread hasn't finished in 30 seconds (arbitrary, but way more
+        # than enough).  In that case, the worker thread will continue
+        # running forever (until killed externally), but at least the
+        # test suite will move on.
+        #
+        # The bug was triggered by having a persistent object whose __del__
+        # method references an attribute of the object.  An attempt to
+        # ghostify such an object will clear the attribute, and if the
+        # cache also releases the last Python reference to the object then
+        # (due to ghostifying it), the __del__ method gets invoked.
+        # Referencing the attribute loads the object again, and also
+        # puts it back into the cPickleCache.  If the cache implementation
+        # isn't looking out for this, it can get into an infinite loop
+        # then, endlessly trying to ghostify an object that in turn keeps
+        # unghostifying itself again.
+        class Worker(threading.Thread):
+
+            def __init__(self, testcase):
+                threading.Thread.__init__(self)
+                self.testcase = testcase
+
+            def run(self):
+                conn = self.testcase.conns[0]
+                r = conn.root()
+                d = r[1]
+                for i in range(len(d)):
+                    d[i] = CantGetRidOfMe(i)
+                get_transaction().commit()
+
+                self.testcase.db.cacheMinimize()
+
+        w = Worker(self)
+        w.start()
+        w.join(30)
+        if w.isAlive():
+            self.fail("cacheMinimize still running after 30 seconds -- "
+                      "almost certainly in an infinite loop")
+
     # XXX don't have an explicit test for incrgc, because the
     # connection and database call it internally
 

Modified: ZODB/trunk/src/persistent/cPickleCache.c
===================================================================
--- ZODB/trunk/src/persistent/cPickleCache.c	2004-05-19 21:52:56 UTC (rev 24836)
+++ ZODB/trunk/src/persistent/cPickleCache.c	2004-05-19 22:23:20 UTC (rev 24837)
@@ -99,7 +99,12 @@
 #include <stddef.h>
 #undef Py_FindMethod
 
-static PyObject *py__p_oid, *py_reload, *py__p_jar, *py__p_changed;
+/* Python string objects to speed lookups; set by module init. */
+static PyObject *py__p_changed;
+static PyObject *py__p_deactivate;
+static PyObject *py__p_jar;
+static PyObject *py__p_oid;
+
 static cPersistenceCAPIstruct *capi;
 
 /* This object is the pickle cache.  The CACHE_HEAD macro guarantees
@@ -141,84 +146,102 @@
 #define OBJECT_FROM_RING(SELF, HERE) \
     ((cPersistentObject *)(((char *)here) - offsetof(cPersistentObject, ring)))
 
+/* Insert self into the ring, following after. */
+static void
+insert_after(CPersistentRing *self, CPersistentRing *after)
+{
+    assert(self != NULL);
+    assert(after != NULL);
+    self->r_prev = after;
+    self->r_next = after->r_next;
+    after->r_next->r_prev = self;
+    after->r_next = self;
+}
+
+/* Remove self from the ring. */
+static void
+unlink_from_ring(CPersistentRing *self)
+{
+    assert(self != NULL);
+    self->r_prev->r_next = self->r_next;
+    self->r_next->r_prev = self->r_prev;
+}
+
 static int
 scan_gc_items(ccobject *self, int target)
 {
     /* This function must only be called with the ring lock held,
-       because it places a non-object placeholder in the ring.
+       because it places non-object placeholders in the ring.
     */
-
     cPersistentObject *object;
-    CPersistentRing placeholder;
-    CPersistentRing *here = self->ring_home.r_next;
-    static PyObject *_p_deactivate;
+    CPersistentRing *here;
+    CPersistentRing before_original_home;
+    int result = -1;   /* guilty until proved innocent */
 
-    if (!_p_deactivate) {
-	_p_deactivate = PyString_InternFromString("_p_deactivate");
-	if (!_p_deactivate)
-	    return -1;
-    }
-
-    /* Scan through the ring until we either find the ring_home (i.e. start
-     * of the ring, or we've ghosted enough objects to reach the target
-     * size.
+    /* Scan the ring, from least to most recently used, deactivating
+     * up-to-date objects, until we either find the ring_home again or
+     * or we've ghosted enough objects to reach the target size.
+     * Tricky:  __getattr__ and __del__ methods can do anything, and in
+     * particular if we ghostify an object with a __del__ method, that method
+     * can load the object again, putting it back into the MRU part of the
+     * ring.  Waiting to find ring_home again can thus cause an infinite
+     * loop (Collector #1208).  So before_original_home records the MRU
+     * position we start with, and we stop the scan when we reach that.
      */
-    while (1) {
-	/* back to the home position. stop looking */
-        if (here == &self->ring_home)
-            return 0;
+    insert_after(&before_original_home, self->ring_home.r_prev);
+    here = self->ring_home.r_next;   /* least recently used object */
+    while (here != &before_original_home && self->non_ghost_count > target) {
+	assert(self->ring_lock);
+	assert(here != &self->ring_home);
 
         /* At this point we know that the ring only contains nodes
-	   from persistent objects, plus our own home node. We know
+	   from persistent objects, plus our own home node.  We know
 	   this because the ring lock is held.  We can safely assume
 	   the current ring node is a persistent object now we know it
 	   is not the home */
         object = OBJECT_FROM_RING(self, here);
-        if (!object)
-	    return -1;
 
-	/* we are small enough */
-        if (self->non_ghost_count <= target)
-            return 0;
-        else if (object->state == cPersistent_UPTODATE_STATE) {
-	    PyObject *meth, *error;
+        if (object->state == cPersistent_UPTODATE_STATE) {
+            CPersistentRing placeholder;
+            PyObject *method;
+            PyObject *temp;
+            int error_occurred = 0;
             /* deactivate it. This is the main memory saver. */
 
-            /* Add a placeholder; a dummy node in the ring.  We need
+            /* Add a placeholder, a dummy node in the ring.  We need
 	       to do this to mark our position in the ring.  It is
-	       possible that the PyObject_SetAttr() call below will
-	       invoke an __setattr__() hook in Python.  If it does,
-	       another thread might run; if that thread accesses a
-	       persistent object and moves it to the head of the ring,
-	       it might cause the gc scan to start working from the
-	       head of the list.
+	       possible that the PyObject_GetAttr() call below will
+	       invoke a __getattr__() hook in Python.  Also possible
+	       that deactivation will lead to a __del__ method call.
+	       So another thread might run, and mutate the ring as a side
+	       effect of object accesses.  There's no predicting then where
+	       in the ring here->next will point after that.  The
+	       placeholder won't move as a side effect of calling Python
+	       code.
 	    */
+            insert_after(&placeholder, here);
+	    method = PyObject_GetAttr((PyObject *)object, py__p_deactivate);
+	    if (method == NULL)
+	        error_occurred = 1;
+	    else {
+ 		temp = PyObject_CallObject(method, NULL);
+                Py_DECREF(method);
+	        if (temp == NULL)
+	            error_occurred = 1;
+	    }
 
-            placeholder.r_next = here->r_next;
-            placeholder.r_prev = here;
-            here->r_next->r_prev = &placeholder;
-            here->r_next = &placeholder;
-
-	    /* Call _p_deactivate(), which may be overridden. */
-	    meth = PyObject_GetAttr((PyObject *)object, _p_deactivate);
-	    if (!meth)
-		return -1;
-	    error = PyObject_CallObject(meth, NULL);
-	    Py_DECREF(meth);
-
-            /* unlink the placeholder */
-            placeholder.r_next->r_prev = placeholder.r_prev;
-            placeholder.r_prev->r_next = placeholder.r_next;
-
             here = placeholder.r_next;
-
-            if (!error)
-                return -1; /* problem */
-	    Py_DECREF(error);
+            unlink_from_ring(&placeholder);
+            if (error_occurred)
+                goto Done;
         }
         else
             here = here->r_next;
     }
+    result = 0;
+ Done:
+    unlink_from_ring(&before_original_home);
+    return result;
 }
 
 static PyObject *
@@ -247,7 +270,7 @@
 static PyObject *
 cc_incrgc(ccobject *self, PyObject *args)
 {
-    int obsolete_arg = -999; 
+    int obsolete_arg = -999;
     int starting_size = self->non_ghost_count;
     int target_size = self->cache_size;
 
@@ -328,7 +351,7 @@
     if (!v)
 	return;
     if (PyType_Check(v)) {
-        /* This looks wrong, but it isn't. We use strong references to types 
+        /* This looks wrong, but it isn't. We use strong references to types
            because they don't have the ring members.
 
            XXX the result is that we *never* remove classes unless
@@ -462,7 +485,7 @@
     if (l == NULL)
 	return NULL;
 
-    while (PyDict_Next(self->data, &p, &k, &v)) 
+    while (PyDict_Next(self->data, &p, &k, &v))
       {
         if (v->ob_refcnt <= 0)
           v = Py_BuildValue("Oi", k, v->ob_refcnt);
@@ -470,7 +493,7 @@
         else if (! PyType_Check(v) &&
                  (v->ob_type->tp_basicsize >= sizeof(cPersistentObject))
                  )
-          v = Py_BuildValue("Oisi", 
+          v = Py_BuildValue("Oisi",
                             k, v->ob_refcnt, v->ob_type->tp_name,
                             ((cPersistentObject*)v)->state);
         else
@@ -1082,10 +1105,18 @@
 	return;
     capi->percachedel = (percachedelfunc)cc_oid_unreferenced;
 
-    py_reload = PyString_InternFromString("reload");
+    py__p_changed = PyString_InternFromString("_p_changed");
+    if (!py__p_changed)
+        return;
+    py__p_deactivate = PyString_InternFromString("_p_deactivate");
+    if (!py__p_deactivate)
+        return;
     py__p_jar = PyString_InternFromString("_p_jar");
-    py__p_changed = PyString_InternFromString("_p_changed");
+    if (!py__p_jar)
+        return;
     py__p_oid = PyString_InternFromString("_p_oid");
+    if (!py__p_oid)
+        return;
 
     if (PyModule_AddStringConstant(m, "cache_variant", "stiff/c") < 0)
 	return;




More information about the Zodb-checkins mailing list