[Checkins] SVN: Sandbox/poster/zc.blist/trunk/src/zc/blist/ another edge case; more canary tests and debugging helpers.

Gary Poster gary at zope.com
Thu May 22 23:13:16 EDT 2008


Log message for revision 86912:
  another edge case; more canary tests and debugging helpers.

Changed:
  U   Sandbox/poster/zc.blist/trunk/src/zc/blist/__init__.py
  U   Sandbox/poster/zc.blist/trunk/src/zc/blist/testing.py
  U   Sandbox/poster/zc.blist/trunk/src/zc/blist/tests.py

-=-
Modified: Sandbox/poster/zc.blist/trunk/src/zc/blist/__init__.py
===================================================================
--- Sandbox/poster/zc.blist/trunk/src/zc/blist/__init__.py	2008-05-22 23:46:49 UTC (rev 86911)
+++ Sandbox/poster/zc.blist/trunk/src/zc/blist/__init__.py	2008-05-23 03:13:14 UTC (rev 86912)
@@ -642,7 +642,6 @@
                             del bucket[ix:stop_ix]
                         elif (stop_bucket_id != start_bucket_id and
                               bucket.previous is not None):
-                            # TODO this logic could maybe be moved earlier?
                             stop_bucket_id = bucket.previous
                         break
                     removed = bucket[ix:]
@@ -682,7 +681,7 @@
                 # B+ tree algorithm descriptions go
                 # one-item-at-a-time, while we may have swaths of
                 # changes to which we need to adjust.
-                #
+
                 # Key adjustments are different than the standard B+
                 # tree story because this is a sequence, and our keys
                 # are indices that we need to adjust to accomodate the
@@ -694,6 +693,7 @@
                 fill_minimum = fill_maximum // 2
 
                 original_stop_bucket_id = stop_bucket_id
+                check_next = False
 
                 while start_bucket_id is not None:
 
@@ -705,103 +705,177 @@
                     # anything in to; we'll want to look at it and the
                     # `stop_bucket` (if different) to see if we need to
                     # adjust.
-                    #
-                    # if bucket and stop_bucket are different and
-                    # stop_bucket is not empty and either are below the
-                    # fill_minimum...
-                        # if the combination is less than the fill_maximum,
-                        # put in bucket and empty stop_bucket
-                        # else redistribute across so both are above
-                        # fill_minimum
 
                     len_bucket = len(bucket)
-                    stop_bucket = self._mapping[stop_bucket_id]
-                    len_stop = len(stop_bucket)
-                    if (bucket is not stop_bucket and
-                        len_stop and (
-                            len_bucket < fill_minimum or
-                            len_stop < fill_minimum)):
-                        bucket = self._mutable(bucket)
-                        stop_bucket = self._mutable(stop_bucket)
-                        bucket.rotate(stop_bucket)
-                        len_bucket = len(bucket)
-                        len_stop = len(stop_bucket)
 
-                    # if (bucket is stop_bucket or stop_bucket is empty)
-                    # and bucket.previous is None and stop_bucket.next is
-                    # None, shortcut: just make sure this is the top
-                    # bucket and break.
-
-                    if ((bucket is stop_bucket or not len_stop) and
-                        bucket.previous is None and stop_bucket.next is None):
-                        bucket = self._mutable(bucket)
-                        if bucket.identifier != self._top_index:
-                            # get rid of unnecessary parents
-                            p = bucket
-                            while p.identifier != self._top_index:
-                                p = self._mapping[p.parent]
-                                if p.shared:
-                                    p.collections.remove(self)
-                                del self._mapping[p.identifier]
-                            # this is now the top of the tree
-                            self._top_index = bucket.identifier 
-                            bucket.parent = None
+                    # Usually we only care about two buckets here: ``bucket``
+                    # and ``stop_bucket``. However, the first thing we do here
+                    # is handle an exceptional case. We might be working with
+                    # an index row in which a lower row had to include three
+                    # buckets in the calculation. See where ``check_next =
+                    # True`` occurs (pretty far down) to understand the
+                    # condition.
+                    next = None
+                    if check_next:
+                        if bucket.next is None:
+                            check_next = False
                         else:
-                            assert bucket.parent is None
-                        bucket.next = None
-                        stop_bucket = stop_bucket_id = None
-                        break
+                            assert len_bucket >= fill_minimum
+                            next = self._mapping[bucket.next]
+                            if 0 < len(next) < fill_minimum:
+                                bucket = self._mutable(bucket)
+                                next = self._mutable(next)
+                                bucket.rotate(next)
+                                len_bucket = len(bucket) # reset; it has
+                                # changed if next needs to be removed from
+                                # parent, it will be done later (below)
+                                if not next: # (if empty)
+                                    # next's parent, if different than
+                                    # bucket's, should not have to be
+                                    # explicitly balanced in this way, because
+                                    # it is either empty (and will be removed)
+                                    # or the stop_bucket's parent (and will be
+                                    # balanced anyway).
+                                    check_next = False
+                            else:
+                                # We are not mutating next bucket, so next's
+                                # parent should not have to be balanced
+                                check_next = False
 
-                    # now these are the possible states:
-                    # - bucket is stop_bucket and is empty
-                    # - bucket is stop_bucket and is too small
-                    # - bucket is stop_bucket and is ok
-                    # - bucket and stop_bucket are both empty
-                    # - bucket is ok and stop_bucket is empty
-                    # - bucket is too small and stop_bucket is empty
-                    # - bucket is ok and stop_bucket is ok
-                    #
-                    # Therefore, 
-                    # - if the stop_bucket is ok or the bucket is empty,
-                    #   we're ok with this step, and can move on to
-                    #   adjusting the indexes and pointers.
-                    # - otherwise the bucket is too small, and there is
-                    #   another bucket to rotate with.  Find the bucket
-                    #   and adjust so that no non-empty buckets are
-                    #   beneath the fill_minimum.  Make sure to adjust the
-                    #   start_bucket or stop_bucket to include the altered
-                    #   bucket.
+                    stop_bucket = self._mapping[stop_bucket_id]
+                    len_stop = len(stop_bucket)
+                    if next:
+                        # We're still handing the exceptional case. ``next`` is
+                        # not None and not empty. This means that we are
+                        # working with indexes on this level. It also means
+                        # that bucket and next are above the minimum fill
+                        # level. The only thing we don't know is if the stop
+                        # bucket is not next and the stop bucket is not empty
+                        # and below the fill level. If that's the case, we need
+                        # to balance next and stop.
+                        if len_stop and len_stop < fill_minimum:
+                            assert stop_bucket_id != next.identifier
+                            next = self._mutable(next)
+                            next.rotate(stop_bucket)
+                            len_stop = len(stop_bucket)
+                        # now, bucket is not empty and above the minimum,
+                        # next is not empty and above the minimum, and
+                        # stop_bucket is either empty, or not empty and
+                        # above the minimum.
+                        assert len_bucket >= fill_minimum
+                        assert len(next) >= fill_minimum
+                        assert not len_stop or len_stop >= fill_minimum
+                        # this means we are balanced, and that we have two
+                        # buckets, so neither of them is the top bucket.
+                    else:
+                        # this is the regular code path.  We only need to look
+                        # at the start bucket and the stop bucket.
 
-                    if len_bucket < fill_minimum:
-                        previous = bucket.previous
-                        next = stop_bucket.next
-                        assert previous is not None or next is not None
-                        assert bucket is stop_bucket or not len_stop
-                        if next is not None:
-                            next = self._mapping[next]
-                        if (next is None or
-                            previous is not None and
-                            len(next) + len_bucket > fill_maximum):
-                            # work with previous
-                            previous = self._mutable(self._mapping[previous])
+                        # if bucket and stop_bucket are different and
+                        # stop_bucket is not empty and either are below the
+                        # fill_minimum...
+                            # if the combination is less than the fill_maximum,
+                            # put in bucket and empty stop_bucket
+                            # else redistribute across so both are above
+                            # fill_minimum
+                        if (bucket is not stop_bucket and
+                            len_stop and (
+                                len_bucket < fill_minimum or
+                                len_stop < fill_minimum)):
                             bucket = self._mutable(bucket)
-                            previous.rotate(bucket)
-                            if bucket.identifier == start_bucket_id:
-                                bucket = previous
-                                start_bucket_id = previous.identifier
-                            if not bucket:
-                                bucket = previous
-                                assert bucket
-                        else:
-                            # work with next
+                            stop_bucket = self._mutable(stop_bucket)
+                            bucket.rotate(stop_bucket)
+                            len_bucket = len(bucket)
+                            len_stop = len(stop_bucket)
+    
+                        # if (bucket is stop_bucket or stop_bucket is empty)
+                        # and bucket.previous is None and stop_bucket.next is
+                        # None, shortcut: just make sure this is the top
+                        # bucket and break.
+    
+                        if ((bucket is stop_bucket or not len_stop) and
+                            bucket.previous is None and
+                            stop_bucket.next is None):
                             bucket = self._mutable(bucket)
-                            next = self._mutable(next)
-                            bucket.rotateRight(next)
-                            stop_bucket_id = next.identifier
+                            if bucket.identifier != self._top_index:
+                                # get rid of unnecessary parents
+                                p = bucket
+                                while p.identifier != self._top_index:
+                                    p = self._mapping[p.parent]
+                                    if p.shared:
+                                        p.collections.remove(self)
+                                    del self._mapping[p.identifier]
+                                # this is now the top of the tree
+                                self._top_index = bucket.identifier 
+                                bucket.parent = None
+                            else:
+                                assert bucket.parent is None
+                            bucket.next = None
+                            stop_bucket = stop_bucket_id = None
+                            break
 
-                    # OK, now we need to adjust pointers and get rid of
-                    # empty buckets.  We'll go level-by-level.
+                        # now these are the possible states:
+                        # - bucket is stop_bucket and is empty
+                        # - bucket is stop_bucket and is too small
+                        # - bucket is stop_bucket and is ok
+                        # - bucket and stop_bucket are both empty
+                        # - bucket is ok and stop_bucket is empty
+                        # - bucket is too small and stop_bucket is empty
+                        # - bucket is ok and stop_bucket is ok
+                        #
+                        # Therefore, 
+                        # - if the stop_bucket is ok or the bucket is empty,
+                        #   we're ok with this step, and can move on to
+                        #   adjusting the indexes and pointers.
+                        # - otherwise the bucket is too small, and there is
+                        #   another bucket to rotate with.  Find the bucket
+                        #   and adjust so that no non-empty buckets are
+                        #   beneath the fill_minimum.  Make sure to adjust the
+                        #   start_bucket or stop_bucket to include the altered
+                        #   bucket.
+    
+                        if len_bucket < fill_minimum:
+                            previous = bucket.previous
+                            next = stop_bucket.next
+                            assert previous is not None or next is not None
+                            assert bucket is stop_bucket or not len_stop
+                            if next is not None:
+                                next = self._mapping[next]
+                            if (next is None or
+                                previous is not None and
+                                len(next) + len_bucket > fill_maximum):
+                                # work with previous
+                                previous = self._mutable(
+                                    self._mapping[previous])
+                                bucket = self._mutable(bucket)
+                                previous.rotate(bucket)
+                                if bucket.identifier == start_bucket_id:
+                                    if bucket:
+                                        # now bucket may have a parent that
+                                        # needs to be balanced that is *not*
+                                        # the parent of start bucket or of stop
+                                        # bucket. We'll check this the next
+                                        # time we go through this loop, with
+                                        # the parents of this level.
+                                        check_next = True
+                                    bucket = previous
+                                    start_bucket_id = previous.identifier
+                                if not bucket:
+                                    bucket = previous
+                                    assert bucket
+                            else:
+                                # work with next
+                                bucket = self._mutable(bucket)
+                                next = self._mutable(next)
+                                bucket.rotateRight(next)
+                                stop_bucket_id = next.identifier
 
+                    # We do this part of the code both in the exceptional
+                    # ``check_next`` case and in the usual case.
+
+                    # Now we need to adjust pointers and get rid of empty
+                    # buckets. We'll go level-by-level.
+
                     # we keep track of the bucket to reindex on separately from
                     # the start_index because we may have to move it to a
                     # previous bucket if the start_bucket is deleted to make

Modified: Sandbox/poster/zc.blist/trunk/src/zc/blist/testing.py
===================================================================
--- Sandbox/poster/zc.blist/trunk/src/zc/blist/testing.py	2008-05-22 23:46:49 UTC (rev 86911)
+++ Sandbox/poster/zc.blist/trunk/src/zc/blist/testing.py	2008-05-23 03:13:14 UTC (rev 86912)
@@ -1,3 +1,5 @@
+import random
+
 import zc.blist
 
 def checkIndex(ix, b, previous, previous_ix=0):
@@ -33,6 +35,7 @@
 def matches(b, result):
     assert list(b) == result, repr(list(b)) + ' != ' + repr(result)
     assert len(b) == len(result)
+    assert list(b[::-1]) == list(reversed(result))
     res = []
     bad = [(i, (b_i, r_i)) for (i, b_i, r_i) in
            ((i, b[i], result[i]) for i in range(len(b)))
@@ -73,3 +76,21 @@
                     two not in v.collections)
             diff.append((k, v, alt))
     return one_diff, two_diff, diff
+
+def RandomGenerator():
+    while 1:
+        yield random.randint(-sys.maxint, sys.maxint)
+
+def StringGenerator(src='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'):
+    "infinite-ish unique string generator"
+    for el in src:
+        yield el
+    for pre in StringGenerator(src):
+        for el in src:
+            yield pre + el
+
+def NumberGenerator(number=0, interval=1):
+    "infinite-ish unique number generator"
+    while 1:
+        yield number
+        number += interval
\ No newline at end of file

Modified: Sandbox/poster/zc.blist/trunk/src/zc/blist/tests.py
===================================================================
--- Sandbox/poster/zc.blist/trunk/src/zc/blist/tests.py	2008-05-22 23:46:49 UTC (rev 86911)
+++ Sandbox/poster/zc.blist/trunk/src/zc/blist/tests.py	2008-05-23 03:13:14 UTC (rev 86912)
@@ -12,6 +12,7 @@
 #
 ##############################################################################
 import os
+import pickle
 import unittest
 import random
 import sys
@@ -213,12 +214,16 @@
         [56, 7, 10]
     """
 
-class Canary(object):
-    def __init__(self, comp, blist = None):
+class AbstractCanary(object):
+    def __init__(self, comp, blist=None, generator=None):
+        if generator is None:
+            generator = zc.blist.testing.StringGenerator()
+        self.generator = generator
         if blist is None:
             blist = zc.blist.BList(comp)
         self.blist = blist
         self.comp = comp
+        zc.blist.testing.matches(self.blist, self.comp)
         self.ops = [getattr(self, n) for n in dir(self) if n.startswith('t_')]
         self.empty_ops = [getattr(self, n) for n in dir(self) if n.startswith('e_')]
         self.ops.extend(self.empty_ops)
@@ -235,18 +240,28 @@
         orig = self.blist.copy()
         orig_comp = self.comp[:]
         try:
-            zc.blist.testing.checkCopies(self.blist, orig) # initial
-            args = call()
+            zc.blist.testing.checkCopies(self.blist, orig) # pre
+            c, args = call()
+            c()
             zc.blist.testing.matches(self.blist, self.comp)
             zc.blist.testing.matches(orig, orig_comp)
             return zc.blist.testing.checkCopies(self.blist, orig) # post
         except Exception, e:
             traceback.print_exc()
             import pdb; pdb.post_mortem(sys.exc_info()[2])
+            orig.bad = self.blist
+            orig.args = args
+            for i in range(1000):
+                nm = 'bad_op_%d_%r.pickle' % (random.randint(1, 1000), call)
+                if not os.path.exists(nm):
+                    f = open(nm, 'w')
+                    pickle.dump(orig, f)
+                    f.close()
+                    break
             raise
 
     def _getval(self):
-        return random.randint(-sys.maxint, sys.maxint)
+        return self.generator.next()
 
     def _getloc(self, adjustment=-1):
         max = len(self.comp)+adjustment
@@ -293,120 +308,145 @@
     def _getvals(self):
         return [self._getval() for i in range(random.randint(1, 100))]
 
-    def e_append(self):
-        val = self._getval()
-        self.comp.append(val)
-        self.blist.append(val)
-        return (val,)
+class BigOperationCanary(AbstractCanary):
 
     def e_extend(self):
         new = self._getvals()
-        self.comp.extend(new)
-        self.blist.extend(new)
-        return (new,)
+        def test():
+            self.comp.extend(new)
+            self.blist.extend(new)
+        return test, (new,)
 
+    def t_delslice(self):
+        start, stop = self._get_start_stop()
+        def test():
+            del self.comp[start:stop]
+            del self.blist[start:stop]
+        return test, (start, stop)
+
+    def e_setitem_slice(self):
+        start, stop = self._get_start_stop()
+        vals = self._getvals()
+        def test():
+            self.comp[start:stop] = vals
+            self.blist[start:stop] = vals
+        return test, (start, stop, vals)
+
+    def e_setitem_slice_step(self):
+        start, stop, step = self._get_start_stop_step()
+        vals = [self._getval() for i in range(len(self.comp[start:stop:step]))]
+        def test():
+            self.comp[start:stop:step] = vals
+            self.blist[start:stop:step] = vals
+        return test, (start, stop, step, vals)
+
+    def t_delslice_step(self):
+        start, stop, step = self._get_start_stop_step()
+        def test():
+            del self.comp[start:stop:step]
+            del self.blist[start:stop:step]
+        return test, (start, stop, step)
+
+class Canary(BigOperationCanary):
+
+    def e_append(self):
+        val = self._getval()
+        def test():
+            self.comp.append(val)
+            self.blist.append(val)
+        return test, (val,)
+
     def e_iadd(self):
         new = self._getvals()
-        self.comp += new
-        self.blist += new
-        return (new,)
+        def test():
+            self.comp += new
+            self.blist += new
+        return test, (new,)
 
     def e_insert(self):
         val = self._getval()
         location = self._getloc(0) # can insert after last item
-        self.comp.insert(location, val)
-        self.blist.insert(location, val)
-        return location, val
+        def test():
+            self.comp.insert(location, val)
+            self.blist.insert(location, val)
+        return test, (location, val)
 
     def t_delitem(self):
         location = self._getloc()
-        del self.comp[location]
-        del self.blist[location]
-        return (location,)
+        def test():
+            del self.comp[location]
+            del self.blist[location]
+        return test, (location,)
 
-    def t_delslice(self):
-        start, stop = self._get_start_stop()
-        del self.comp[start:stop]
-        del self.blist[start:stop]
-        return start, stop
-
-    def t_delslice_step(self):
-        start, stop, step = self._get_start_stop_step()
-        del self.comp[start:stop:step]
-        del self.blist[start:stop:step]
-        return start, stop, step
-
     def e_delslice_noop(self):
         stop, start = self._get_start_stop()
-        del self.comp[start:stop]
-        del self.blist[start:stop]
-        return start, stop
+        def test():
+            del self.comp[start:stop]
+            del self.blist[start:stop]
+        return test, (start, stop)
 
     def e_delslice_step_noop(self):
         stop, start, step = self._get_start_stop_step()
-        del self.comp[start:stop:step]
-        del self.blist[start:stop:step]
-        return start, stop, step
+        def test():
+            del self.comp[start:stop:step]
+            del self.blist[start:stop:step]
+        return test, (start, stop, step)
 
     def t_pop(self):
         location = self._getloc()
-        assert self.comp.pop(location) == self.blist.pop(location)
-        return (location,)
+        def test():
+            assert self.comp.pop(location) == self.blist.pop(location)
+        return test, (location,)
 
     def t_remove(self):
         val = self.comp[self._getloc()]
-        self.comp.remove(val)
-        self.blist.remove(val)
-        return (val,)
+        def test():
+            self.comp.remove(val)
+            self.blist.remove(val)
+        return test, (val,)
 
     def t_reverse(self):
-        self.comp.reverse()
-        self.blist.reverse()
-        return ()
+        def test():
+            self.comp.reverse()
+            self.blist.reverse()
+        return test, ()
 
     def t_sort(self):
-        self.comp.sort()
-        self.blist.sort()
-        return ()
+        def test():
+            self.comp.sort()
+            self.blist.sort()
+        return test, ()
 
     def t_sort_cmp(self):
-        self.comp.sort(lambda s, o: cmp(str(o), str(s))) # reverse, by
-        self.blist.sort(lambda s, o: cmp(str(o), str(s))) # string
-        return ()
+        def test():
+            self.comp.sort(lambda s, o: cmp(str(o), str(s))) # reverse, by
+            self.blist.sort(lambda s, o: cmp(str(o), str(s))) # string
+        return test, ()
 
     def t_sort_key(self):
-        self.comp.sort(key=lambda v: str(v))
-        self.blist.sort(key=lambda v: str(v))
-        return ()
+        def test():
+            self.comp.sort(key=lambda v: str(v))
+            self.blist.sort(key=lambda v: str(v))
+        return test, ()
 
     def t_sort_reverse(self):
-        self.comp.sort(reverse = True)
-        self.blist.sort(reverse = True)
-        return ()
+        def test():
+            self.comp.sort(reverse = True)
+            self.blist.sort(reverse = True)
+        return test, ()
 
     def t_setitem(self):
         location = self._getloc()
         val = self._getval()
-        self.comp[location] = val
-        self.blist[location] = val
-        return location, val
+        def test():
+            self.comp[location] = val
+            self.blist[location] = val
+        return test, (location, val)
 
-    def e_setitem_slice(self):
-        start, stop = self._get_start_stop()
-        vals = self._getvals()
-        self.comp[start:stop] = vals
-        self.blist[start:stop] = vals
-        return start, stop, vals
 
-    def e_setitem_slice_step(self):
-        start, stop, step = self._get_start_stop_step()
-        vals = [self._getval() for i in range(len(self.comp[start:stop:step]))]
-        self.comp[start:stop:step] = vals
-        self.blist[start:stop:step] = vals
-        return start, stop, step, vals
 
 class CanaryTestCase(unittest.TestCase):
-    def test_empty_canary(self):
+    def test_canary(self):
         c = Canary([])
         c()
 
@@ -414,6 +454,29 @@
         c = Canary([], zc.blist.BList(bucket_size=5, index_size=4))
         c()
 
+    def test_big_canary(self):
+        for i in range(32):
+            c = Canary([], zc.blist.BList(bucket_size=5, index_size=4))
+            c(1000)
+
+    def test_several_small_canaries(self):
+        for i in range(128):
+            c = Canary([], zc.blist.BList(bucket_size=5, index_size=4))
+            c(10)
+
+    def test_big_bigop_canary(self):
+        for i in range(32):
+            c = BigOperationCanary(
+                [], zc.blist.BList(bucket_size=5, index_size=4))
+            c(1000)
+
+    def test_big_big_bigop_canary(self):
+        for i in range(32):
+            c = BigOperationCanary(
+                range(10000),
+                zc.blist.BList(range(10000), bucket_size=5, index_size=4))
+            c(2000)
+
 def test_suite():
     return unittest.TestSuite((
         doctest.DocFileSuite(



More information about the Checkins mailing list