[Checkins] SVN: ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py Removed unused and probably non-functional simulation classes.

Jim Fulton jim at zope.com
Fri Jun 4 11:36:43 EDT 2010


Log message for revision 113118:
  Removed unused and probably non-functional simulation classes.
  

Changed:
  U   ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py

-=-
Modified: ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py
===================================================================
--- ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py	2010-06-04 15:36:24 UTC (rev 113117)
+++ ZODB/branches/jim-simulation/src/ZEO/scripts/cache_trace_simulation.py	2010-06-04 15:36:43 UTC (rev 113118)
@@ -40,35 +40,15 @@
     cachelimit = 20*MB
     simclass = CircularCacheSimulation
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOaTUs:")
+        opts, args = getopt.getopt(sys.argv[1:], "cs:")
     except getopt.error, msg:
         usage(msg)
         return 2
     for o, a in opts:
-        if o == '-b':
-            simclass = BuddyCacheSimulation
-        elif o == '-f':
-            simclass = SimpleCacheSimulation
-        elif o == '-l':
-            simclass = LRUCacheSimulation
-        elif o == '-y':
-            simclass = AltZEOCacheSimulation
-        elif o == '-z':
-            simclass = ZEOCacheSimulation
-        elif o == '-s':
+        if o == '-s':
             cachelimit = int(float(a)*MB)
-        elif o == '-2':
-            simclass = TwoQSimluation
         elif o == '-c':
             simclass = CircularCacheSimulation
-        elif o == '-O':
-            simclass = OracleSimulation
-        elif o == '-a':
-            simclass = ARCCacheSimulation
-        elif o == '-T':
-            simclass = ThorSimulation
-        elif o == '-U':
-            simclass = UnboundedSimulation
         else:
             assert False, (o, a)
 
@@ -101,11 +81,7 @@
             print >> sys.stderr, "can't open %s: %s" % (filename, msg)
             return 1
 
-    # Create simulation object.
-    if simclass is OracleSimulation:
-        sim = simclass(cachelimit, filename)
-    else:
-        sim = simclass(cachelimit)
+    sim = simclass(cachelimit)
 
     # Print output header.
     sim.printheader()
@@ -530,1000 +506,6 @@
             v = self.filemap[k]
             print k, v[0], repr(v[1])
 
-#############################################################################
-# CAUTION:  It's most likely that none of the simulators below this
-# point work anymore.  A great many changes were needed to teach
-# CircularCacheSimulation (above) about MVCC, including method signature
-# changes and changes in cache file format, and none of the other simulator
-# classes were changed.
-#############################################################################
-
-class ZEOCacheSimulation(Simulation):
-    """Simulate the ZEO 1.0 and 2.0 cache behavior.
-
-    This assumes the cache is not persistent (we don't know how to
-    simulate cache validation.)
-    """
-
-    extraname = "flips"
-
-    def __init__(self, cachelimit):
-        # Initialize base class
-        Simulation.__init__(self, cachelimit)
-        # Initialize additional global statistics
-        self.total_flips = 0
-
-    def restart(self):
-        # Reset base class
-        Simulation.restart(self)
-        # Reset additional per-run statistics
-        self.flips = 0
-        # Set up simulation
-        self.filesize = [4, 4] # account for magic number
-        self.fileoids = [{}, {}]
-        self.current = 0 # index into filesize, fileoids
-
-    def load(self, oid, size):
-        if (self.fileoids[self.current].get(oid) or
-            self.fileoids[1 - self.current].get(oid)):
-            self.hits += 1
-            self.total_hits += 1
-        else:
-            self.write(oid, size)
-
-    def write(self, oid, size):
-        # Fudge because size is rounded up to multiples of 256.  (31
-        # is header overhead per cache record; 127 is to compensate
-        # for rounding up to multiples of 256.)
-        size = size + 31 - 127
-        if self.filesize[self.current] + size > self.cachelimit / 2:
-            # Cache flip
-            self.flips += 1
-            self.total_flips += 1
-            self.current = 1 - self.current
-            self.filesize[self.current] = 4
-            self.fileoids[self.current] = {}
-        self.filesize[self.current] += size
-        self.fileoids[self.current][oid] = 1
-
-    def inval(self, oid):
-        if self.fileoids[self.current].get(oid):
-            self.invals += 1
-            self.total_invals += 1
-            del self.fileoids[self.current][oid]
-        elif self.fileoids[1 - self.current].get(oid):
-            self.invals += 1
-            self.total_invals += 1
-            del self.fileoids[1 - self.current][oid]
-
-class AltZEOCacheSimulation(ZEOCacheSimulation):
-    """A variation of the ZEO cache that copies to the current file.
-
-    When a hit is found in the non-current cache file, it is copied to
-    the current cache file.  Exception: when the copy would cause a
-    cache flip, we don't copy (this is part laziness, part concern
-    over causing extraneous flips).
-    """
-
-    def load(self, oid, size):
-        if self.fileoids[self.current].get(oid):
-            self.hits += 1
-            self.total_hits += 1
-        elif self.fileoids[1 - self.current].get(oid):
-            self.hits += 1
-            self.total_hits += 1
-            # Simulate a write, unless it would cause a flip
-            size = size + 31 - 127
-            if self.filesize[self.current] + size <= self.cachelimit / 2:
-                self.filesize[self.current] += size
-                self.fileoids[self.current][oid] = 1
-                del self.fileoids[1 - self.current][oid]
-        else:
-            self.write(oid, size)
-
-class LRUCacheSimulation(Simulation):
-
-    extraname = "evicts"
-
-    def __init__(self, cachelimit):
-        # Initialize base class
-        Simulation.__init__(self, cachelimit)
-        # Initialize additional global statistics
-        self.total_evicts = 0
-
-    def restart(self):
-        # Reset base class
-        Simulation.restart(self)
-        # Reset additional per-run statistics
-        self.evicts = 0
-        # Set up simulation
-        self.cache = {}
-        self.size = 0
-        self.head = Node(None, None)
-        self.head.linkbefore(self.head)
-
-    def load(self, oid, size):
-        node = self.cache.get(oid)
-        if node is not None:
-            self.hits += 1
-            self.total_hits += 1
-            node.linkbefore(self.head)
-        else:
-            self.write(oid, size)
-
-    def write(self, oid, size):
-        node = self.cache.get(oid)
-        if node is not None:
-            node.unlink()
-            assert self.head.next is not None
-            self.size -= node.size
-        node = Node(oid, size)
-        self.cache[oid] = node
-        node.linkbefore(self.head)
-        self.size += size
-        # Evict LRU nodes
-        while self.size > self.cachelimit:
-            self.evicts += 1
-            self.total_evicts += 1
-            node = self.head.next
-            assert node is not self.head
-            node.unlink()
-            assert self.head.next is not None
-            del self.cache[node.oid]
-            self.size -= node.size
-
-    def inval(self, oid):
-        node = self.cache.get(oid)
-        if node is not None:
-            assert node.oid == oid
-            self.invals += 1
-            self.total_invals += 1
-            node.unlink()
-            assert self.head.next is not None
-            del self.cache[oid]
-            self.size -= node.size
-            assert self.size >= 0
-
-class Node(object):
-    """Node in a doubly-linked list, storing oid and size as payload.
-
-    A node can be linked or unlinked; in the latter case, next and
-    prev are None.  Initially a node is unlinked.
-    """
-
-    __slots__ = ['prev', 'next', 'oid', 'size']
-
-    def __init__(self, oid, size):
-        self.oid = oid
-        self.size = size
-        self.prev = self.next = None
-
-    def unlink(self):
-        prev = self.prev
-        next = self.next
-        if prev is not None:
-            assert next is not None
-            assert prev.next is self
-            assert next.prev is self
-            prev.next = next
-            next.prev = prev
-            self.prev = self.next = None
-        else:
-            assert next is None
-
-    def linkbefore(self, next):
-        self.unlink()
-        prev = next.prev
-        if prev is None:
-            assert next.next is None
-            prev = next
-        self.prev = prev
-        self.next = next
-        prev.next = next.prev = self
-
-am = object()
-a1in = object()
-a1out = object()
-
-class Node2Q(Node):
-
-    __slots__ = ["kind", "hits"]
-
-    def __init__(self, oid, size, kind=None):
-        Node.__init__(self, oid, size)
-        self.kind = kind
-        self.hits = 0
-
-    def linkbefore(self, next):
-        if next.kind != self.kind:
-            self.kind = next.kind
-        Node.linkbefore(self, next)
-
-class TwoQSimluation(Simulation):
-    # The original 2Q algorithm is page based and the authors offer
-    # tuning guidlines based on a page-based cache.  Our cache is
-    # object based, so, for example, it's hard to compute the number
-    # of oids to store in a1out based on the size of a1in.
-
-    extras = "evicts", "hothit", "am_add"
-
-    NodeClass = Node2Q
-
-    def __init__(self, cachelimit, outlen=10000, threshold=0):
-        Simulation.__init__(self, cachelimit)
-
-        # The promotion threshold: If a hit occurs in a1out, it is
-        # promoted to am if the number of hits on the object while it
-        # was in a1in is at least threshold.  The standard 2Q scheme
-        # uses a threshold of 0.
-        self.threshold = threshold
-        self.am_limit = 3 * self.cachelimit / 4
-        self.a1in_limit = self.cachelimit / 4
-
-        self.cache = {}
-        self.am_size = 0
-        self.a1in_size = 0
-        self.a1out_size = 0
-
-        self.total_evicts = 0
-        self.total_hothit = 0
-        self.total_am_add = 0
-        self.a1out_limit = outlen
-
-        # An LRU queue of hot objects
-        self.am = self.NodeClass(None, None, am)
-        self.am.linkbefore(self.am)
-        # A FIFO queue of recently referenced objects.  It's purpose
-        # is to absorb references to objects that are accessed a few
-        # times in short order, then forgotten about.
-        self.a1in = self.NodeClass(None, None, a1in)
-        self.a1in.linkbefore(self.a1in)
-        # A FIFO queue of recently reference oids.
-        # This queue only stores the oids, not any data.  If we get a
-        # hit in this queue, promote the object to am.
-        self.a1out = self.NodeClass(None, None, a1out)
-        self.a1out.linkbefore(self.a1out)
-
-    def makespace(self, size):
-        for space in 0, size:
-            if self.enoughspace(size):
-                return
-            self.evict_a1in(space)
-            if self.enoughspace(size):
-                return
-            self.evict_am(space)
-
-    def enoughspace(self, size):
-        totalsize = self.a1in_size + self.am_size
-        return totalsize + size < self.cachelimit
-
-    def evict_a1in(self, extra):
-        while self.a1in_size + extra > self.a1in_limit:
-            if self.a1in.next is self.a1in:
-                return
-            assert self.a1in.next is not None
-            node = self.a1in.next
-            self.evicts += 1
-            self.total_evicts += 1
-            node.linkbefore(self.a1out)
-            self.a1out_size += 1
-            self.a1in_size -= node.size
-            if self.a1out_size > self.a1out_limit:
-                assert self.a1out.next is not None
-                node = self.a1out.next
-                node.unlink()
-                del self.cache[node.oid]
-                self.a1out_size -= 1
-
-    def evict_am(self, extra):
-        while self.am_size + extra > self.am_limit:
-            if self.am.next is self.am:
-                return
-            assert self.am.next is not None
-            node = self.am.next
-            self.evicts += 1
-            self.total_evicts += 1
-            # This node hasn't been accessed in a while, so just
-            # forget about it.
-            node.unlink()
-            del self.cache[node.oid]
-            self.am_size -= node.size
-
-    def write(self, oid, size):
-        # A write always follows a read (ZODB doesn't allow blind writes).
-        # So this write must have followed a recent read of the object.
-        # Don't change it's position, but do update the size.
-
-        # XXX For now, don't evict pages if the new version of the object
-        # is big enough to require eviction.
-        node = self.cache.get(oid)
-        if node is None or node.kind is a1out:
-            return
-        if node.kind is am:
-            self.am_size = self.am_size - node.size + size
-            node.size = size
-        else:
-            self.a1in_size = self.a1in_size - node.size + size
-            node.size = size
-
-    def load(self, oid, size):
-        node = self.cache.get(oid)
-        if node is not None:
-            if node.kind is am:
-                self.hits += 1
-                self.total_hits += 1
-                self.hothit += 1
-                self.total_hothit += 1
-                node.hits += 1
-                node.linkbefore(self.am)
-            elif node.kind is a1in:
-                self.hits += 1
-                self.total_hits += 1
-                node.hits += 1
-            elif node.kind is a1out:
-                self.a1out_size -= 1
-                if node.hits >= self.threshold:
-                    self.makespace(node.size)
-                    self.am_size += node.size
-                    node.linkbefore(self.am)
-                    self.cache[oid] = node
-                    self.am_add += 1
-                    self.total_am_add += 1
-                else:
-                    node.unlink()
-                    self.insert(oid, size)
-        else:
-            self.insert(oid, size)
-
-    def insert(self, oid, size):
-        # New objects enter the cache via a1in.  If they
-        # are frequently used over a long enough time, they
-        # will be promoted to am -- but only via a1out.
-        self.makespace(size)
-        node = self.NodeClass(oid, size, a1in)
-        node.linkbefore(self.a1in)
-        self.cache[oid] = node
-        self.a1in_size += node.size
-
-    def inval(self, oid):
-        # The original 2Q algorithm didn't have to deal with
-        # invalidations.  My own solution: Move it to the head of
-        # a1out.
-        node = self.cache.get(oid)
-        if node is None:
-            return
-        self.invals += 1
-        self.total_invals += 1
-        # XXX Should an invalidation to a1out count?
-        if node.kind is a1out:
-            return
-        node.linkbefore(self.a1out)
-        if node.kind is am:
-            self.am_size -= node.size
-        else:
-            self.a1in_size -= node.size
-
-    def restart(self):
-        Simulation.restart(self)
-
-        self.evicts = 0
-        self.hothit = 0
-        self.am_add = 0
-
-lruT = object()
-lruB = object()
-fifoT = object()
-fifoB = object()
-
-class ARCCacheSimulation(Simulation):
-
-    # Based on the paper ARC: A Self-Tuning, Low Overhead Replacement
-    # Cache by Nimrod Megiddo and Dharmendra S. Modha, USENIX FAST
-    # 2003.  The paper describes a block-based cache.  A lot of the
-    # details need to be fiddled to work with an object-based cache.
-    # For size issues, the key insight ended up being conditions
-    # A.1-A.5 rather than the details of the algorithm in Fig. 4.
-
-    extras = "lruThits", "evicts", "p"
-
-    def __init__(self, cachelimit):
-        Simulation.__init__(self, cachelimit)
-        # There are two pairs of linked lists.  Each pair has a top and
-        # bottom half.  The bottom half contains metadata, but not actual
-        # objects.
-
-        # LRU list of frequently used objects
-        self.lruT = Node2Q(None, None, lruT)
-        self.lruT.linkbefore(self.lruT)
-        self.lruT_len = 0
-        self.lruT_size = 0
-
-        self.lruB = Node2Q(None, None, lruB)
-        self.lruB.linkbefore(self.lruB)
-        self.lruB_len = 0
-        self.lruB_size = 0
-
-        # FIFO list of objects seen once
-        self.fifoT = Node2Q(None, None, fifoT)
-        self.fifoT.linkbefore(self.fifoT)
-        self.fifoT_len = 0
-        self.fifoT_size = 0
-
-        self.fifoB = Node2Q(None, None, fifoB)
-        self.fifoB.linkbefore(self.fifoB)
-        self.fifoB_len = 0
-        self.fifoB_size = 0
-
-        # maps oid to node
-        self.cache = {}
-
-        # The paper says that p should be adjust be 1 as the minimum:
-        # "The compound effect of such small increments and decrements
-        # to p s quite profound as we will demonstrated in the next
-        # section."  Not really, as far as I can tell.  In my traces
-        # with a very small cache, it was taking far too long to adjust
-        # towards favoring some FIFO component.  I would guess that the
-        # chief difference is that our caches are much bigger than the
-        # ones they experimented with.  Their biggest cache had 512K
-        # entries, while our smallest cache will have 40 times that many
-        # entries.
-
-        self.p = 0
-        # XXX multiply computed adjustments to p by walk_factor
-        self.walk_factor = 500
-
-        # statistics
-        self.total_hits = 0
-        self.total_lruThits = 0
-        self.total_fifoThits = 0
-        self.total_evicts = 0
-
-    def restart(self):
-        Simulation.restart(self)
-        self.hits = 0
-        self.lruThits = 0
-        self.fifoThits = 0
-        self.evicts = 0
-
-    def write(self, oid, size):
-        pass
-
-    def replace(self, lruB=False):
-        self.evicts += 1
-        self.total_evicts += 1
-        if self.fifoT_size > self.p or (lruB and self.fifoT_size == self.p):
-            node = self.fifoT.next
-            if node is self.fifoT:
-                return 0
-            assert node is not self.fifoT, self.stats()
-            node.linkbefore(self.fifoB)
-            self.fifoT_len -= 1
-            self.fifoT_size -= node.size
-            self.fifoB_len += 1
-            self.fifoB_size += node.size
-        else:
-            node = self.lruT.next
-            if node is self.lruT:
-                return 0
-            assert node is not self.lruT, self.stats()
-            node.linkbefore(self.lruB)
-            self.lruT_len -= 1
-            self.lruT_size -= node.size
-            self.lruB_len += 1
-            self.lruB_size += node.size
-        return node.size
-
-    def stats(self):
-        self.totalsize = self.lruT_size + self.fifoT_size
-        self.allsize = self.totalsize + self.lruB_size + self.fifoB_size
-        print "cachelimit = %s totalsize=%s allsize=%s" % (
-            addcommas(self.cachelimit),
-            addcommas(self.totalsize),
-            addcommas(self.allsize))
-
-        fmt = (
-            "p=%(p)d\n"
-            "lruT  = %(lruT_len)5d / %(lruT_size)8d / %(lruThits)d\n"
-            "fifoT = %(fifoT_len)5d / %(fifoT_size)8d / %(fifoThits)d\n"
-            "lruB  = %(lruB_len)5d / %(lruB_size)8d\n"
-            "fifoB = %(fifoB_len)5d / %(fifoB_size)8d\n"
-            "loads=%(loads)d hits=%(hits)d evicts=%(evicts)d\n"
-            )
-        print fmt % self.__dict__
-
-    def report(self, extratext=''):
-        self.total_p = self.p
-        Simulation.report(self, extratext)
-
-    def load(self, oid, size):
-        node = self.cache.get(oid)
-        if node is None:
-            # cache miss: We're going to insert a new object in fifoT.
-            # If fifo is full, we'll need to evict something to make
-            # room for it.
-
-            prev = need = size
-            while need > 0:
-                if size + self.fifoT_size + self.fifoB_size >= self.cachelimit:
-                    if need + self.fifoT_size >= self.cachelimit:
-                        node = self.fifoB.next
-                        assert node is not self.fifoB, self.stats()
-                        node.unlink()
-                        del self.cache[node.oid]
-                        self.fifoB_size -= node.size
-                        self.fifoB_len -= 1
-                        self.evicts += 1
-                        self.total_evicts += 1
-                    else:
-                        node = self.fifoB.next
-                        assert node is not self.fifoB, self.stats()
-                        node.unlink()
-                        del self.cache[node.oid]
-                        self.fifoB_size -= node.size
-                        self.fifoB_len -= 1
-                        if self.fifoT_size + self.lruT_size > self.cachelimit:
-                            need -= self.replace()
-                else:
-                    incache_size = self.fifoT_size + self.lruT_size + need
-                    total_size = (incache_size + self.fifoB_size
-                                  + self.lruB_size)
-                    if total_size >= self.cachelimit * 2:
-                        node = self.lruB.next
-                        if node is self.lruB:
-                            break
-                        assert node is not self.lruB
-                        node.unlink()
-                        del self.cache[node.oid]
-                        self.lruB_size -= node.size
-                        self.lruB_len -= 1
-                    elif incache_size > self.cachelimit:
-                        need -= self.replace()
-                    else:
-                        break
-                if need == prev:
-                    # XXX hack, apparently we can't get rid of anything else
-                    break
-                prev = need
-
-            node = Node2Q(oid, size)
-            node.linkbefore(self.fifoT)
-            self.fifoT_len += 1
-            self.fifoT_size += size
-            self.cache[oid] = node
-        else:
-            # a cache hit, but possibly in a bottom list that doesn't
-            # actually hold the object
-            if node.kind is lruT:
-                node.linkbefore(self.lruT)
-
-                self.hits += 1
-                self.total_hits += 1
-                self.lruThits += 1
-                self.total_lruThits += 1
-
-            elif node.kind is fifoT:
-                node.linkbefore(self.lruT)
-                self.fifoT_len -= 1
-                self.lruT_len += 1
-                self.fifoT_size -= node.size
-                self.lruT_size += node.size
-
-                self.hits += 1
-                self.total_hits += 1
-                self.fifoThits += 1
-                self.total_fifoThits += 1
-
-            elif node.kind is fifoB:
-                node.linkbefore(self.lruT)
-                self.fifoB_len -= 1
-                self.lruT_len += 1
-                self.fifoB_size -= node.size
-                self.lruT_size += node.size
-
-                # XXX need a better min than 1?
-##                print "adapt+", max(1, self.lruB_size // self.fifoB_size)
-                delta = max(1, self.lruB_size / max(1, self.fifoB_size))
-                self.p += delta * self.walk_factor
-                if self.p > self.cachelimit:
-                    self.p = self.cachelimit
-
-                need = node.size
-                if self.lruT_size + self.fifoT_size + need > self.cachelimit:
-                    while need > 0:
-                        r = self.replace()
-                        if not r:
-                            break
-                        need -= r
-
-            elif node.kind is lruB:
-                node.linkbefore(self.lruT)
-                self.lruB_len -= 1
-                self.lruT_len += 1
-                self.lruB_size -= node.size
-                self.lruT_size += node.size
-
-                # XXX need a better min than 1?
-##                print "adapt-", max(1, self.fifoB_size // self.lruB_size)
-                delta = max(1, self.fifoB_size / max(1, self.lruB_size))
-                self.p -= delta * self.walk_factor
-                if self.p < 0:
-                    self.p = 0
-
-                need = node.size
-                if self.lruT_size + self.fifoT_size + need > self.cachelimit:
-                    while need > 0:
-                        r = self.replace(lruB=True)
-                        if not r:
-                            break
-                        need -= r
-
-    def inval(self, oid):
-        pass
-
-    def extraheader(self):
-        pass
-
-class OracleSimulation(LRUCacheSimulation):
-    # Not sure how to implement this yet.  This is a cache where I
-    # cheat to see how good we could actually do.  The cache
-    # replacement problem for multi-size caches is NP-hard, so we're
-    # not going to have an optimal solution.
-
-    # At the moment, the oracle is mostly blind.  It knows which
-    # objects will be referenced more than once, so that it can
-    # ignore objects referenced only once.  In most traces, these
-    # objects account for about 20% of references.
-
-    def __init__(self, cachelimit, filename):
-        LRUCacheSimulation.__init__(self, cachelimit)
-        self.count = {}
-        self.scan(filename)
-
-    def load(self, oid, size):
-        node = self.cache.get(oid)
-        if node is not None:
-            self.hits += 1
-            self.total_hits += 1
-            node.linkbefore(self.head)
-        else:
-            if oid in self.count:
-                self.write(oid, size)
-
-    def scan(self, filename):
-        # scan the file in advance to figure out which objects will
-        # be referenced more than once.
-        f = open(filename, "rb")
-        struct_unpack = struct.unpack
-        f_read = f.read
-        offset = 0
-        while 1:
-            # Read a record and decode it
-            r = f_read(8)
-            if len(r) < 8:
-                break
-            offset += 8
-            ts, code = struct_unpack(">ii", r)
-            if ts == 0:
-                # Must be a misaligned record caused by a crash
-                ##print "Skipping 8 bytes at offset", offset-8
-                continue
-            r = f_read(16)
-            if len(r) < 16:
-                break
-            offset += 16
-            oid, serial = struct_unpack(">8s8s", r)
-            if code & 0x70 == 0x20:
-                # only look at loads
-                self.count[oid] = self.count.get(oid, 0) + 1
-
-        all = len(self.count)
-
-        # Now remove everything with count == 1
-        once = [oid for oid, count in self.count.iteritems()
-                if count == 1]
-        for oid in once:
-            del self.count[oid]
-
-        print "Scanned file, %d unique oids, %d repeats" % (
-            all, len(self.count))
-
-class BuddyCacheSimulation(LRUCacheSimulation):
-
-    def __init__(self, cachelimit):
-        LRUCacheSimulation.__init__(self, roundup(cachelimit))
-
-    def restart(self):
-        LRUCacheSimulation.restart(self)
-        self.allocator = self.allocatorFactory(self.cachelimit)
-
-    def allocatorFactory(self, size):
-        return BuddyAllocator(size)
-
-    # LRUCacheSimulation.load() is just fine
-
-    def write(self, oid, size):
-        node = self.cache.get(oid)
-        if node is not None:
-            node.unlink()
-            assert self.head.next is not None
-            self.size -= node.size
-            self.allocator.free(node)
-        while 1:
-            node = self.allocator.alloc(size)
-            if node is not None:
-                break
-            # Failure to allocate.  Evict something and try again.
-            node = self.head.next
-            assert node is not self.head
-            self.evicts += 1
-            self.total_evicts += 1
-            node.unlink()
-            assert self.head.next is not None
-            del self.cache[node.oid]
-            self.size -= node.size
-            self.allocator.free(node)
-        node.oid = oid
-        self.cache[oid] = node
-        node.linkbefore(self.head)
-        self.size += node.size
-
-    def inval(self, oid):
-        node = self.cache.get(oid)
-        if node is not None:
-            assert node.oid == oid
-            self.invals += 1
-            self.total_invals += 1
-            node.unlink()
-            assert self.head.next is not None
-            del self.cache[oid]
-            self.size -= node.size
-            assert self.size >= 0
-            self.allocator.free(node)
-
-class SimpleCacheSimulation(BuddyCacheSimulation):
-
-    def allocatorFactory(self, size):
-        return SimpleAllocator(size)
-
-    def finish(self):
-        BuddyCacheSimulation.finish(self)
-        self.allocator.report()
-
-MINSIZE = 256
-
-class BuddyAllocator:
-
-    def __init__(self, cachelimit):
-        cachelimit = roundup(cachelimit)
-        self.cachelimit = cachelimit
-        self.avail = {} # Map rounded-up sizes to free list node heads
-        self.nodes = {} # Map address to node
-        k = MINSIZE
-        while k <= cachelimit:
-            self.avail[k] = n = Node(None, None) # Not BlockNode; has no addr
-            n.linkbefore(n)
-            k += k
-        node = BlockNode(None, cachelimit, 0)
-        self.nodes[0] = node
-        node.linkbefore(self.avail[cachelimit])
-
-    def alloc(self, size):
-        size = roundup(size)
-        k = size
-        while k <= self.cachelimit:
-            head = self.avail[k]
-            node = head.next
-            if node is not head:
-                break
-            k += k
-        else:
-            return None # Store is full, or block is too large
-        node.unlink()
-        size2 = node.size
-        while size2 > size:
-            size2 = size2 / 2
-            assert size2 >= size
-            node.size = size2
-            buddy = BlockNode(None, size2, node.addr + size2)
-            self.nodes[buddy.addr] = buddy
-            buddy.linkbefore(self.avail[size2])
-        node.oid = 1 # Flag as in-use
-        return node
-
-    def free(self, node):
-        assert node is self.nodes[node.addr]
-        assert node.prev is node.next is None
-        node.oid = None # Flag as free
-        while node.size < self.cachelimit:
-            buddy_addr = node.addr ^ node.size
-            buddy = self.nodes[buddy_addr]
-            assert buddy.addr == buddy_addr
-            if buddy.oid is not None or buddy.size != node.size:
-                break
-            # Merge node with buddy
-            buddy.unlink()
-            if buddy.addr < node.addr: # buddy prevails
-                del self.nodes[node.addr]
-                node = buddy
-            else: # node prevails
-                del self.nodes[buddy.addr]
-            node.size *= 2
-        assert node is self.nodes[node.addr]
-        node.linkbefore(self.avail[node.size])
-
-    def dump(self, msg=""):
-        if msg:
-            print msg,
-        size = MINSIZE
-        blocks = bytes = 0
-        while size <= self.cachelimit:
-            head = self.avail[size]
-            node = head.next
-            count = 0
-            while node is not head:
-                count += 1
-                node = node.next
-            if count:
-                print "%d:%d" % (size, count),
-            blocks += count
-            bytes += count*size
-            size += size
-        print "-- %d, %d" % (bytes, blocks)
-
-def roundup(size):
-    k = MINSIZE
-    while k < size:
-        k += k
-    return k
-
-class SimpleAllocator:
-
-    def __init__(self, arenasize):
-        self.arenasize = arenasize
-        self.avail = BlockNode(None, 0, 0) # Weird: empty block as list head
-        self.rover = self.avail
-        node = BlockNode(None, arenasize, 0)
-        node.linkbefore(self.avail)
-        self.taglo = {0: node}
-        self.taghi = {arenasize: node}
-        # Allocator statistics
-        self.nallocs = 0
-        self.nfrees = 0
-        self.allocloops = 0
-        self.freebytes = arenasize
-        self.freeblocks = 1
-        self.allocbytes = 0
-        self.allocblocks = 0
-
-    def report(self):
-        print ("NA=%d AL=%d NF=%d ABy=%d ABl=%d FBy=%d FBl=%d" %
-               (self.nallocs, self.allocloops,
-                self.nfrees,
-                self.allocbytes, self.allocblocks,
-                self.freebytes, self.freeblocks))
-
-    def alloc(self, size):
-        self.nallocs += 1
-        # First fit algorithm
-        rover = stop = self.rover
-        while 1:
-            self.allocloops += 1
-            if rover.size >= size:
-                break
-            rover = rover.next
-            if rover is stop:
-                return None # We went round the list without finding space
-        if rover.size == size:
-            self.rover = rover.next
-            rover.unlink()
-            del self.taglo[rover.addr]
-            del self.taghi[rover.addr + size]
-            self.freeblocks -= 1
-            self.allocblocks += 1
-            self.freebytes -= size
-            self.allocbytes += size
-            return rover
-        # Take space from the beginning of the roving pointer
-        assert rover.size > size
-        node = BlockNode(None, size, rover.addr)
-        del self.taglo[rover.addr]
-        rover.size -= size
-        rover.addr += size
-        self.taglo[rover.addr] = rover
-        #self.freeblocks += 0 # No change here
-        self.allocblocks += 1
-        self.freebytes -= size
-        self.allocbytes += size
-        return node
-
-    def free(self, node):
-        self.nfrees += 1
-        self.freeblocks += 1
-        self.allocblocks -= 1
-        self.freebytes += node.size
-        self.allocbytes -= node.size
-        node.linkbefore(self.avail)
-        self.taglo[node.addr] = node
-        self.taghi[node.addr + node.size] = node
-        x = self.taghi.get(node.addr)
-        if x is not None:
-            # Merge x into node
-            x.unlink()
-            self.freeblocks -= 1
-            del self.taglo[x.addr]
-            del self.taghi[x.addr + x.size]
-            del self.taglo[node.addr]
-            node.addr = x.addr
-            node.size += x.size
-            self.taglo[node.addr] = node
-        x = self.taglo.get(node.addr + node.size)
-        if x is not None:
-            # Merge x into node
-            x.unlink()
-            self.freeblocks -= 1
-            del self.taglo[x.addr]
-            del self.taghi[x.addr + x.size]
-            del self.taghi[node.addr + node.size]
-            node.size += x.size
-            self.taghi[node.addr + node.size] = node
-        # It's possible that either one of the merges above invalidated
-        # the rover.
-        # It's simplest to simply reset the rover to the newly freed block.
-        self.rover = node
-
-    def dump(self, msg=""):
-        if msg:
-            print msg,
-        count = 0
-        bytes = 0
-        node = self.avail.next
-        while node is not self.avail:
-            bytes += node.size
-            count += 1
-            node = node.next
-        print count, "free blocks,", bytes, "free bytes"
-        self.report()
-
-class BlockNode(Node):
-
-    __slots__ = ['addr']
-
-    def __init__(self, oid, size, addr):
-        Node.__init__(self, oid, size)
-        self.addr = addr
-
-def testallocator(factory=BuddyAllocator):
-    # Run one of Knuth's experiments as a test
-    import random
-    import heapq # This only runs with Python 2.3, folks :-)
-    reportfreq = 100
-    cachelimit = 2**17
-    cache = factory(cachelimit)
-    queue = []
-    T = 0
-    blocks = 0
-    while T < 5000:
-        while queue and queue[0][0] <= T:
-            time, node = heapq.heappop(queue)
-            assert time == T
-            ##print "free addr=%d, size=%d" % (node.addr, node.size)
-            cache.free(node)
-            blocks -= 1
-        size = random.randint(100, 2000)
-        lifetime = random.randint(1, 100)
-        node = cache.alloc(size)
-        if node is None:
-            print "out of mem"
-            cache.dump("T=%4d: %d blocks;" % (T, blocks))
-            break
-        else:
-            ##print "alloc addr=%d, size=%d" % (node.addr, node.size)
-            blocks += 1
-            heapq.heappush(queue, (T + lifetime, node))
-        T = T+1
-        if T % reportfreq == 0:
-            cache.dump("T=%4d: %d blocks;" % (T, blocks))
-
 def hitrate(loads, hits):
     return "%5.1f%%" % (100.0 * hits / max(1, loads))
 
@@ -1546,218 +528,5 @@
         i -= 3
     return sign + s
 
-import random
-
-def maybe(f, p=0.5):
-    if random.random() < p:
-        f()
-
-#############################################################################
-# Thor-like eviction scheme.
-#
-# The cache keeps a list of all objects, and uses a travelling pointer
-# to decay the worth of objects over time.
-
-class ThorNode(Node):
-
-    __slots__ = ['worth']
-
-    def __init__(self, oid, size, worth=None):
-        Node.__init__(self, oid, size)
-        self.worth = worth
-
-class ThorListHead(Node):
-    def __init__(self):
-        Node.__init__(self, 0, 0)
-        self.next = self.prev = self
-
-class ThorSimulation(Simulation):
-
-    extras = "evicts", "trips"
-
-    def __init__(self, cachelimit):
-        Simulation.__init__(self, cachelimit)
-
-        # Maximum total of object sizes we keep in cache.
-        self.maxsize = cachelimit
-        # Current total of object sizes in cache.
-        self.currentsize = 0
-
-        # A worth byte maps to a set of all objects with that worth.
-        # This is cheap to keep updated, and makes finding low-worth
-        # objects for eviction trivial (just march over the worthsets
-        # list, in order).
-        self.worthsets = [Set() for dummy in range(256)]
-
-        # We keep a circular list of all objects in cache.  currentobj
-        # walks around it forever.  Each time _tick() is called, the
-        # worth of currentobj is decreased, basically by shifting
-        # right 1, and currentobj moves on to the next object.  When
-        # an object is first inserted, it enters the list right before
-        # currentobj.  When an object is accessed, its worth is
-        # increased by or'ing in 0x80.  This scheme comes from the
-        # Thor system, and is an inexpensive way to account for both
-        # recency and frequency of access:  recency is reflected in
-        # the leftmost bit set, and frequency by how many bits are
-        # set.
-        #
-        # Note:  because evictions are interleaved with ticks,
-        # unlinking an object is tricky, lest we evict currentobj.  The
-        # class _unlink method takes care of this properly.
-        self.listhead = ThorListHead()
-        self.currentobj = self.listhead
-
-        # Map an object.oid to its ThorNode.
-        self.oid2object = {}
-
-        self.total_evicts = self.total_trips = 0
-
-    # Unlink object from the circular list, taking care not to lose
-    # track of the current object.  Always call this instead of
-    # invoking obj.unlink() directly.
-    def _unlink(self, obj):
-        assert obj is not self.listhead
-        if obj is self.currentobj:
-            self.currentobj = obj.next
-        obj.unlink()
-
-    # Change obj.worth to newworth, maintaining invariants.
-    def _change_worth(self, obj, newworth):
-        if obj.worth != newworth:
-            self.worthsets[obj.worth].remove(obj)
-            obj.worth = newworth
-            self.worthsets[newworth].add(obj)
-
-    def add(self, object):
-        assert object.oid not in self.oid2object
-        self.oid2object[object.oid] = object
-
-        newsize = self.currentsize + object.size
-        if newsize > self.maxsize:
-            self._evictbytes(newsize - self.maxsize)
-        self.currentsize += object.size
-        object.linkbefore(self.currentobj)
-
-        if object.worth is None:
-            # Give smaller objects higher initial worth.  This favors kicking
-            # out unreferenced large objects before kicking out unreferenced
-            # small objects.  On real life traces, this is a significant
-            # win for the hit rate.
-            object.worth = 32 - int(round(math.log(object.size, 2)))
-        self.worthsets[object.worth].add(object)
-
-    # Decrease the worth of the current object, and advance the
-    # current object.
-    def _tick(self):
-        c = self.currentobj
-        if c is self.listhead:
-            c = c.next
-            if c is self.listhead:  # list is empty
-                return
-            self.total_trips += 1
-            self.trips += 1
-        self._change_worth(c, (c.worth + 1) >> 1)
-        self.currentobj = c.next
-
-    def access(self, oid):
-        self._tick()
-        obj = self.oid2object.get(oid)
-        if obj is None:
-            return None
-        self._change_worth(obj, obj.worth | 0x80)
-        return obj
-
-    # Evict objects of least worth first, until at least nbytes bytes
-    # have been freed.
-    def _evictbytes(self, nbytes):
-        for s in self.worthsets:
-            while s:
-                if nbytes <= 0:
-                    return
-                obj = s.pop()
-                nbytes -= obj.size
-                self._evictobj(obj)
-
-    def _evictobj(self, obj):
-        self.currentsize -= obj.size
-        self.worthsets[obj.worth].discard(obj)
-        del self.oid2object[obj.oid]
-        self._unlink(obj)
-
-        self.evicts += 1
-        self.total_evicts += 1
-
-    def _evict_without_bumping_evict_stats(self, obj):
-        self._evictobj(obj)
-        self.evicts -= 1
-        self.total_evicts -= 1
-
-    # Simulator overrides from here on.
-
-    def restart(self):
-        # Reset base class
-        Simulation.restart(self)
-        # Reset additional per-run statistics
-        self.evicts = self.trips = 0
-
-    def write(self, oid, size):
-        obj = self.oid2object.get(oid)
-        worth = None
-        if obj is not None:
-            worth = obj.worth
-            self._evict_without_bumping_evict_stats(obj)
-        self.add(ThorNode(oid, size, worth))
-
-    def load(self, oid, size):
-        if self.access(oid) is not None:
-            self.hits += 1
-            self.total_hits += 1
-        else:
-            self.write(oid, size)
-
-    def inval(self, oid):
-        obj = self.oid2object.get(oid)
-        if obj is not None:
-            self.invals += 1
-            self.total_invals += 1
-            self._evict_without_bumping_evict_stats(obj)
-
-    # Take the "x" off to see additional stats after each restart period.
-    def xreport(self):
-        Simulation.report(self)
-        print 'non-empty worth sets', sum(map(bool, self.worthsets)),
-        print 'objects', len(self.oid2object),
-        print 'size', self.currentsize
-
-#############################################################################
-# Perfection:  What if the cache were unbounded, and never forgot anything?
-# This simulator answers that question directly; the cache size parameter
-# isn't used.
-
-class UnboundedSimulation(Simulation):
-
-    extraname = 'evicts'   # for some reason we *have* to define >= 1 extra
-
-    def __init__(self, cachelimit):
-        Simulation.__init__(self, cachelimit)
-        self.oids = Set()
-        self.evicts = self.total_evicts = 0
-
-    def write(self, oid, size):
-        self.oids.add(oid)
-
-    def load(self, oid, size):
-        if oid in self.oids:
-            self.hits += 1
-            self.total_hits += 1
-        else:
-            self.oids.add(oid)
-
-    def inval(self, oid):
-        if oid in self.oids:
-            self.invals += 1
-            self.total_invals += 1
-            self.oids.remove(oid)
-
 if __name__ == "__main__":
     sys.exit(main())



More information about the checkins mailing list