[Zope-Checkins] SVN: Zope/trunk/src/Products/ZCatalog/ Rename the catalog report to plan to fit its new purpose

Hanno Schlichting hannosch at hannosch.eu
Sun Aug 1 16:36:07 EDT 2010


Log message for revision 115350:
  Rename the catalog report to plan to fit its new purpose
  

Changed:
  U   Zope/trunk/src/Products/ZCatalog/Catalog.py
  U   Zope/trunk/src/Products/ZCatalog/ZCatalog.py
  A   Zope/trunk/src/Products/ZCatalog/dtml/catalogPlan.dtml
  D   Zope/trunk/src/Products/ZCatalog/dtml/catalogReport.dtml
  A   Zope/trunk/src/Products/ZCatalog/plan.py
  D   Zope/trunk/src/Products/ZCatalog/report.py
  A   Zope/trunk/src/Products/ZCatalog/tests/test_plan.py
  D   Zope/trunk/src/Products/ZCatalog/tests/test_report.py

-=-
Modified: Zope/trunk/src/Products/ZCatalog/Catalog.py
===================================================================
--- Zope/trunk/src/Products/ZCatalog/Catalog.py	2010-08-01 20:26:37 UTC (rev 115349)
+++ Zope/trunk/src/Products/ZCatalog/Catalog.py	2010-08-01 20:36:07 UTC (rev 115350)
@@ -31,8 +31,8 @@
 from BTrees.IOBTree import IOBTree
 from Lazy import LazyMap, LazyCat, LazyValues
 from CatalogBrains import AbstractCatalogBrain, NoBrainer
-from .report import CatalogReport
-from .report import make_key
+from .plan import CatalogPlan
+from .plan import make_key
 
 
 LOG = logging.getLogger('Zope.ZCatalog')
@@ -507,7 +507,7 @@
         # Canonicalize the request into a sensible query before passing it on
         query = self.make_query(query)
 
-        cr = self.getCatalogReport(query)
+        cr = self.getCatalogPlan(query)
         cr.start()
 
         plan = cr.plan()
@@ -819,12 +819,12 @@
 
     __call__ = searchResults
 
-    def getCatalogReport(self, query=None):
-        """Reports about the duration of queries.
+    def getCatalogPlan(self, query=None):
+        """Query time reporting and planning.
         """
         parent = aq_base(aq_parent(self))
         threshold = getattr(parent, 'long_query_time', 0.1)
-        return CatalogReport(self, query, threshold)
+        return CatalogPlan(self, query, threshold)
 
 
 class CatalogSearchArgumentsMap(object):

Modified: Zope/trunk/src/Products/ZCatalog/ZCatalog.py
===================================================================
--- Zope/trunk/src/Products/ZCatalog/ZCatalog.py	2010-08-01 20:26:37 UTC (rev 115349)
+++ Zope/trunk/src/Products/ZCatalog/ZCatalog.py	2010-08-01 20:36:07 UTC (rev 115350)
@@ -103,7 +103,7 @@
         {'label': 'Metadata', 'action': 'manage_catalogSchema'},
         {'label': 'Find Objects', 'action': 'manage_catalogFind'},
         {'label': 'Advanced', 'action': 'manage_catalogAdvanced'},
-        {'label': 'Query Report', 'action': 'manage_catalogReport'},
+        {'label': 'Query Report', 'action': 'manage_catalogPlan'},
         {'label': 'Undo', 'action': 'manage_UndoForm'},
         {'label': 'Security', 'action': 'manage_access'},
         {'label': 'Ownership', 'action': 'manage_owner'},
@@ -127,9 +127,8 @@
                               'manage_catalogAdvanced')
     manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals())
 
-    security.declareProtected(manage_zcatalog_entries,
-                              'manage_catalogReport')
-    manage_catalogReport = DTMLFile('dtml/catalogReport', globals())
+    security.declareProtected(manage_zcatalog_entries, 'manage_catalogPlan')
+    manage_catalogPlan = DTMLFile('dtml/catalogPlan', globals())
 
     security.declareProtected(manage_zcatalog_entries,
                               'manage_objectInformation')
@@ -874,36 +873,36 @@
     def delColumn(self, name):
         return self._catalog.delColumn(name)
 
-    # Catalog report methods
+    # Catalog plan methods
 
-    security.declareProtected(manage_zcatalog_entries, 'getCatalogReport')
-    def getCatalogReport(self):
-        """ Reports about the duration of queries """
-        rval = self._catalog.getCatalogReport().report()
+    security.declareProtected(manage_zcatalog_entries, 'getCatalogPlan')
+    def getCatalogPlan(self):
+        """Query time reporting and planning."""
+        rval = self._catalog.getCatalogPlan().report()
         rval.sort(key=operator.itemgetter('duration'), reverse=True)
         return rval
 
     security.declareProtected(manage_zcatalog_entries,
-                              'manage_resetCatalogReport')
-    def manage_resetCatalogReport(self, REQUEST=None):
-        """ resets the catalog report """
-        self._catalog.getCatalogReport().reset()
+                              'manage_resetCatalogPlan')
+    def manage_resetCatalogPlan(self, REQUEST=None):
+        """Resets the catalog plan."""
+        self._catalog.getCatalogPlan().reset()
 
         if REQUEST is not None:
             REQUEST.response.redirect(REQUEST.URL1 +
-                '/manage_catalogReport?manage_tabs_message=Report%20cleared')
+                '/manage_catalogPlan?manage_tabs_message=Plan%20cleared')
 
     security.declareProtected(manage_zcatalog_entries,
-                              'manage_editCatalogReport')
-    def manage_editCatalogReport(self, long_query_time=0.1, REQUEST=None):
-        """ edit the long query time """
+                              'manage_editCatalogPlan')
+    def manage_editCatalogPlan(self, long_query_time=0.1, REQUEST=None):
+        """Edit the long query time."""
         if not isinstance(long_query_time, float):
             long_query_time = float(long_query_time)
         self.long_query_time = long_query_time
 
         if REQUEST is not None:
             REQUEST.response.redirect(REQUEST.URL1 +
-                '/manage_catalogReport?manage_tabs_message=' +
+                '/manage_catalogPlan?manage_tabs_message=' +
                 'Long%20query%20time%20changed')
 
 InitializeClass(ZCatalog)

Copied: Zope/trunk/src/Products/ZCatalog/dtml/catalogPlan.dtml (from rev 115347, Zope/trunk/src/Products/ZCatalog/dtml/catalogReport.dtml)
===================================================================
--- Zope/trunk/src/Products/ZCatalog/dtml/catalogPlan.dtml	                        (rev 0)
+++ Zope/trunk/src/Products/ZCatalog/dtml/catalogPlan.dtml	2010-08-01 20:36:07 UTC (rev 115350)
@@ -0,0 +1,118 @@
+<dtml-var manage_page_header>
+<dtml-var manage_tabs>
+
+<p class="form-help"> 
+   The <strong>query report</strong> shows catalog queries that
+   perform slowly.
+</p>
+
+<table width="100%" cellspacing="0" cellpadding="2" border="0">
+    <tr class="list-header" >
+        <td align="left" valign="top">
+            <div class="list-nav">
+                Mean duration&nbsp;[ms]
+            </div>
+        </td>
+        <td align="left" valign="top">
+            <div class="list-nav">
+                Hits
+            </div>
+        </td>
+        <td align="left" valign="top">
+            <div class="list-nav">
+                Query key
+            </div>
+        </td>
+        <td align="left" valign="top">
+            <div class="list-nav">
+                Recent
+            </div>
+        </td>
+    </tr>
+    <dtml-if getCatalogPlan>
+        <dtml-in getCatalogPlan mapping>
+            <dtml-if sequence-odd>
+                <tr class="row-normal">
+            <dtml-else>
+                <tr class="row-hilite">
+            </dtml-if>
+                    <td align="left" valign="top">
+                        <div class="list-item">
+                            <dtml-var expr="'%3.2f' % duration">
+                        </div>
+                    </td>
+                    <td align="left" valign="top">
+                        <div class="list-item">
+                            &dtml-counter;
+                        </div>
+                    </td>
+                    <td align="left" valign="top">
+                        <div class="list-item">
+                            &dtml-query;
+                        </div>
+                    </td>
+                    <td align="left" valign="top">
+                        <div class="list-item">
+                            <dtml-var expr="'%3.2f' % last['duration']">ms
+                            [<dtml-in expr="last['details']" sort mapping>
+                            &dtml-id;:
+                            <dtml-var expr="'%3.2f' % duration">ms /
+                            &dtml-length; objects,
+                            </dtml-in>]
+                        </div>
+                    </td>
+                </tr>
+        </dtml-in>
+        <tr>
+            <td colspan="2" align="left" valign="top">
+                <p class="form-help">Resetting the catalog report will reinitialize the report log.</p>
+            </td>
+            <td colspan="2" align="right" valign="top">
+                <form action="manage_resetCatalogPlan" method=POST>
+                    <div class="form-element">
+                        <input class="form-element" type="submit" value="Reset Report">
+                    </div>
+                </form>
+            </td>
+        </tr>
+    <dtml-else>
+        <tr>
+            <td colspan="4" >
+                <div class="list-item">
+                    Report is empty.
+                </div>
+            </td>
+        </tr>
+    </dtml-if>
+</table>
+
+<form action="manage_editCatalogPlan" method="post">
+    <table width="100%" style="padding-top:1em;" cellspacing="0" cellpadding="2" border="0">
+        <tr class="section-bar">
+            <td colspan="3" align="left">
+                <div class="form-label">
+                    Settings
+                </div>
+            </td>
+        </tr>
+        <tr>
+            <td align="right" valign="middle">
+                <div class="list-item">
+                    Threshold (in seconds)
+                </div>
+            </td>
+            <td align="left" valign="middle">
+                <div class="form-element">
+                    <input name="long_query_time:float" value="&dtml-long_query_time;" />
+                </div>
+            </td>
+            <td align="left" valign="middle">
+                <p class="form-help">Only queries whose execution
+                takes longer than the configured threshold are considered
+                being slow. (Default 0.1 seconds).</p>
+        </tr>
+    </table>
+    <input class="form-element" type="submit" value="Apply settings">
+</form>
+
+<dtml-var manage_page_footer>

Deleted: Zope/trunk/src/Products/ZCatalog/dtml/catalogReport.dtml
===================================================================
--- Zope/trunk/src/Products/ZCatalog/dtml/catalogReport.dtml	2010-08-01 20:26:37 UTC (rev 115349)
+++ Zope/trunk/src/Products/ZCatalog/dtml/catalogReport.dtml	2010-08-01 20:36:07 UTC (rev 115350)
@@ -1,118 +0,0 @@
-<dtml-var manage_page_header>
-<dtml-var manage_tabs>
-
-<p class="form-help"> 
-   The <strong>query report</strong> shows catalog queries that
-   perform slowly.
-</p>
-
-<table width="100%" cellspacing="0" cellpadding="2" border="0">
-    <tr class="list-header" >
-        <td align="left" valign="top">
-            <div class="list-nav">
-                Mean duration&nbsp;[ms]
-            </div>
-        </td>
-        <td align="left" valign="top">
-            <div class="list-nav">
-                Hits
-            </div>
-        </td>
-        <td align="left" valign="top">
-            <div class="list-nav">
-                Query key
-            </div>
-        </td>
-        <td align="left" valign="top">
-            <div class="list-nav">
-                Recent
-            </div>
-        </td>
-    </tr>
-    <dtml-if getCatalogReport>
-        <dtml-in getCatalogReport mapping>
-            <dtml-if sequence-odd>
-                <tr class="row-normal">
-            <dtml-else>
-                <tr class="row-hilite">
-            </dtml-if>
-                    <td align="left" valign="top">
-                        <div class="list-item">
-                            <dtml-var expr="'%3.2f' % duration">
-                        </div>
-                    </td>
-                    <td align="left" valign="top">
-                        <div class="list-item">
-                            &dtml-counter;
-                        </div>
-                    </td>
-                    <td align="left" valign="top">
-                        <div class="list-item">
-                            &dtml-query;
-                        </div>
-                    </td>
-                    <td align="left" valign="top">
-                        <div class="list-item">
-                            <dtml-var expr="'%3.2f' % last['duration']">ms
-                            [<dtml-in expr="last['details']" sort mapping>
-                            &dtml-id;:
-                            <dtml-var expr="'%3.2f' % duration">ms /
-                            &dtml-length; objects,
-                            </dtml-in>]
-                        </div>
-                    </td>
-                </tr>
-        </dtml-in>
-        <tr>
-            <td colspan="2" align="left" valign="top">
-                <p class="form-help">Resetting the catalog report will reinitialize the report log.</p>
-            </td>
-            <td colspan="2" align="right" valign="top">
-                <form action="manage_resetCatalogReport" method=POST>
-                    <div class="form-element">
-                        <input class="form-element" type="submit" value="Reset Report">
-                    </div>
-                </form>
-            </td>
-        </tr>
-    <dtml-else>
-        <tr>
-            <td colspan="4" >
-                <div class="list-item">
-                    Report is empty.
-                </div>
-            </td>
-        </tr>
-    </dtml-if>
-</table>
-
-<form action="manage_editCatalogReport" method="post">
-    <table width="100%" style="padding-top:1em;" cellspacing="0" cellpadding="2" border="0">
-        <tr class="section-bar">
-            <td colspan="3" align="left">
-                <div class="form-label">
-                    Settings
-                </div>
-            </td>
-        </tr>
-        <tr>
-            <td align="right" valign="middle">
-                <div class="list-item">
-                    Threshold (in seconds)
-                </div>
-            </td>
-            <td align="left" valign="middle">
-                <div class="form-element">
-                    <input name="long_query_time:float" value="&dtml-long_query_time;" />
-                </div>
-            </td>
-            <td align="left" valign="middle">
-                <p class="form-help">Only queries whose execution
-                takes longer than the configured threshold are considered
-                being slow. (Default 0.1 seconds).</p>
-        </tr>
-    </table>
-    <input class="form-element" type="submit" value="Apply settings">
-</form>
-
-<dtml-var manage_page_footer>

Copied: Zope/trunk/src/Products/ZCatalog/plan.py (from rev 115349, Zope/trunk/src/Products/ZCatalog/report.py)
===================================================================
--- Zope/trunk/src/Products/ZCatalog/plan.py	                        (rev 0)
+++ Zope/trunk/src/Products/ZCatalog/plan.py	2010-08-01 20:36:07 UTC (rev 115350)
@@ -0,0 +1,295 @@
+##############################################################################
+#
+# Copyright (c) 2010 Zope Foundation and Contributors.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+
+import time
+from thread import allocate_lock
+
+from Acquisition import aq_base
+from Acquisition import aq_parent
+from Products.PluginIndexes.interfaces import IUniqueValueIndex
+
+reports_lock = allocate_lock()
+reports = {}
+
+value_indexes_lock = allocate_lock()
+value_indexes = frozenset()
+
+MAX_DISTINCT_VALUES = 10
+REFRESH_RATE = 100
+
+
+def determine_value_indexes(indexes):
+    # This function determines all indexes whose values should be respected
+    # in the report key. The number of unique values for the index needs to be
+    # lower than the MAX_DISTINCT_VALUES watermark.
+
+    # TODO: Ideally who would only consider those indexes with a small number
+    # of unique values, where the number of items for each value differs a
+    # lot. If the number of items per value is similar, the duration of a
+    # query is likely similar as well.
+    global value_indexes
+    if value_indexes:
+        # Calculating all the value indexes is quite slow, so we do this once
+        # for the first query. Since this is an optimization only, slightly
+        # outdated results based on index changes in the running process
+        # can be ignored.
+        return value_indexes
+
+    new_value_indexes = set()
+    for name, index in indexes.items():
+        if IUniqueValueIndex.providedBy(index):
+            values = index.uniqueValues()
+            if values and len(list(values)) < MAX_DISTINCT_VALUES:
+                # Only consider indexes which actually return a number
+                # greater than zero
+                new_value_indexes.add(name)
+    try:
+        value_indexes_lock.acquire()
+        value_indexes = frozenset(new_value_indexes)
+    finally:
+        value_indexes_lock.release()
+
+    return value_indexes
+
+
+def clear_value_indexes():
+    global value_indexes
+    try:
+        value_indexes_lock.acquire()
+        value_indexes = frozenset()
+    finally:
+        value_indexes_lock.release()
+
+
+from zope.testing.cleanup import addCleanUp
+addCleanUp(clear_value_indexes)
+del addCleanUp
+
+
+def make_key(catalog, query):
+    indexes = catalog.indexes
+    valueindexes = determine_value_indexes(indexes)
+    key = keys = query.keys()
+
+    values = [name for name in keys if name in valueindexes]
+    if values:
+        # If we have indexes whose values should be considered, we first
+        # preserve all normal indexes and then add the keys whose values
+        # matter including their value into the key
+        key = [name for name in keys if name not in values]
+        for name in values:
+
+            v = query.get(name, [])
+            if isinstance(v, (tuple, list)):
+                v = list(v)
+                v.sort()
+
+            # We need to make sure the key is immutable, repr() is an easy way
+            # to do this without imposing restrictions on the types of values
+            key.append((name, repr(v)))
+
+    key = tuple(sorted(key))
+    return key
+
+
+class CatalogPlan(object):
+    """Catalog plan class to measure and identify catalog queries and plan
+    their execution.
+    """
+
+    def __init__(self, catalog, query=None, threshold=0.1):
+        self.init()
+        self.catalog = catalog
+        self.query = query
+        self._key = None
+        self.threshold = threshold
+
+        parent = aq_parent(catalog)
+        path = getattr(aq_base(parent), 'getPhysicalPath', None)
+        if path is None:
+            path = ('', 'NonPersistentCatalog')
+        else:
+            path = tuple(parent.getPhysicalPath())
+        self.cid = path
+
+    def init(self):
+        self.res = []
+        self.start_time = None
+        self.interim = {}
+        self.stop_time = None
+        self.duration = None
+
+    def prioritymap(self):
+        # holds the benchmark of each index
+        prioritymap = getattr(self.catalog, '_v_prioritymap', None)
+        if prioritymap is None:
+            prioritymap = self.catalog._v_prioritymap = {}
+        return prioritymap
+
+    def benchmark(self):
+        # holds the benchmark of each index
+        return self.prioritymap().get(self.key(), None)
+
+    def plan(self):
+        benchmark = self.benchmark()
+        if not benchmark:
+            return None
+
+        # sort indexes on (mean hits, mean search time)
+        ranking = [((v[0], v[1]), k) for k, v in benchmark.items()]
+        ranking.sort()
+        return [i[1] for i in ranking]
+
+    def start(self):
+        self.init()
+        self.start_time = time.time()
+        benchmark = self.benchmark()
+        if benchmark is None:
+            self.prioritymap()[self.key()] = {}
+
+    def start_split(self, label, result=None):
+        self.interim[label] = (time.time(), None)
+
+    def stop_split(self, name, result=None):
+        current = time.time()
+        start_time, stop_time = self.interim.get(name, (None, None))
+        length = 0
+        if result is not None:
+            # TODO: calculating the length can be expensive
+            length = len(result)
+        self.interim[name] = (start_time, current)
+        dt = current - start_time
+        self.res.append((name, current - start_time, length))
+
+        # remember index's hits, search time and calls
+        benchmark = self.benchmark()
+        if name not in benchmark:
+            benchmark[name] = (length, dt, 1)
+        else:
+            n, t, c = benchmark[name]
+            n = int(((n*c) + length) / float(c + 1))
+            t = ((t*c) + dt) / float(c + 1)
+            # reset adaption
+            if c % REFRESH_RATE == 0:
+                c = 0
+            c += 1
+            benchmark[name] = (n, t, c)
+
+    def stop(self):
+        self.end_time = time.time()
+        self.duration = self.end_time - self.start_time
+
+        key = self.key()
+        benchmark = self.benchmark()
+        prioritymap = self.prioritymap()
+        prioritymap[key] = benchmark
+
+        # calculate mean time of search
+        stats = getattr(self.catalog, '_v_stats', None)
+        if stats is None:
+            stats = self.catalog._v_stats = {}
+
+        if key not in stats:
+            mt = self.duration
+            c = 1
+        else:
+            mt, c = stats[key]
+            mt = ((mt * c) + self.duration) / float(c + 1)
+            c += 1
+
+        stats[key] = (mt, c)
+        self.log()
+
+    def result(self):
+        return (self.duration, tuple(self.res))
+
+    def key(self):
+        if not self._key:
+            self._key = make_key(self.catalog, self.query)
+        return self._key
+
+    def log(self):
+        # result of stopwatch
+        res = self.result()
+        if res[0] < self.threshold:
+            return
+
+        # The key calculation takes a bit itself, we want to avoid that for
+        # any fast queries. This does mean that slow queries get the key
+        # calculation overhead added to their runtime.
+        key = self.key()
+
+        reports_lock.acquire()
+        try:
+            if self.cid not in reports:
+                reports[self.cid] = {}
+
+            previous = reports[self.cid].get(key)
+            if previous:
+                counter, mean, last = previous
+                mean = (mean * counter + res[0]) / float(counter + 1)
+                reports[self.cid][key] = (counter + 1, mean, res)
+            else:
+                reports[self.cid][key] = (1, res[0], res)
+
+        finally:
+            reports_lock.release()
+
+    def reset(self):
+        reports_lock.acquire()
+        try:
+            reports[self.cid] = {}
+        finally:
+            reports_lock.release()
+
+    def report(self):
+        """Returns a statistic report of catalog queries as list of dicts as
+        follows:
+
+        [{'query': <query_key>,
+          'counter': <hits>
+          'duration': <mean duration>,
+          'last': <details of recent query>,
+         },
+         ...
+        ]
+
+        <details of recent query> := {'duration': <duration of last query>,
+                                      'details': <duration of single indexes>,
+                                     }
+
+        <duration of single indexes> := [{'id': <index_name1>,
+                                          'duration': <duration>,
+                                          'length': <resultset length>,
+                                         },
+                                         ...
+                                        ]
+
+        The duration is provided in millisecond.
+        """
+        rval = []
+        for k, v in reports.get(self.cid, {}).items():
+            info = {
+                'query': k,
+                'counter': v[0],
+                'duration': v[1] * 1000,
+                'last': {'duration': v[2][0] * 1000,
+                         'details': [dict(id=i[0],
+                                          duration=i[1]*1000,
+                                          length=i[2])
+                                     for i in v[2][1]],
+                        },
+                }
+            rval.append(info)
+
+        return rval

Deleted: Zope/trunk/src/Products/ZCatalog/report.py
===================================================================
--- Zope/trunk/src/Products/ZCatalog/report.py	2010-08-01 20:26:37 UTC (rev 115349)
+++ Zope/trunk/src/Products/ZCatalog/report.py	2010-08-01 20:36:07 UTC (rev 115350)
@@ -1,294 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2010 Zope Foundation and Contributors.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-
-import time
-from thread import allocate_lock
-
-from Acquisition import aq_base
-from Acquisition import aq_parent
-from Products.PluginIndexes.interfaces import IUniqueValueIndex
-
-reports_lock = allocate_lock()
-reports = {}
-
-value_indexes_lock = allocate_lock()
-value_indexes = frozenset()
-
-MAX_DISTINCT_VALUES = 10
-REFRESH_RATE = 100
-
-
-def determine_value_indexes(indexes):
-    # This function determines all indexes whose values should be respected
-    # in the report key. The number of unique values for the index needs to be
-    # lower than the MAX_DISTINCT_VALUES watermark.
-
-    # TODO: Ideally who would only consider those indexes with a small number
-    # of unique values, where the number of items for each value differs a
-    # lot. If the number of items per value is similar, the duration of a
-    # query is likely similar as well.
-    global value_indexes
-    if value_indexes:
-        # Calculating all the value indexes is quite slow, so we do this once
-        # for the first query. Since this is an optimization only, slightly
-        # outdated results based on index changes in the running process
-        # can be ignored.
-        return value_indexes
-
-    new_value_indexes = set()
-    for name, index in indexes.items():
-        if IUniqueValueIndex.providedBy(index):
-            values = index.uniqueValues()
-            if values and len(list(values)) < MAX_DISTINCT_VALUES:
-                # Only consider indexes which actually return a number
-                # greater than zero
-                new_value_indexes.add(name)
-    try:
-        value_indexes_lock.acquire()
-        value_indexes = frozenset(new_value_indexes)
-    finally:
-        value_indexes_lock.release()
-
-    return value_indexes
-
-
-def clear_value_indexes():
-    global value_indexes
-    try:
-        value_indexes_lock.acquire()
-        value_indexes = frozenset()
-    finally:
-        value_indexes_lock.release()
-
-
-from zope.testing.cleanup import addCleanUp
-addCleanUp(clear_value_indexes)
-del addCleanUp
-
-
-def make_key(catalog, query):
-    indexes = catalog.indexes
-    valueindexes = determine_value_indexes(indexes)
-    key = keys = query.keys()
-
-    values = [name for name in keys if name in valueindexes]
-    if values:
-        # If we have indexes whose values should be considered, we first
-        # preserve all normal indexes and then add the keys whose values
-        # matter including their value into the key
-        key = [name for name in keys if name not in values]
-        for name in values:
-
-            v = query.get(name, [])
-            if isinstance(v, (tuple, list)):
-                v = list(v)
-                v.sort()
-
-            # We need to make sure the key is immutable, repr() is an easy way
-            # to do this without imposing restrictions on the types of values
-            key.append((name, repr(v)))
-
-    key = tuple(sorted(key))
-    return key
-
-
-class CatalogReport(object):
-    """Catalog report class to meassure and identify catalog queries.
-    """
-
-    def __init__(self, catalog, query=None, threshold=0.1):
-        self.init()
-        self.catalog = catalog
-        self.query = query
-        self._key = None
-        self.threshold = threshold
-
-        parent = aq_parent(catalog)
-        path = getattr(aq_base(parent), 'getPhysicalPath', None)
-        if path is None:
-            path = ('', 'NonPersistentCatalog')
-        else:
-            path = tuple(parent.getPhysicalPath())
-        self.cid = path
-
-    def init(self):
-        self.res = []
-        self.start_time = None
-        self.interim = {}
-        self.stop_time = None
-        self.duration = None
-
-    def prioritymap(self):
-        # holds the benchmark of each index
-        prioritymap = getattr(self.catalog, '_v_prioritymap', None)
-        if prioritymap is None:
-            prioritymap = self.catalog._v_prioritymap = {}
-        return prioritymap
-
-    def benchmark(self):
-        # holds the benchmark of each index
-        return self.prioritymap().get(self.key(), None)
-
-    def plan(self):
-        benchmark = self.benchmark()
-        if not benchmark:
-            return None
-
-        # sort indexes on (mean hits, mean search time)
-        ranking = [((v[0], v[1]), k) for k, v in benchmark.items()]
-        ranking.sort()
-        return [i[1] for i in ranking]
-
-    def start(self):
-        self.init()
-        self.start_time = time.time()
-        benchmark = self.benchmark()
-        if benchmark is None:
-            self.prioritymap()[self.key()] = {}
-
-    def start_split(self, label, result=None):
-        self.interim[label] = (time.time(), None)
-
-    def stop_split(self, name, result=None):
-        current = time.time()
-        start_time, stop_time = self.interim.get(name, (None, None))
-        length = 0
-        if result is not None:
-            # TODO: calculating the length can be expensive
-            length = len(result)
-        self.interim[name] = (start_time, current)
-        dt = current - start_time
-        self.res.append((name, current - start_time, length))
-
-        # remember index's hits, search time and calls
-        benchmark = self.benchmark()
-        if name not in benchmark:
-            benchmark[name] = (length, dt, 1)
-        else:
-            n, t, c = benchmark[name]
-            n = int(((n*c) + length) / float(c + 1))
-            t = ((t*c) + dt) / float(c + 1)
-            # reset adaption
-            if c % REFRESH_RATE == 0:
-                c = 0
-            c += 1
-            benchmark[name] = (n, t, c)
-
-    def stop(self):
-        self.end_time = time.time()
-        self.duration = self.end_time - self.start_time
-
-        key = self.key()
-        benchmark = self.benchmark()
-        prioritymap = self.prioritymap()
-        prioritymap[key] = benchmark
-
-        # calculate mean time of search
-        stats = getattr(self.catalog, '_v_stats', None)
-        if stats is None:
-            stats = self.catalog._v_stats = {}
-
-        if key not in stats:
-            mt = self.duration
-            c = 1
-        else:
-            mt, c = stats[key]
-            mt = ((mt * c) + self.duration) / float(c + 1)
-            c += 1
-
-        stats[key] = (mt, c)
-        self.log()
-
-    def result(self):
-        return (self.duration, tuple(self.res))
-
-    def key(self):
-        if not self._key:
-            self._key = make_key(self.catalog, self.query)
-        return self._key
-
-    def log(self):
-        # result of stopwatch
-        res = self.result()
-        if res[0] < self.threshold:
-            return
-
-        # The key calculation takes a bit itself, we want to avoid that for
-        # any fast queries. This does mean that slow queries get the key
-        # calculation overhead added to their runtime.
-        key = self.key()
-
-        reports_lock.acquire()
-        try:
-            if self.cid not in reports:
-                reports[self.cid] = {}
-
-            previous = reports[self.cid].get(key)
-            if previous:
-                counter, mean, last = previous
-                mean = (mean * counter + res[0]) / float(counter + 1)
-                reports[self.cid][key] = (counter + 1, mean, res)
-            else:
-                reports[self.cid][key] = (1, res[0], res)
-
-        finally:
-            reports_lock.release()
-
-    def reset(self):
-        reports_lock.acquire()
-        try:
-            reports[self.cid] = {}
-        finally:
-            reports_lock.release()
-
-    def report(self):
-        """Returns a statistic report of catalog queries as list of dicts as
-        follows:
-
-        [{'query': <query_key>,
-          'counter': <hits>
-          'duration': <mean duration>,
-          'last': <details of recent query>,
-         },
-         ...
-        ]
-
-        <details of recent query> := {'duration': <duration of last query>,
-                                      'details': <duration of single indexes>,
-                                     }
-
-        <duration of single indexes> := [{'id': <index_name1>,
-                                          'duration': <duration>,
-                                          'length': <resultset length>,
-                                         },
-                                         ...
-                                        ]
-
-        The duration is provided in millisecond.
-        """
-        rval = []
-        for k, v in reports.get(self.cid, {}).items():
-            info = {
-                'query': k,
-                'counter': v[0],
-                'duration': v[1] * 1000,
-                'last': {'duration': v[2][0] * 1000,
-                         'details': [dict(id=i[0],
-                                          duration=i[1]*1000,
-                                          length=i[2])
-                                     for i in v[2][1]],
-                        },
-                }
-            rval.append(info)
-
-        return rval

Copied: Zope/trunk/src/Products/ZCatalog/tests/test_plan.py (from rev 115339, Zope/trunk/src/Products/ZCatalog/tests/test_report.py)
===================================================================
--- Zope/trunk/src/Products/ZCatalog/tests/test_plan.py	                        (rev 0)
+++ Zope/trunk/src/Products/ZCatalog/tests/test_plan.py	2010-08-01 20:36:07 UTC (rev 115350)
@@ -0,0 +1,112 @@
+##############################################################################
+#
+# Copyright (c) 2010 Zope Foundation and Contributors. All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+import unittest
+
+
+class dummy(object):
+
+    def __init__(self, num):
+        self.num = num
+
+    def big(self):
+        return self.num > 5
+
+    def numbers(self):
+        return (self.num, self.num + 1)
+
+
+class TestCatalogPlan(unittest.TestCase):
+
+    def setUp(self):
+        from Products.ZCatalog.ZCatalog import ZCatalog
+        self.zcat = ZCatalog('catalog')
+        self.zcat.long_query_time = 0.0
+        self.zcat.addIndex('num', 'FieldIndex')
+        self.zcat.addIndex('big', 'FieldIndex')
+        self.zcat.addIndex('numbers', 'KeywordIndex')
+
+        for i in range(9):
+            obj = dummy(i)
+            self.zcat.catalog_object(obj, str(i))
+
+    def tearDown(self):
+        from Products.ZCatalog.plan import clear_value_indexes
+        clear_value_indexes()
+
+    def test_ReportLength(self):
+        """ tests the report aggregation """
+        self.zcat.manage_resetCatalogPlan()
+
+        self.zcat.searchResults(numbers=4, sort_on='num')
+        self.zcat.searchResults(numbers=1, sort_on='num')
+        self.zcat.searchResults(numbers=3, sort_on='num')
+
+        self.zcat.searchResults(big=True, sort_on='num')
+        self.zcat.searchResults(big=True, sort_on='num')
+        self.zcat.searchResults(big=False, sort_on='num')
+
+        self.zcat.searchResults(num=[5, 4, 3], sort_on='num')
+        self.zcat.searchResults(num=(3, 4, 5), sort_on='num')
+        self.assertEqual(4, len(self.zcat.getCatalogPlan()))
+
+    def test_ReportCounter(self):
+        """ tests the counter of equal queries """
+        self.zcat.manage_resetCatalogPlan()
+
+        self.zcat.searchResults(numbers=5, sort_on='num')
+        self.zcat.searchResults(numbers=6, sort_on='num')
+        self.zcat.searchResults(numbers=8, sort_on='num')
+
+        r = self.zcat.getCatalogPlan()[0]
+        self.assertEqual(r['counter'], 3)
+
+    def test_ReportKey(self):
+        """ tests the query keys for uniqueness """
+        # query key 1
+        key = ('sort_on', ('big', 'True'))
+        self.zcat.manage_resetCatalogPlan()
+
+        self.zcat.searchResults(big=True, sort_on='num')
+        self.zcat.searchResults(big=True, sort_on='num')
+        r = self.zcat.getCatalogPlan()[0]
+
+        self.assertEqual(r['query'], key)
+        self.assertEqual(r['counter'], 2)
+
+        # query key 2
+        key = ('sort_on', ('big', 'False'))
+        self.zcat.manage_resetCatalogPlan()
+
+        self.zcat.searchResults(big=False, sort_on='num')
+        r = self.zcat.getCatalogPlan()[0]
+
+        self.assertEqual(r['query'], key)
+        self.assertEqual(r['counter'], 1)
+
+        # query key 3
+        key = ('sort_on', ('num', '[3, 4, 5]'))
+        self.zcat.manage_resetCatalogPlan()
+
+        self.zcat.searchResults(num=[5, 4, 3], sort_on='num')
+        self.zcat.searchResults(num=(3, 4, 5), sort_on='num')
+        r = self.zcat.getCatalogPlan()[0]
+
+        self.assertEqual(r['query'], key)
+        self.assertEqual(r['counter'], 2)
+
+
+def test_suite():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(TestCatalogPlan))
+    return suite

Deleted: Zope/trunk/src/Products/ZCatalog/tests/test_report.py
===================================================================
--- Zope/trunk/src/Products/ZCatalog/tests/test_report.py	2010-08-01 20:26:37 UTC (rev 115349)
+++ Zope/trunk/src/Products/ZCatalog/tests/test_report.py	2010-08-01 20:36:07 UTC (rev 115350)
@@ -1,112 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2010 Zope Foundation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-
-import unittest
-
-
-class dummy(object):
-
-    def __init__(self, num):
-        self.num = num
-
-    def big(self):
-        return self.num > 5
-
-    def numbers(self):
-        return (self.num, self.num + 1)
-
-
-class TestCatalogReport(unittest.TestCase):
-
-    def setUp(self):
-        from Products.ZCatalog.ZCatalog import ZCatalog
-        self.zcat = ZCatalog('catalog')
-        self.zcat.long_query_time = 0.0
-        self.zcat.addIndex('num', 'FieldIndex')
-        self.zcat.addIndex('big', 'FieldIndex')
-        self.zcat.addIndex('numbers', 'KeywordIndex')
-
-        for i in range(9):
-            obj = dummy(i)
-            self.zcat.catalog_object(obj, str(i))
-
-    def tearDown(self):
-        from Products.ZCatalog.report import clear_value_indexes
-        clear_value_indexes()
-
-    def test_ReportLength(self):
-        """ tests the report aggregation """
-        self.zcat.manage_resetCatalogReport()
-
-        self.zcat.searchResults(numbers=4, sort_on='num')
-        self.zcat.searchResults(numbers=1, sort_on='num')
-        self.zcat.searchResults(numbers=3, sort_on='num')
-
-        self.zcat.searchResults(big=True, sort_on='num')
-        self.zcat.searchResults(big=True, sort_on='num')
-        self.zcat.searchResults(big=False, sort_on='num')
-
-        self.zcat.searchResults(num=[5, 4, 3], sort_on='num')
-        self.zcat.searchResults(num=(3, 4, 5), sort_on='num')
-        self.assertEqual(4, len(self.zcat.getCatalogReport()))
-
-    def test_ReportCounter(self):
-        """ tests the counter of equal queries """
-        self.zcat.manage_resetCatalogReport()
-
-        self.zcat.searchResults(numbers=5, sort_on='num')
-        self.zcat.searchResults(numbers=6, sort_on='num')
-        self.zcat.searchResults(numbers=8, sort_on='num')
-
-        r = self.zcat.getCatalogReport()[0]
-        self.assertEqual(r['counter'], 3)
-
-    def test_ReportKey(self):
-        """ tests the query keys for uniqueness """
-        # query key 1
-        key = ('sort_on', ('big', 'True'))
-        self.zcat.manage_resetCatalogReport()
-
-        self.zcat.searchResults(big=True, sort_on='num')
-        self.zcat.searchResults(big=True, sort_on='num')
-        r = self.zcat.getCatalogReport()[0]
-
-        self.assertEqual(r['query'], key)
-        self.assertEqual(r['counter'], 2)
-
-        # query key 2
-        key = ('sort_on', ('big', 'False'))
-        self.zcat.manage_resetCatalogReport()
-
-        self.zcat.searchResults(big=False, sort_on='num')
-        r = self.zcat.getCatalogReport()[0]
-
-        self.assertEqual(r['query'], key)
-        self.assertEqual(r['counter'], 1)
-
-        # query key 3
-        key = ('sort_on', ('num', '[3, 4, 5]'))
-        self.zcat.manage_resetCatalogReport()
-
-        self.zcat.searchResults(num=[5, 4, 3], sort_on='num')
-        self.zcat.searchResults(num=(3, 4, 5), sort_on='num')
-        r = self.zcat.getCatalogReport()[0]
-
-        self.assertEqual(r['query'], key)
-        self.assertEqual(r['counter'], 2)
-
-
-def test_suite():
-    suite = unittest.TestSuite()
-    suite.addTest(unittest.makeSuite(TestCatalogReport))
-    return suite



More information about the Zope-Checkins mailing list