[Checkins] SVN: lovely.tag/trunk/s use iobtree for tag persistence instead of persistent list and intid util, should be much faster now with big numbers of tags

Bernd Dorn bernd.dorn at lovelysystems.com
Wed Apr 4 11:03:09 EDT 2007


Log message for revision 74003:
  Use an IOBTree for tag persistence instead of a persistent list and the intid utility; this should be much faster with large numbers of tags.

Changed:
  U   lovely.tag/trunk/setup.py
  U   lovely.tag/trunk/src/lovely/tag/engine.py
  U   lovely.tag/trunk/src/lovely/tag/generations/__init__.py
  A   lovely.tag/trunk/src/lovely/tag/generations/evolve2.py

-=-
Modified: lovely.tag/trunk/setup.py
===================================================================
--- lovely.tag/trunk/setup.py	2007-04-04 14:28:22 UTC (rev 74002)
+++ lovely.tag/trunk/setup.py	2007-04-04 15:03:08 UTC (rev 74003)
@@ -3,7 +3,7 @@
 
 setup (
     name='lovely.tag',
-    version='0.2',
+    version='0.3',
     author = "Lovely Systems",
     author_email = "office at lovelysystems.com",
     description = "A tagging engine for zope 3",

Modified: lovely.tag/trunk/src/lovely/tag/engine.py
===================================================================
--- lovely.tag/trunk/src/lovely/tag/engine.py	2007-04-04 14:28:22 UTC (rev 74002)
+++ lovely.tag/trunk/src/lovely/tag/engine.py	2007-04-04 15:03:08 UTC (rev 74003)
@@ -18,13 +18,12 @@
 __docformat__ = "reStructuredText"
 
 import persistent
-import persistent.list
 import zope.interface
 from zope import component
 from BTrees import IOBTree, OOBTree
+import random
 
 from zope.app.container import contained
-from zope.app import intid
 from zope.app.intid.interfaces import IIntIdRemovedEvent, IIntIds
 from lovely.tag import interfaces, tag
 from zope.dottedname.resolve import resolve
@@ -34,21 +33,44 @@
     zope.interface.implements(interfaces.ITaggingEngine,
                               interfaces.ITaggingStatistics)
 
+    _v_nextid = None
+    
     def __init__(self):
         super(TaggingEngine, self).__init__()
         self._reset()
 
+    def _generateId(self):
+        """Generate an id which is not yet taken.
+
+        This tries to allocate sequential ids so they fall into the
+        same BTree bucket, and randomizes if it stumbles upon a
+        used one.
+        """
+        while True:
+            if self._v_nextid is None:
+                self._v_nextid = random.randrange(0, 2**31)
+            uid = self._v_nextid
+            self._v_nextid += 1
+            if uid not in self._tagid_to_obj:
+                return uid
+            self._v_nextid = None
+
+    def _add(self, tagObj):
+        uid = self._generateId()
+        self._tagid_to_obj[uid] = tagObj
+        # set the __parent__ in order to get a _p_oid for the object
+        #tagObj.__parent__ = self
+        return uid
+
     def _reset(self):
-        # Used purely to provide a persistence reference for the tag objects
-        self._tags = persistent.list.PersistentList()
-
-        # Used to generate ids for tag objects
-        self._tag_ids = intid.IntIds()
+        # mapping of tagid to tag object
+        self._tagid_to_obj = IOBTree.IOBTree()
         # Our indices for efficient querying
         self._user_to_tagids = OOBTree.OOBTree()
         self._item_to_tagids = IOBTree.IOBTree()
         self._name_to_tagids = OOBTree.OOBTree()
 
+
     @property
     def tagCount(self):
         return len(self._name_to_tagids)
@@ -65,24 +87,25 @@
         """See interfaces.ITaggingEngine"""
         tags_item = set(self._item_to_tagids.get(item, ()))
         tags_user = set(self._user_to_tagids.get(user, ()))
-
+        tags_tags = set()
+        for t in tags:
+            tags_tags.update(self._name_to_tagids.get(t, ()))
         old_tag_ids = tags_item.intersection(tags_user)
+        # any tags of the same user/item that are not in tags
+        old_tag_ids = old_tag_ids.difference(tags_tags)
             
-        old_tags = set([self._tag_ids.getObject(id)
+        old_tags = set([self._tagid_to_obj[id]
                         for id in old_tag_ids])
 
         new_tags = set([tag.Tag(item, user, tagName)
                         for tagName in tags])
 
         add_tags = new_tags.difference(old_tags)
-        del_tags = old_tags.difference(new_tags)
-
+        
+        add_tag_ids = []
         for tagObj in add_tags:
-            self._tags.append(tagObj)
-            # set the __parent__ in order to get a _p_oid for the object
-            tagObj.__parent__ = self
-            id = self._tag_ids.register(tagObj)
-
+            id = self._add(tagObj)
+            add_tag_ids.append(id)
             ids = self._user_to_tagids.get(user)
             if ids is None:
                 self._user_to_tagids[user] = IOBTree.IOSet((id,))
@@ -100,16 +123,13 @@
                 self._name_to_tagids[tagObj.name] = IOBTree.IOSet((id,))
             else:
                 ids.insert(id)
+        del_tag_ids = old_tag_ids.difference(add_tag_ids)
+        self._delTags(del_tag_ids)
 
-        self._delTags(del_tags)
-
-    def _delTags(self, del_tags):
+    def _delTags(self, del_tag_ids):
         """deletes tags in iterable"""
-        for tagObj in del_tags:
-            id = self._tag_ids.getId(tagObj)
-            self._tag_ids.unregister(tagObj)
-            self._tags.remove(tagObj)
-
+        for id in del_tag_ids:
+            tagObj = self._tagid_to_obj[id]
             self._user_to_tagids[tagObj.user].remove(id)
             if not len(self._user_to_tagids[tagObj.user]):
                 del self._user_to_tagids[tagObj.user]
@@ -121,6 +141,7 @@
             self._name_to_tagids[tagObj.name].remove(id)
             if not len(self._name_to_tagids[tagObj.name]):
                 del self._name_to_tagids[tagObj.name]
+            del self._tagid_to_obj[id]
 
     def delete(self, item=None, user=None, tag=None):
         tags = None
@@ -138,8 +159,6 @@
                 tags = tags.intersection(name_tags)
             else:
                 tags = name_tags
-        # make objects
-        tags = map(self._tag_ids.getObject, tags)
         self._delTags(tags)
 
     def getTags(self, items=None, users=None):
@@ -174,14 +193,14 @@
 
     def getTagObjects(self, items=None, users=None,  tags=None):
         ids = self._getTagIds(items, users, tags)
-        return set([self._tag_ids.getObject(id) for id in ids])
+        return set([self._tagid_to_obj[id] for id in ids])
 
     def getItems(self, tags=None, users=None):
         """See interfaces.ITaggingEngine"""
         uids = self._getTagIds(items=None, users=users, tags=tags)
         res = set()
         for uid in uids:
-            o = self._tag_ids.queryObject(uid)
+            o = self._tagid_to_obj.get(uid)
             if o is not None:
                 res.add(o.item)
         return res
@@ -189,7 +208,7 @@
     def getUsers(self, tags=None, items=None):
         """See interfaces.ITaggingEngine"""
         ids = self._getTagIds(items=items, users=None, tags=tags)
-        return set([self._tag_ids.getObject(id).user for id in ids])
+        return set([self._tagid_to_obj[id].user for id in ids])
 
     def getRelatedTags(self, tag, degree=1):
         """See interfaces.ITaggingEngine"""
@@ -201,7 +220,7 @@
             for cur_name in previous_degree_tags:
                 tagids = self._name_to_tagids.get(cur_name, ())
                 for tagid in tagids:
-                    tag_obj = self._tag_ids.getObject(tagid)
+                    tag_obj = self._tagid_to_obj[tagid]
                     degree_tags.update(self.getTags(
                         items=(tag_obj.item,), users=(tag_obj.user,) ))
             # After all the related tags of this degree were found, update the
@@ -264,7 +283,8 @@
         return set(result.items())
 
     def __repr__(self):
-        return '<%s entries=%i>' %(self.__class__.__name__, len(self._tags))
+        return '<%s entries=%i>' %(self.__class__.__name__,
+                                   len(self._tagid_to_obj))
 
 
     def cleanStaleItems(self):
@@ -285,7 +305,7 @@
             return 0
         tagIds = set(self._name_to_tagids.get(old, ()))
         for tagId in tagIds:
-            tagObj = self._tag_ids.getObject(tagId)
+            tagObj = self._tagid_to_obj[tagId]
             tagObj.name = new
         newTagIds = IOBTree.IOSet(self._name_to_tagids.get(new, ()))
         newTagIds.update(tagIds)

Modified: lovely.tag/trunk/src/lovely/tag/generations/__init__.py
===================================================================
--- lovely.tag/trunk/src/lovely/tag/generations/__init__.py	2007-04-04 14:28:22 UTC (rev 74002)
+++ lovely.tag/trunk/src/lovely/tag/generations/__init__.py	2007-04-04 15:03:08 UTC (rev 74003)
@@ -14,6 +14,6 @@
 
 
 schemaManager = SchemaManager(
-    minimum_generation=1,
-    generation=1,
+    minimum_generation=2,
+    generation=2,
     package_name=pkg)

Added: lovely.tag/trunk/src/lovely/tag/generations/evolve2.py
===================================================================
--- lovely.tag/trunk/src/lovely/tag/generations/evolve2.py	2007-04-04 14:28:22 UTC (rev 74002)
+++ lovely.tag/trunk/src/lovely/tag/generations/evolve2.py	2007-04-04 15:03:08 UTC (rev 74003)
@@ -0,0 +1,21 @@
+from zope.app.zopeappgenerations import getRootFolder
+from zope.app.generations.utility import findObjectsProviding
+from lovely.tag.interfaces import ITaggingEngine
+from BTrees import IOBTree
+from zope.app.component.interfaces import ISite
+
+def evolve(context):
+
+    """evolve to use an IOBTree instead of PersistentList"""
+    for s in findObjectsProviding(getRootFolder(context), ISite):
+        for engine in s.getSiteManager().getAllUtilitiesRegisteredFor(
+            ITaggingEngine):
+
+            if hasattr(engine, '_tags'):
+                engine._tagid_to_obj = IOBTree.IOBTree()
+                for uid, ref in engine._tag_ids.items():
+                    obj = ref()
+                    engine._tagid_to_obj[uid] = obj
+                del engine._tags
+                del engine._tag_ids
+                


Property changes on: lovely.tag/trunk/src/lovely/tag/generations/evolve2.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native



More information about the Checkins mailing list