[ZODB-Dev] Big OOBTree --> memory exhausted

Thomas Guettler zopestoller@thomas-guettler.de
Thu, 31 Jan 2002 11:50:17 +0100


This is a multi-part message in MIME format.
--------------070107030008020208040203
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit

Hi,

I am programming an mxmRelation-like class which uses OOBTree to store the
relations between the objects.
(mxmRelation: http://www.zope.org/Members/maxm/productList/mxmRelations)

Although I commit the transaction after every 10,000 adds,
virtual memory is exhausted after several minutes.

If I understood BTrees and ZODB correctly, my machine should relate the
objects until it runs out of disk space. Why does my memory get exhausted?

The Relation object is part of a Zope Folder.

This code is causing the trouble (I try to relate 200*300,000 objects):

# Relate each of 200 objects to each of 300,000 others (60,000,000 pairs).
# The local is called `offset` so it does not shadow the builtin max().
offset=300000
l1=range(0, 200)
l2=range(offset, 2*offset)
self.relate(l1, l2)


this is the code which does the relation:

    def relate(self, obj1, obj2):
        """Relate two objects. Both arguments can be lists.

        If they are lists (or tuples) each item in obj1 gets related to
        each item in obj2. Every pair is stored in both directions.

        The current transaction is committed each time more than 10000
        pairs have been added since the previous commit.
        """
        # isinstance() also accepts list/tuple subclasses; an exact type
        # comparison would store such a sequence as one single object.
        if not isinstance(obj1, (tuple, list)): obj1=[obj1]
        if not isinstance(obj2, (tuple, list)): obj2=[obj2]
        pending=0   # pairs added since the last commit
        commits=0   # number of commits done so far (only used for output)
        for o1 in obj1:
            for o2 in obj2:
                self.relate_one_direction(o1, o2)
                self.relate_one_direction(o2, o1)
                pending=pending+1
                if pending>10000:
                    # NOTE(review): committing by itself may not shrink the
                    # ZODB connection's in-memory object cache - presumably
                    # that is why memory keeps growing; confirm against the
                    # ZODB Connection documentation.
                    get_transaction().commit()
                    # Single-argument form: valid as a print statement and
                    # as a print() call, with the same output either way.
                    print("commit:  %s" % commits)
                    commits=commits+1
                    pending=0
                   
    def relate_one_direction(self, obj1, obj2):
        """Record obj2 as related to obj1 (this direction only).

        The inner tree for obj1 is created on first use; obj2 is stored
        as a key whose value is always None.
        """
        bucket=self._tree.get(obj1, None)
        if bucket is None:
            # First relation for obj1: create its inner tree and register it.
            bucket=self._tree[obj1]=OOBTree()
        bucket[obj2]=None

The whole Relations.py is attached.

thomas

--------------070107030008020208040203
Content-Type: text/plain;
 name="Relations.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="Relations.py"

# inspired by mxmRelation
#   http://www.zope.org/Members/maxm/productList/mxmRelations
# Thomas Guettler http://www.thomas-guettler.de
# TODO:
#  +Test big relation
# The class Relations stores relations between objects.
# It uses OOBTree to store the relations, which means it is
# persistent and can scale (it can store more data than you have RAM)
# Example:
# r=Relations()
# r.relate((1, 2), (3, 4))
# --> 1-3, 1-4, 2-3, 2-4 are related
#
# You can relate an object to itself. But you can store only
# one relation to itself.
#
# For example usage look at the test cases at the end of the file.

from BTrees.OOBTree import OOBTree
from OFS.SimpleItem import SimpleItem

class Relations(SimpleItem):
    """Store symmetric relations between objects in nested OOBTrees.

    self._tree maps every related object to an inner OOBTree whose keys
    are the objects it is related to; the inner values are always None.
    Each relation is recorded in both directions.
    """

    def __init__(self):
        self._tree = OOBTree()

    def _as_sequence(self, obj):
        # Lists and tuples (including subclasses) are used as they are;
        # anything else is treated as one single object.
        if isinstance(obj, (tuple, list)):
            return obj
        return [obj]

    def relate(self, obj1, obj2):
        """Relate two objects. Both arguments can be lists.

        If they are lists (or tuples) each item in obj1 gets related to
        each item in obj2. Every pair is stored in both directions.

        The current transaction is committed each time more than 10000
        pairs have been added since the previous commit.
        """
        obj1 = self._as_sequence(obj1)
        obj2 = self._as_sequence(obj2)
        pending = 0   # pairs added since the last commit
        commits = 0   # number of commits done so far (only used for output)
        for o1 in obj1:
            for o2 in obj2:
                self.relate_one_direction(o1, o2)
                self.relate_one_direction(o2, o1)
                pending = pending + 1
                if pending > 10000:
                    # NOTE(review): committing by itself may not shrink the
                    # ZODB connection's in-memory object cache - presumably
                    # that is why memory keeps growing; confirm against the
                    # ZODB Connection documentation.
                    get_transaction().commit()
                    # Single-argument form: valid as a print statement and
                    # as a print() call, with the same output either way.
                    print("commit:  %s" % commits)
                    commits = commits + 1
                    pending = 0

    def relate_one_direction(self, obj1, obj2):
        """Record obj2 as related to obj1 (this direction only)."""
        related = self._tree.get(obj1, None)
        if related is None:
            related = OOBTree()
            self._tree[obj1] = related
        related[obj2] = None

    def get(self, obj):
        """Return a tuple of the objects related to obj.

        Returns None if obj has no relations.
        """
        related = self._tree.get(obj, None)
        if related is None:
            return None
        return tuple(related.keys())

    def delete(self, obj=None):
        """Delete obj from the relation.

        If obj is a list (or tuple) each item in it gets deleted.
        If obj is None all relations are deleted.
        Objects without relations are ignored.
        """
        if obj is None:
            self._tree = OOBTree()
            return
        for item in self._as_sequence(obj):
            self._delete_one(item)

    def _delete_one(self, obj):
        # Remove obj together with every back reference to it.
        related = self._tree.get(obj, None)
        if related is None:
            return
        # Iterate over a copy of the keys: when obj is related to itself,
        # `related` is modified inside the loop.
        for other in tuple(related.keys()):
            back = self._tree.get(other)
            del back[obj]
            #TODO len() is expensive, should be done different
            if len(back) == 0:
                del self._tree[other]
        # A self-relation may already have removed the entry above.
        if self._tree.has_key(obj):
            del self._tree[obj]

    def unrelate(self, obj1, obj2):
        """Unrelate obj1 from obj2. Both can be lists.

        Pairs that are not related are skipped.
        """
        obj1 = self._as_sequence(obj1)
        obj2 = self._as_sequence(obj2)
        for o1 in obj1:
            for o2 in obj2:
                self._unrelate_one_direction(o1, o2)
                # For o1 == o2 this second call finds nothing left to do.
                self._unrelate_one_direction(o2, o1)

    def _unrelate_one_direction(self, obj1, obj2):
        # Drop obj2 from obj1's inner tree; prune the inner tree when empty.
        # Each lookup is guarded so that an unrelated pair, or an entry
        # pruned earlier in the same unrelate() call, cannot raise KeyError.
        related = self._tree.get(obj1, None)
        if related is None or not related.has_key(obj2):
            return
        del related[obj2]
        #TODO len() is expensive, should be done different
        if len(related) == 0:
            del self._tree[obj1]

    def tostring(self):
        "return a dictionary mapping each object to the list of its related objects"
        result = {}
        for key in tuple(self._tree.keys()):
            result[key] = list(self._tree[key].keys())
        return result

    def test_relations(self):
        """
        Test the relations. Returns 'OK' on success, otherwise an error
        value is returned: a tuple of message parts for Error1 to Error4,
        a plain string for the remaining errors.
        """

        #Test with single values
        self.delete()
        self.relate(5, 3)
        l = self.get(5)
        l_wish = (3,)
        if l != l_wish:
            return "Error1: l: ", l, " l_wish: ", l_wish

        #Test with strings
        self.delete()
        self.relate("5", "3")
        l = self.get("5")
        l_wish = ("3",)
        if l != l_wish:
            return "Error2: l: ", l, " l_wish: ", l_wish

        #Test with list values
        self.delete()
        self.relate([1, 2, 3, 4], [5, 6, 7, 8])
        l = self.get(2)
        l_wish = [5, 6, 7, 8]
        l = list(l)
        l.sort()
        if l != l_wish:
            return "Error3: l: ", l, " l_wish: ", l_wish

        #Test with list values other way
        self.delete()
        self.relate([1, 2, 3, 4], [5, 6, 7, 8])
        l = self.get(5)
        l_wish = [1, 2, 3, 4]
        l = list(l)
        l.sort()
        if l != l_wish:
            return "Error4: l: ", l, " l_wish: ", l_wish

        #Test for failed relation
        self.delete()
        self.relate([1, 2, 3, 4], [5, 6, 7, 8])
        l = self.get(1000)
        if l is not None:
            return "Error5"

        #Test del
        self.delete()
        self.relate(1, 2)
        self.delete(1)
        if len(self._tree) != 0:
            return "Error6"

        #Test del
        self.delete()
        self.relate([1, 2, 3], 4)
        self.delete(1)
        if len(self._tree) != 3:
            return "Error7"

        #Test del
        self.delete()
        self.relate([1, 2, 3], 4)
        self.delete(4)
        if len(self._tree) != 0:
            return "Error8"

        #Test unrelate
        self.delete()
        self.relate(1, 2)
        self.unrelate(1, 2)
        if len(self._tree) != 0:
            return "Error 9"

        #Test unrelate
        self.delete()
        self.relate(1, 2)
        self.unrelate(2, 1)
        if len(self._tree) != 0:
            return "Error 10"

        #Test unrelate
        self.delete()
        self.relate((1, 2), (3, 4))
        self.unrelate(1, 3)
        if len(self._tree[1]) != 1:
            return "Error 11"
        if len(self._tree[3]) != 1:
            return "Error 12"
        self.unrelate(1, 4)
        if self._tree.has_key(1):
            return "Error 13"

        #Test relation to itself
        self.delete()
        self.relate(1, 1)
        self.unrelate(1, 1)
        if len(self._tree) != 0:
            return "Error 14"
        self.relate(1, 1)
        self.delete(1)
        if len(self._tree) != 0:
            return "Error 15"

        #Test multiple relations to itself
        self.delete()
        self.relate((1, 1), (1, 1))
        self.unrelate(1, 1)
        if len(self._tree) != 0:
            return "Error 16"
        self.relate(1, 1)
        self.delete(1)
        if len(self._tree) != 0:
            return "Error 17"

        ##Test big relation
        # Relates 200 objects to 300,000 others. The local is called
        # `offset` so it does not shadow the builtin max().
        self.delete()
        offset = 300000
        l1 = range(0, 200)
        l2 = range(offset, 2*offset)
        self.relate(l1, l2)

        return "OK"

--------------070107030008020208040203--