Commit 3d4acc86 authored by Michel Pelletier

index_object methods now return the number of words they indexed,

*unless* they internally reached the word threshold, at which point
they did their own commit and reset their counter.  So, the value
returned is always less than the threshold.  This way, huge
documents get the benefit of subtransactions, while lots of small
documents still get subtransacted too.
parent 901b40c4
......@@ -84,7 +84,7 @@
##############################################################################
"""Simple column indices"""
__version__='$Revision: 1.5 $'[11:-2]
__version__='$Revision: 1.6 $'[11:-2]
from Globals import Persistent
import BTree
......@@ -150,7 +150,7 @@ class UnIndex(Persistent):
def __len__(self):
return len(self._unindex)
def index_object(self, i, obj):
def index_object(self, i, obj, threshold=None):
""" index and object 'obj' with integer id 'i'"""
index = self._index
unindex = self._unindex
......
......@@ -87,13 +87,14 @@
"""
__version__='$Revision: 1.6 $'[11:-2]
__version__='$Revision: 1.7 $'[11:-2]
from Globals import Persistent
import BTree, IIBTree, IOBTree
import BTree, IIBTree, IOBTree, OIBTree
BTree=BTree.BTree
IOBTree=IOBTree.BTree
IIBTree=IIBTree.Bucket
OIBTree=OIBTree.BTree
from intSet import intSet
import operator
from Splitter import Splitter
......@@ -162,7 +163,7 @@ class UnTextIndex(Persistent):
return r
def index_object(self, i, obj, tupleType=type(()),
def index_object(self, i, obj, threshold=None, tupleType=type(()),
dictType=type({}), callable=callable):
""" Please document """
......@@ -178,7 +179,7 @@ class UnTextIndex(Persistent):
except:
return 0
d = {}
d = OIBTree()
old = d.has_key
last = None
......@@ -197,8 +198,12 @@ class UnTextIndex(Persistent):
get = index.get
unindex[i] = []
times = 0
for word,score in d.items():
if times > threshold:
get_transaction().commit(1)
times = 0
r = get(word)
if r is not None:
r = index[word]
......@@ -223,6 +228,7 @@ class UnTextIndex(Persistent):
else:
index[word] = i, score
unindex[i].append(word)
times = times + 1
unindex[i] = tuple(unindex[i])
l = len(unindex[i])
......@@ -230,7 +236,7 @@ class UnTextIndex(Persistent):
self._index = index
self._unindex = unindex
return l
return times
def unindex_object(self, i, tt=type(()) ):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment