Commit 3d4acc86 authored by Michel Pelletier

index_object methods now return the number of words they indexed,

*unless* they internally reached the word threshold, at which point
they did their own commit and reset their counter.  So, the value
returned is always less than the threshold.  This is so that huge
documents get the benefit of subtransactions, while lots of small
documents still get subtransacted too.
parent 901b40c4
...@@ -84,7 +84,7 @@ ...@@ -84,7 +84,7 @@
############################################################################## ##############################################################################
"""Simple column indices""" """Simple column indices"""
__version__='$Revision: 1.5 $'[11:-2] __version__='$Revision: 1.6 $'[11:-2]
from Globals import Persistent from Globals import Persistent
import BTree import BTree
...@@ -150,7 +150,7 @@ class UnIndex(Persistent): ...@@ -150,7 +150,7 @@ class UnIndex(Persistent):
def __len__(self): def __len__(self):
return len(self._unindex) return len(self._unindex)
def index_object(self, i, obj): def index_object(self, i, obj, threshold=None):
""" index and object 'obj' with integer id 'i'""" """ index and object 'obj' with integer id 'i'"""
index = self._index index = self._index
unindex = self._unindex unindex = self._unindex
......
...@@ -87,13 +87,14 @@ ...@@ -87,13 +87,14 @@
""" """
__version__='$Revision: 1.6 $'[11:-2] __version__='$Revision: 1.7 $'[11:-2]
from Globals import Persistent from Globals import Persistent
import BTree, IIBTree, IOBTree import BTree, IIBTree, IOBTree, OIBTree
BTree=BTree.BTree BTree=BTree.BTree
IOBTree=IOBTree.BTree IOBTree=IOBTree.BTree
IIBTree=IIBTree.Bucket IIBTree=IIBTree.Bucket
OIBTree=OIBTree.BTree
from intSet import intSet from intSet import intSet
import operator import operator
from Splitter import Splitter from Splitter import Splitter
...@@ -162,7 +163,7 @@ class UnTextIndex(Persistent): ...@@ -162,7 +163,7 @@ class UnTextIndex(Persistent):
return r return r
def index_object(self, i, obj, tupleType=type(()), def index_object(self, i, obj, threshold=None, tupleType=type(()),
dictType=type({}), callable=callable): dictType=type({}), callable=callable):
""" Please document """ """ Please document """
...@@ -178,7 +179,7 @@ class UnTextIndex(Persistent): ...@@ -178,7 +179,7 @@ class UnTextIndex(Persistent):
except: except:
return 0 return 0
d = {} d = OIBTree()
old = d.has_key old = d.has_key
last = None last = None
...@@ -197,8 +198,12 @@ class UnTextIndex(Persistent): ...@@ -197,8 +198,12 @@ class UnTextIndex(Persistent):
get = index.get get = index.get
unindex[i] = [] unindex[i] = []
times = 0
for word,score in d.items(): for word,score in d.items():
if times > threshold:
get_transaction().commit(1)
times = 0
r = get(word) r = get(word)
if r is not None: if r is not None:
r = index[word] r = index[word]
...@@ -223,6 +228,7 @@ class UnTextIndex(Persistent): ...@@ -223,6 +228,7 @@ class UnTextIndex(Persistent):
else: else:
index[word] = i, score index[word] = i, score
unindex[i].append(word) unindex[i].append(word)
times = times + 1
unindex[i] = tuple(unindex[i]) unindex[i] = tuple(unindex[i])
l = len(unindex[i]) l = len(unindex[i])
...@@ -230,7 +236,7 @@ class UnTextIndex(Persistent): ...@@ -230,7 +236,7 @@ class UnTextIndex(Persistent):
self._index = index self._index = index
self._unindex = unindex self._unindex = unindex
return l return times
def unindex_object(self, i, tt=type(()) ): def unindex_object(self, i, tt=type(()) ):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment