Commit 87909446 authored by Tim Peters

These functions actually return IIBuckets, not IIBTrees. Rewrote the docs
and code accordingly.

Also rewrote the code to never create a new IIBucket unless one is
really needed (for example, if the input list has only one (bucket, weight)
pair, and the weight is 1, the input bucket can be returned as-is; and
this case will be common with score maps produced by an Okapi index).
parent 084bafb3
...@@ -14,35 +14,30 @@ ...@@ -14,35 +14,30 @@
"""SetOps -- Weighted intersections and unions applied to many inputs.""" """SetOps -- Weighted intersections and unions applied to many inputs."""
from BTrees.IIBTree import IIBTree, weightedIntersection, weightedUnion from BTrees.IIBTree import IIBucket, weightedIntersection, weightedUnion
from Products.ZCTextIndex.NBest import NBest from Products.ZCTextIndex.NBest import NBest
def mass_weightedIntersection(L): def mass_weightedIntersection(L):
"A list of (mapping, weight) pairs -> their weightedIntersection IIBTree." "A list of (mapping, weight) pairs -> their weightedIntersection IIBucket."
L = [(map, weight) for (map, weight) in L if map is not None] L = [(x, wx) for (x, wx) in L if x is not None]
if not L: if len(L) < 2:
return IIBTree() return _trivial(L)
# Intersect with smallest first. # Intersect with smallest first. We expect the input maps to be
# IIBuckets, so it doesn't hurt to get their lengths repeatedly
# (len(Bucket) is fast; len(BTree) is slow).
L.sort(lambda x, y: cmp(len(x[0]), len(y[0]))) L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
x, w = L[0] (x, wx), (y, wy) = L[:2]
dummy, result = weightedUnion(IIBTree(), x, 1, w) dummy, result = weightedIntersection(x, y, wx, wy)
for x, w in L[1:]: for x, wx in L[2:]:
dummy, result = weightedIntersection(result, x, 1, w) dummy, result = weightedIntersection(result, x, 1, wx)
return result return result
def mass_weightedUnion(L): def mass_weightedUnion(L):
"A list of (mapping, weight) pairs -> their weightedUnion IIBTree." "A list of (mapping, weight) pairs -> their weightedUnion IIBucket."
if not L: if len(L) < 2:
return IIBTree() return _trivial(L)
if len(L) == 1:
# Have to do a union in order to get the input's values
# multiplied by the weight.
x, weight = L[0]
dummy, result = weightedUnion(IIBTree(), x, 1, weight)
return result
# Balance unions as closely as possible, smallest to largest. # Balance unions as closely as possible, smallest to largest.
assert len(L) > 1
merge = NBest(len(L)) merge = NBest(len(L))
for x, weight in L: for x, weight in L:
merge.add((x, weight), len(x)) merge.add((x, weight), len(x))
...@@ -54,3 +49,14 @@ def mass_weightedUnion(L): ...@@ -54,3 +49,14 @@ def mass_weightedUnion(L):
merge.add((z, 1), len(z)) merge.add((z, 1), len(z))
(result, weight), dummy = merge.pop_smallest() (result, weight), dummy = merge.pop_smallest()
return result return result
def _trivial(L):
# L is empty or has only one (mapping, weight) pair. If there is a
# pair, we may still need to multiply the mapping by its weight.
assert len(L) <= 1
if len(L) == 0:
return IIBucket()
[(result, weight)] = L
if weight != 1:
dummy, result = weightedUnion(IIBucket(), result, 0, weight)
return result
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment