Commit 6cb8ab91 authored by Tim Peters's avatar Tim Peters

Compute scaled_int the same way everywhere.

parent 85236fcc
......@@ -21,28 +21,14 @@ from BTrees.IIBTree import IIBTree, IIBucket
from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.BaseIndex import BaseIndex, inverse_doc_frequency
from Products.ZCTextIndex.BaseIndex import BaseIndex, \
inverse_doc_frequency, \
scaled_int, SCALE_FACTOR
from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
mass_weightedUnion
import ZODB
# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
# small enough so that scaled values should almost always fit in a signed
# 16-bit int (we're generally storing logs, so a few bits before the radix
# point goes a long way; on the flip side, for reasonably small numbers x
# most of the info in log(x) is in the fractional bits, so we do want to
# save a lot of those).
SCALE_FACTOR = 1024.0
def scaled_int(f, scale=SCALE_FACTOR):
# We expect only positive inputs, so "add a half and chop" is the
# same as round(). Surprising, calling round() is significantly more
# expensive.
return int(f * scale + 0.5)
class CosineIndex(BaseIndex):
__implements__ = IIndex
......
......@@ -24,28 +24,14 @@ from BTrees.IIBTree import IIBTree, IIBucket
from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.BaseIndex import BaseIndex, inverse_doc_frequency
from Products.ZCTextIndex.BaseIndex import BaseIndex, \
inverse_doc_frequency, \
scaled_int
from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
mass_weightedUnion
import ZODB
# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
# small enough so that scaled values should almost always fit in a signed
# 16-bit int (we're generally storing logs, so a few bits before the radix
# point goes a long way; on the flip side, for reasonably small numbers x
# most of the info in log(x) is in the fractional bits, so we do want to
# save a lot of those).
SCALE_FACTOR = 1024.0
def scaled_int(f, scale=SCALE_FACTOR):
# We expect only positive inputs, so "add a half and chop" is the
# same as round(). Surprising, calling round() is significantly more
# expensive.
return int(f * scale + 0.5)
class OkapiIndex(BaseIndex):
__implements__ = IIndex
......
from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex
from Products.ZCTextIndex.tests \
import testIndex, testQueryEngine, testQueryParser
from Products.ZCTextIndex.CosineIndex import scaled_int, SCALE_FACTOR
from Products.ZCTextIndex.BaseIndex import scaled_int, SCALE_FACTOR
from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment