Commit 6cb8ab91 authored by Tim Peters

Compute scaled_int the same way everywhere.

parent 85236fcc
...@@ -21,28 +21,14 @@ from BTrees.IIBTree import IIBTree, IIBucket ...@@ -21,28 +21,14 @@ from BTrees.IIBTree import IIBTree, IIBucket
from Products.ZCTextIndex.IIndex import IIndex from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.BaseIndex import BaseIndex, inverse_doc_frequency from Products.ZCTextIndex.BaseIndex import BaseIndex, \
inverse_doc_frequency, \
scaled_int, SCALE_FACTOR
from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \ from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
mass_weightedUnion mass_weightedUnion
import ZODB import ZODB
# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
# small enough so that scaled values should almost always fit in a signed
# 16-bit int (we're generally storing logs, so a few bits before the radix
# point goes a long way; on the flip side, for reasonably small numbers x
# most of the info in log(x) is in the fractional bits, so we do want to
# save a lot of those).
SCALE_FACTOR = 1024.0
def scaled_int(f, scale=SCALE_FACTOR):
# We expect only positive inputs, so "add a half and chop" is the
# same as round(). Surprisingly, calling round() is significantly more
# expensive.
return int(f * scale + 0.5)
class CosineIndex(BaseIndex): class CosineIndex(BaseIndex):
__implements__ = IIndex __implements__ = IIndex
......
...@@ -24,28 +24,14 @@ from BTrees.IIBTree import IIBTree, IIBucket ...@@ -24,28 +24,14 @@ from BTrees.IIBTree import IIBTree, IIBucket
from Products.ZCTextIndex.IIndex import IIndex from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.BaseIndex import BaseIndex, inverse_doc_frequency from Products.ZCTextIndex.BaseIndex import BaseIndex, \
inverse_doc_frequency, \
scaled_int
from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \ from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
mass_weightedUnion mass_weightedUnion
import ZODB import ZODB
# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
# small enough so that scaled values should almost always fit in a signed
# 16-bit int (we're generally storing logs, so a few bits before the radix
# point goes a long way; on the flip side, for reasonably small numbers x
# most of the info in log(x) is in the fractional bits, so we do want to
# save a lot of those).
SCALE_FACTOR = 1024.0
def scaled_int(f, scale=SCALE_FACTOR):
# We expect only positive inputs, so "add a half and chop" is the
# same as round(). Surprisingly, calling round() is significantly more
# expensive.
return int(f * scale + 0.5)
class OkapiIndex(BaseIndex): class OkapiIndex(BaseIndex):
__implements__ = IIndex __implements__ = IIndex
......
from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex
from Products.ZCTextIndex.tests \ from Products.ZCTextIndex.tests \
import testIndex, testQueryEngine, testQueryParser import testIndex, testQueryEngine, testQueryParser
from Products.ZCTextIndex.CosineIndex import scaled_int, SCALE_FACTOR from Products.ZCTextIndex.BaseIndex import scaled_int, SCALE_FACTOR
from Products.ZCTextIndex.CosineIndex import CosineIndex from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex from Products.ZCTextIndex.OkapiIndex import OkapiIndex
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment