Commit be8b6d9b authored by Jeremy Hylton

Try to handle the case where a wid has no wordinfo.

This case can arise when the last occurrence of a word is removed, or
when a lexicon is shared across multiple indexes.

XXX Not sure this code is correct, but it might be and the tests pass.
If it's wrong, we need more tests.
parent ea795e5a
...@@ -147,7 +147,11 @@ class CosineIndex(Persistent): ...@@ -147,7 +147,11 @@ class CosineIndex(Persistent):
L = [] L = []
DictType = type({}) DictType = type({})
for wid in wids: for wid in wids:
d2w = self._wordinfo[wid] # maps docid to w(docid, wid) d2w = self._wordinfo.get(wid) # maps docid to w(docid, wid)
if d2w is None:
# Need a test case to cover this
L.append((IIBucket(), scaled_int(1)))
continue
idf = query_term_weight(len(d2w), N) # this is an unscaled float idf = query_term_weight(len(d2w), N) # this is an unscaled float
#print "idf = %.3f" % idf #print "idf = %.3f" % idf
if isinstance(d2w, DictType): if isinstance(d2w, DictType):
...@@ -165,7 +169,10 @@ class CosineIndex(Persistent): ...@@ -165,7 +169,10 @@ class CosineIndex(Persistent):
for wid in wids: for wid in wids:
if wid == 0: if wid == 0:
continue continue
wt = math.log(1.0 + N / len(self._wordinfo[wid])) map = self._wordinfo.get(wid)
if map is None:
continue
wt = math.log(1.0 + N / len(map))
sum += wt ** 2.0 sum += wt ** 2.0
return scaled_int(math.sqrt(sum)) return scaled_int(math.sqrt(sum))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment