Commit 809596bd authored by Jeremy Hylton

Change query() method of ZCTextIndex to return nbest, num.

Otherwise, there's no way to report the total number of matches from
which the nbest were selected.

I changed all the functional tests that I knew how to fix, but not the
mhindex stuff.  All unit tests pass.
parent 2594a81b
......@@ -93,13 +93,17 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
return results, (self._fieldname,)
def query(self, query, nbest=10):
# returns a mapping from docids to scores
"""Return pair (mapping from docids to scores, num results).
The num results is the total number of results before trimming
to the nbest results.
"""
tree = QueryParser().parseQuery(query)
results = tree.executeQuery(self.index)
chooser = NBest(nbest)
chooser.addmany(results.items())
return chooser.getbest()
return chooser.getbest(), len(results)
def numObjects(self):
"""Return number of object indexed"""
return self.index.length()
......
......@@ -11,6 +11,7 @@ options:
"""
import os
from time import clock
import ZODB
from ZODB.FileStorage import FileStorage
......@@ -29,15 +30,19 @@ def make_index():
extra.lexicon_id = "lexicon"
caller = Struct()
caller.lexicon = Lexicon(HTMLWordSplitter(), StopWordRemover())
return ZCTextIndex(extra, caller)
return ZCTextIndex("read", extra, caller)
def main(db, root, dir):
rt["index"] = index = make_index()
rt["files"] = paths = IOBTree()
get_transaction().commit()
zodb_time = 0.0
pack_time = 0.0
files = [os.path.join(dir, file) for file in os.listdir(dir)]
docid = 0
t0 = clock()
for file in files:
if os.path.isdir(file):
files += [os.path.join(file, sub) for sub in os.listdir(file)]
......@@ -51,10 +56,25 @@ def main(db, root, dir):
index.index_object(docid, f)
f.close()
if docid % TXN_INTERVAL == 0:
z0 = clock()
get_transaction().commit()
z1 = clock()
zodb_time += z1 - z0
if docid % PACK_INTERVAL == 0:
p0 = clock()
db.pack()
p1 = clock()
zodb_time += p1 - p0
zodb_time += p1 - p0
z0 = clock()
get_transaction().commit()
z1 = t1 = clock()
total_time = t1 - t0
zodb_time += z1 - z0
if VERBOSE:
print "Total index time", total_time
print "Non-pack time", total_time - pack_time
print "Non-ZODB time", total_time - zodb_time
if __name__ == "__main__":
import sys
......
......@@ -140,7 +140,7 @@ def index(rt, mboxfile, db):
def query(rt, query_str):
idx = rt["index"]
docs = rt["documents"]
results = idx.query(query_str, BEST)
results, num_results = idx.query(query_str, BEST)
print "query:", query_str
print "# results:", len(results)
for docid, score in results:
......
......@@ -114,7 +114,7 @@ class IndexTests(testIndex.IndexTest):
q = QueryParser().parseQuery(raw)
wq = self.index.query_weight(q.terms())
eq(wq, scaled_int(wqs[i]))
r = self.zc_index.query(raw)
r, n = self.zc_index.query(raw)
self.assertEqual(len(r), len(results[i]))
# convert the results to a dict for each checking
d = {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment