Commit 3135e213 authored by Guido van Rossum

Add usage message and -h option.

Add -w and -W option to dump the word list (by word and by wid,
respectively).

Except KeyboardInterrupt from unqualified except clauses.
parent 830d8ea9
#! /usr/bin/env python2.1 #! /usr/bin/env python2.1
"""MH mail indexer.""" """MH mail indexer.
To index messages from a single folder (messages defaults to 'all'):
mhindex.py [options] -u +folder [messages ...]
To bulk index all messages from several folders:
mhindex.py [options] -b folder ...
To execute a single query:
mhindex.py [options] query
To enter interactive query mode:
mhindex.py [options]
Common options:
-d FILE -- specify the Data.fs to use (default ~/.Data.fs)
-w -- dump the word list in alphabetical order and exit
-W -- dump the word list ordered by word id and exit
Indexing options:
-O -- do a prescan on the data to compute optimal word id assignments;
this is only useful the first time the Data.fs is used
-t N -- commit a transaction after every N messages (default 20000)
-p N -- pack after every N commits (by default no packing is done)
Querying options:
-m N -- show at most N matching lines from the message (default 3)
-n N -- show the N best matching messages (default 3)
"""
import os import os
import re import re
...@@ -36,10 +64,11 @@ MAXLINES = 3 ...@@ -36,10 +64,11 @@ MAXLINES = 3
def main(): def main():
try: try:
opts, args = getopt.getopt(sys.argv[1:], "bd:m:n:Op:t:u") opts, args = getopt.getopt(sys.argv[1:], "bd:hm:n:Op:t:uwW")
except getopt.error, msg: except getopt.error, msg:
print msg print msg
sys.exit(2) print "use -h for help"
return 2
update = 0 update = 0
bulk = 0 bulk = 0
optimize = 0 optimize = 0
...@@ -48,11 +77,15 @@ def main(): ...@@ -48,11 +77,15 @@ def main():
datafs = os.path.expanduser(DATAFS) datafs = os.path.expanduser(DATAFS)
pack = 0 pack = 0
trans = 20000 trans = 20000
dumpwords = dumpwids = 0
for o, a in opts: for o, a in opts:
if o == "-b": if o == "-b":
bulk = 1 bulk = 1
if o == "-d": if o == "-d":
datafs = a datafs = a
if o == "-h":
print __doc__
return
if o == "-m": if o == "-m":
maxlines = int(a) maxlines = int(a)
if o == "-n": if o == "-n":
...@@ -65,7 +98,17 @@ def main(): ...@@ -65,7 +98,17 @@ def main():
trans = ont(a) trans = ont(a)
if o == "-u": if o == "-u":
update = 1 update = 1
if o == "-w":
dumpwords = 1
if o == "-W":
dumpwids = 1
ix = Indexer(datafs, writable=update or bulk, trans=trans, pack=pack) ix = Indexer(datafs, writable=update or bulk, trans=trans, pack=pack)
if dumpwords:
ix.dumpwords()
if dumpwids:
ix.dumpwids()
if dumpwords or dumpwids:
return
if bulk: if bulk:
if optimize: if optimize:
ix.optimize(args) ix.optimize(args)
...@@ -127,6 +170,17 @@ class Indexer: ...@@ -127,6 +170,17 @@ class Indexer:
self.maxdocid = 0 self.maxdocid = 0
print len(self.docpaths), "Document ids" print len(self.docpaths), "Document ids"
print len(self.path2docid), "Pathnames" print len(self.path2docid), "Pathnames"
print self.index.lexicon.length(), "Words"
def dumpwids(self):
lexicon = self.index.lexicon
for wid in lexicon.wids():
print "%10d %s" % (wid, lexicon.get_word(wid))
def dumpwords(self):
lexicon = self.index.lexicon
for word in lexicon.words():
print "%10d %s" % (lexicon.get_wid(word), word)
def close(self): def close(self):
self.root = None self.root = None
...@@ -162,6 +216,8 @@ class Indexer: ...@@ -162,6 +216,8 @@ class Indexer:
continue continue
try: try:
results, n = self.timequery(text, top + nbest) results, n = self.timequery(text, top + nbest)
except KeyboardInterrupt:
raise
except: except:
reportexc() reportexc()
text = "" text = ""
...@@ -367,6 +423,8 @@ class Indexer: ...@@ -367,6 +423,8 @@ class Indexer:
self.getheaders(m, L) self.getheaders(m, L)
try: try:
self.getmsgparts(m, L, 0) self.getmsgparts(m, L, 0)
except KeyboardInterrupt:
raise
except: except:
print "(getmsgparts failed:)" print "(getmsgparts failed:)"
reportexc() reportexc()
...@@ -471,4 +529,4 @@ def reportexc(): ...@@ -471,4 +529,4 @@ def reportexc():
traceback.print_exc() traceback.print_exc()
if __name__ == "__main__": if __name__ == "__main__":
main() sys.exit(main())
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment