Commit b96667eb authored by Casey Duncan

Added management interface to query words in the lexicon and in the process...

Added management interface to query words in the lexicon and in the process uncovered a BTreeItems bug, whee!
parent 560ace9d
......@@ -200,8 +200,9 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem):
meta_type = 'ZCTextIndex Lexicon'
manage_options = ({'label':'Overview', 'action':'manage_main'},) + \
manage_options = ({'label':'Overview', 'action':'manage_main'},
{'label':'Query', 'action':'queryLexicon'},
) + SimpleItem.manage_options
def __init__(self, id, title='', *pipeline): = str(id)
......@@ -213,7 +214,50 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem):
def getPipelineNames(self):
    """Collect the class name of each pipeline element.

    Walks ``self._pipeline`` in order and returns a list holding the
    ``__class__.__name__`` of every element found there.
    """
    names = []
    for stage in self._pipeline:
        names.append(stage.__class__.__name__)
    return names
_queryLexicon = DTMLFile('dtml/queryLexicon', globals())
def queryLexicon(self, REQUEST, words=None, page=0, rows=20, cols=4):
"""Lexicon browser/query user interface
if words:
wids = []
for word in words:
words = [self.get_word(wid) for wid in wids]
words = self.words()
word_count = len(words)
rows = max(min(rows, 500),1)
cols = max(min(cols, 12), 1)
page_count = word_count / (rows * cols) + \
(word_count % (rows * cols) > 0)
page = max(min(page, page_count - 1), 0)
start = rows * cols * page
end = min(rows * cols * (page + 1), word_count)
if word_count:
words = list(words[start:end])
words = []
columns = []
i = 0
while i < len(words):
columns.append(words[i:i + rows])
i += rows
return self._queryLexicon(self, REQUEST,
manage_main = DTMLFile('dtml/manageLexicon', globals())
......@@ -10,12 +10,12 @@
<span class="form-label">Input Pipeline Stages</span>
<p class="form-help">
Text indexed through this lexicon is processed by the following pipeline
<ol class="form-help">
<dtml-in name="getPipelineNames">
......@@ -2,8 +2,12 @@
<dtml-var manage_tabs>
<p class="form-help">
There is nothing to manage here. Move along.
The ZCTextIndex Lexicon in use by this index is:
<em><dtml-var expr="lexicon.getId()"></em>
<p class="form-help">
<em>Note:</em> You cannot change the lexicon assigned to a ZCTextIndex.
To use another lexicon, delete this index and create a new one that
uses the desired lexicon.
<dtml-var manage_page_footer>
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
Browse the words in the lexicon or enter the word(s) you are interested in
below. Globbing characters (*, ?) are supported.
<dtml-let words_str="' '.join(REQUEST.get('words',[]))">
<form action="&dtml-URL;">
<p class="form-element">
<span class="form-label">Word(s)</span>
<input name="words:tokens" size="20" value="&dtml-words_str;" />
<input type="submit" value="Query" />
<span class="form-label">&nbsp;Output Columns:</span>
<input name="cols:int" size="2" value="&dtml-cols;" />
<span class="form-label">&nbsp;Rows:</span>
<input name="rows:int" size="2" value="&dtml-rows;" />
<hr />
<form action="&dtml-URL;">
<table width="100%" cellpadding="2" cellspacing="0" border="0">
<tr class="section-bar">
<td><span class="form-label">
&dtml-word_count; Words Found<dtml-if word_count>,
Displaying &dtml-start_word;-&dtml-end_word;
<dtml-if expr="page_count > 0">
<td align="right"><span class="form-label">
<select name="page:int" onchange="this.form.submit()">
<dtml-in expr="_.range(page_count)" prefix="page">
<option value="&dtml-page_item;"
<dtml-if expr="page == page_item">
<dtml-var expr="page_item+1">
of &dtml-page_count;
<input type="submit" value="Go" />
<input type="hidden" name="cols:int" value="&dtml-cols;" />
<input type="hidden" name="rows:int" value="&dtml-rows;" />
<input type="hidden" name="words:tokens" value="&dtml-words_str;" />
<dtml-if name="page_columns">
<table width="100%" cellpadding="0" cellspacing="10" border="0">
<dtml-in name="page_columns" prefix="column">
<td align="left" valign="top">
<dtml-var expr="'<br />'.join(column_item)">
<dtml-var manage_page_footer>
......@@ -25,14 +25,13 @@ ZCTextIndex Lexicon - Add: Create a new ZCTextIndex Lexicon
tags. The HTML aware splitter gives best results when all of
the incoming content to index is HTML.
- **Stop Words** To conserve space in the vocabulary, and possibly increase
performance, you can select a stop word remover which subtracts
very common or single letter words from the Lexicon. Bear in
mind that you will not be able to search on removed stop words,
and they will also be removed from queries passed to search
ZCTextIndexes using the Lexicon.
- **Stop Words** To conserve space in the vocabulary, and possibly
increase performance, you can select a stop word remover which
subtracts very common or single letter words from the Lexicon.
Bear in mind that you will not be able to search on removed stop
words, and they will also be removed from queries passed to
search ZCTextIndexes using the Lexicon.
- **Case Normalizer** The case normalizer removes case information from the words in
the Lexicon. If case-sensitive searching is desired, then omit
this element from the pipeline.
- **Case Normalizer** The case normalizer removes case information
from the words in the Lexicon. If case-sensitive searching is
desired, then omit this element from the pipeline.
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment