Commit 98f2a885 authored by Christopher Petrilli's avatar Christopher Petrilli

Merge in of some changes to the Catalog, mostly things that are appearance

driven.
parent fbb0741f
......@@ -595,7 +595,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Perform searches with indexes and sort_index
r=[]
used=self._indexedSearch(kw, sort_index, r.append, used)
if not r: return r
if not r: return LazyCat(r)
# Sort/merge sub-results
if len(r)==1:
......
......@@ -82,8 +82,8 @@
# attributions are listed in the accompanying credits file.
#
##############################################################################
__doc__='''$Id: Lazy.py,v 1.2 2000/05/11 18:54:16 jim Exp $'''
__version__='$Revision: 1.2 $'[11:-2]
__doc__='''$Id: Lazy.py,v 1.3 2001/01/15 16:29:23 petrilli Exp $'''
__version__='$Revision: 1.3 $'[11:-2]
class Lazy:
......@@ -107,6 +107,34 @@ class Lazy:
self._len=l
return l
def __add__(self, other):
    """Concatenate two lazy sequences and return the result as a LazyCat.

    'other' must be an instance of a class that directly subclasses a
    class named 'Lazy'; anything else raises TypeError.

    Operands that are themselves LazyCat instances are flattened: their
    '_seq' list (or, if they have none, their '_data') is spliced into
    the new sequence list instead of nesting the LazyCat itself.

    Neither operand is modified.
    """
    # The check is done by class name, exactly as before, so that it
    # does not depend on how the base classes are implemented.
    isLazy = 0
    try:
        for base in other.__class__.__bases__:
            if base.__name__ == 'Lazy':
                isLazy = 1
                break
    except AttributeError:
        # 'other' has no class/bases information, so it cannot be a
        # lazy sequence.  Only this specific failure is swallowed.
        pass
    if not isLazy:
        raise TypeError(
            "Can not concatenate objects. Both must be lazy sequences.")

    if self.__class__.__name__ == 'LazyCat':
        if hasattr(self, '_seq'):
            # Copy the list: the appends below must not leak into
            # self._seq and silently grow the left operand.
            seq = list(self._seq)
        else:
            seq = [self._data]
    else:
        seq = [self]

    if other.__class__.__name__ == 'LazyCat':
        if hasattr(other, '_seq'):
            seq = seq + other._seq
        else:
            seq.append(other._data)
    else:
        seq.append(other)

    return LazyCat(seq)
def __getslice__(self,i1,i2):
r=[]
for i in range(i1,i2):
......
......@@ -163,14 +163,14 @@ class ZCatalog(Folder, Persistent, Implicit):
'action': 'manage_catalogSchema',
'target':'manage_main',
'help':('ZCatalog','ZCatalog_MetaData-Table.stx')},
{'label': 'Status', # TAB: Status
'action': 'manage_catalogStatus',
'target':'manage_main',
'help':('ZCatalog','ZCatalog_Status.stx')},
{'label': 'Find Objects', # TAB: Find Objects
'action': 'manage_catalogFind',
'target':'manage_main',
'help':('ZCatalog','ZCatalog_Find-Items-to-ZCatalog.stx')},
{'label': 'Advanced', # TAB: Advanced
'action': 'manage_catalogAdvanced',
'target':'manage_main',
'help':('ZCatalog','ZCatalog_Advanced.stx')},
{'label': 'Undo', # TAB: Undo
'action': 'manage_UndoForm',
'help': ('OFSP','Undo.stx')},
......@@ -190,7 +190,7 @@ class ZCatalog(Folder, Persistent, Implicit):
'manage_catalogView', 'manage_catalogFind',
'manage_catalogSchema', 'manage_catalogIndexes',
'manage_catalogStatus',
'manage_catalogAdvanced',
'manage_catalogReindex', 'manage_catalogFoundItems',
'manage_catalogClear', 'manage_addColumn', 'manage_delColumns',
......@@ -211,7 +211,7 @@ class ZCatalog(Folder, Persistent, Implicit):
manage_catalogFind = DTMLFile('dtml/catalogFind',globals())
manage_catalogSchema = DTMLFile('dtml/catalogSchema', globals())
manage_catalogIndexes = DTMLFile('dtml/catalogIndexes', globals())
manage_catalogStatus = DTMLFile('dtml/catalogStatus', globals())
manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals())
threshold=10000
......@@ -271,7 +271,7 @@ class ZCatalog(Folder, Persistent, Implicit):
else:
self.threshold = 10000
RESPONSE.redirect(URL1 + '/manage_catalogStatus?manage_tabs_message=Catalog%20Changed')
RESPONSE.redirect(URL1 + '/manage_catalogAdvanced?manage_tabs_message=Catalog%20Changed')
def manage_catalogObject(self, REQUEST, RESPONSE, URL1, urls=None):
......@@ -445,7 +445,7 @@ class ZCatalog(Folder, Persistent, Implicit):
def _searchable_arguments(self):
r = {}
n={'optional':1}
for name in self._catalog.schema.keys():
for name in self._catalog.indexes.keys():
r[name]=n
return r
......
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<br />
<table width="100%" cellspacing="0" cellpadding="2" border="0">
<tr class="section-bar">
<td colspan="2" align="left">
<div class="form-label">
Catalog Maintenance
</div>
</td>
</tr>
<tr>
<td align="left" valign="top">
<p class="form-help"> Updating the catalog will update all catalog
records and remove invalid records. It does this by deleting all
indexes and re-cataloging all currently indexed objects.
</p>
</td>
<td align="right" valign="top">
<form action="<dtml-var URL1>">
<input class="form-element" type="submit"
name="manage_catalogReindex:method" value=" Update Catalog ">
</form>
</td>
</tr>
<tr>
<td align="left" valign="top">
<p class="form-help">Clearing the catalog will remove all entries.
</p>
</td>
<td align="right" valign="top">
<form action="<dtml-var URL1>">
<input class="form-element" type="submit"
name="manage_catalogClear:method" value=" Clear Catalog ">
</form>
</td>
</tr>
<tr>
<td>
</td>
</tr>
<tr class="section-bar">
<td colspan="2" align="left">
<div class="form-label">
Subtransactions
</div>
</td>
</tr>
<tr>
<td colspan="2" align="left" valign="top">
<p class="form-help"> Subtransactions allow Zope to commit small
parts of a transaction over a period of time instead of all at
once. For ZCatalog, this means using subtransactions can
significantly reduce the memory requirements needed to index huge
amounts of text all at once. Currently, subtransactions are only
applied to text indexes.</p>
<p class="form-help"> If enabled, subtransactions will reduce the memory
requirements of ZCatalog, but <em>at the expense of speed</em>.
If you choose to enable subtransactions, you can adjust how often
ZCatalog commits a subtransaction by adjusting the
<em>threshold</em> below.</p>
<p class="form-help"> If you are using ZCatalog and ZSQL Methods
in the same transaction, you <em>must</em> disable
subtransactions, they are not compatible with ZSQL Methods.</p>
<br />
</td>
</tr>
<tr>
<td align="left" valign="top">
<p>Subtransactions are
<dtml-if threshold>
<font color="green"><b>Enabled</b></font>
<dtml-else>
<font color="red"><b>Disabled</b></font>
</dtml-if></p>
</td>
<td align="right" valign="top">
<form action="<dtml-var URL1>" method="POST">
<div class="form-element">
<dtml-if threshold>
<input class="form-element" type="submit"
name="manage_subbingToggle:method"
value="Disable" />
<dtml-else>
<input class="form-element" type="submit"
name="manage_subbingToggle:method"
value="Enable" />
</dtml-if>
</div>
</form>
</td>
</tr>
<dtml-if threshold>
<tr>
<td align="left" valign="top">
<p class="form-help">The Subtransaction threshold is the number of words the catalog
will index before it commits a subtransaction. If this number
is low, the Catalog will take longer to index but consume less
memory. If this number is higher, the Catalog will index
quickly but consume much more memory.</p>
</td>
<td align="right" valign="top">
<form action="manage_edit" method=POST>
<div class="form-element">
<input name="threshold:int" value="<dtml-var
threshold html_quote>" />
<input type="submit" name="submit" value="Set Threshold">
</div>
</form>
</td>
</tr>
</dtml-if>
</table>
<dtml-var manage_page_footer>
......@@ -28,16 +28,33 @@ that have one or more keywords specified in a search query.
</p>
<form action="<dtml-var URL1>" method="post">
<table cellspacing="0" cellpadding="2" border="0">
<dtml-in index_objects sort=id>
<tr>
<td align="left" valign="top">
<dtml-if name="sequence-start">
<table width="100%" cellspacing="0" cellpadding="2" border="0">
<tr class="list-header">
<td width="1%" align="right" valign="top">&nbsp;</td>
<td width="64%" align="left" valign="top">
<div class="list-item">Index Name</div></td>
<td width="20%" align="right" valign="top">
<div class="list-item">Index Type</div></td>
<td width="15%" align="right" valign="top">
<div class="list-item">Objects Indexed</div></td>
</tr>
</dtml-if>
<dtml-if name="sequence-odd"><tr class="row-normal">
<dtml-else><tr class="row-hilite"></dtml-if>
<td align="right" valign="top">
<input type="checkbox" name="names:list" value="<dtml-var
id html_quote>" />
</td>
<td align="left" valign="top">
<div class="form-text">
<dtml-var id> (<em><dtml-var meta_type></em>)
<td width="60%" align="left" valign="top">
<div class="list-item">
<a href="">&dtml-id;</a></div></td>
<td width="20%" align="right" valign="top">
<div class="list-item">&dtml-meta_type;</div></td>
<td width="15%" align="right" valign="top">
<div class="list-item">
<dtml-var expr="_.len(_['sequence-item'])" thousands_commas>
</div>
</td>
</tr>
......
......@@ -3,25 +3,13 @@
<dtml-if searchResults>
<p class="form-help">
Updating the catalog will update all catalog records and remove
invalid records. Clearing the catalog will remove all entries.
You can also remove or update individual catalog records.
</p>
<form action="<dtml-var URL1>">
<div class="form-element">
<input class="form-element" type="submit"
name="manage_catalogReindex:method" value=" Update Catalog ">
<input class="form-element" type="submit"
name="manage_catalogClear:method" value=" Clear Catalog ">
</form>
<form action="<dtml-var name="URL1">">
<p class="form-text">
<dtml-var id> contains <dtml-var
searchResults fmt=collection-length thousands_commas> record(s).
</p>
<div class="form-text">
<dtml-in searchResults previous size=20 start=query_start >
<a href="<dtml-var URL>?query_start=<dtml-var previous-sequence-start-number>">
[Previous <dtml-var previous-sequence-size> entries]
......@@ -32,35 +20,39 @@ You can also remove or update individual catalog records.
[Next <dtml-var next-sequence-size> entries]
</a>
</dtml-in>
<form action="<dtml-var URL1>">
<input type="hidden" name="individual" value="1">
<div class="form-element">
<input class="form-element" type="submit" value=" Remove "
name="manage_uncatalogObject:method">
<input class="form-element" type="submit" value=" Update "
name="manage_catalogObject:method">
</div>
<table cellspacing="0" cellpadding="2" border="0">
</div>
<table width="100%" cellspacing="0" cellpadding="2" border="0">
<dtml-in searchResults size=20 start=query_start >
<tr>
<td align="left" valign="top">
<input type="checkbox" NAME="urls:list" VALUE="<dtml-var
"getpath(data_record_id_)" html_quote>">
<dtml-if name="sequence-start">
<tr class="list-header">
<td width="5%" align="right" colspan="2" valign="top">&nbsp;</td>
<td width="80%" align="left" valign="top">
<div class="list-item">Object Identifier</div></td>
<td width="15%" align="left" valign="top">
<div class="list-item">Type</div></td>
</tr>
</dtml-if>
<dtml-if name="sequence-odd"><tr class="row-normal">
<dtml-else><tr class="row-hilite"></dtml-if>
<td align="right" valign="top">
<input type="checkbox" NAME="urls:list" VALUE="&dtml-getPath;">
</td>
<td align="left" valign="top">&nbsp;</td>
<td align="left" valign="top">
<div class="form-text">
<dtml-var meta_type>
<a href="<dtml-var
"getURL" url_quote>/manage_workspace">&dtml-getPath;</a>
</div>
</td>
<td align="left" valign="top">
<div class="form-text">
<a href="<dtml-var
"getpath(data_record_id_)" url_quote>/manage_workspace"><dtml-var
"getpath(data_record_id_)"><dtml-if title> (<dtml-var
title>)</dtml-if></a>
<dtml-with name="aq_self" only>
<dtml-if name="meta_type">
<dtml-var name="meta_type" size="15">
<dtml-else>
<i>Unknown</i>
</dtml-if>
</dtml-with>
</div>
</td>
</tr>
......@@ -74,19 +66,6 @@ You can also remove or update individual catalog records.
name="manage_catalogObject:method">
</div>
</form>
<dtml-in searchResults previous size=20 start=query_start >
<a href="<dtml-var URL>?query_start=<dtml-var previous-sequence-start-number>">
[Previous <dtml-var previous-sequence-size> entries]
</a>
</dtml-in>
<dtml-in searchResults next size=20 start=query_start >
<a href="<dtml-var URL>?query_start=<dtml-var next-sequence-start-number>">
[Next <dtml-var next-sequence-size> entries]
</a>
</dtml-in>
<dtml-else>
<p class="form-text">
There are no objects in the Catalog.
......
......@@ -4,6 +4,8 @@ Vocabulary
Vocabularies display a list of all the unique words that are
indexed by a Catalog or Catalogs that use this Vocabulary object.
This view allows you to browse all of the words in the Vocabulary
This view allows you to browse all of the words in the Vocabulary.
Currently, Vocabularies are only used by TextIndexes.
......@@ -5,4 +5,3 @@ Vocabulary - Query: Query Vocabulary
This form lets you query the Vocabulary for words that match a
certain pattern.
Controls
......@@ -2,8 +2,8 @@ Vocabulary - Vocabulary: Description here
Description
Details here
Vocabularies display a list of all the unique words that are
indexed by a Catalog or Catalogs that use this Vocabulary object.
This view allows you to browse all of the words in the Vocabulary.
Controls
Details here
Currently, Vocabularies are only used by TextIndexes.
ZCatalog - Advanced: Control advanced Catalog features
Description
This view allows you to control advanced catalog features.
Subtransactions reduce the memory requirements of ZCatalog,
but at the expense of speed. If you choose to enable
subtransactions, you can adjust how often ZCatalog commits a
subtransaction by adjusting the threshold. The index status
reports how many objects are cataloged in each index.
Controls
'Update Catalog' -- Clicked, it will update the meta data for
all of the objects in the Catalog. **Note:** The Catalog is
not being updated after each transaction.
'Clear Catalog' -- Clicked, all objects are removed from the
Catalog.
'Enable/Disable' -- Toggles the Subtransaction system. When it is
enabled, the threshold specified below is used.
'Set Threshold' -- Allows you to specify how often the
ZCatalog will commit the subtransactions. The default value is
10000.
......@@ -5,32 +5,26 @@ ZCatalog - Cataloged Objects: Manage catalog entries
This view allows you to manage catalog entries.
The catalog holds records which refer to Zope objects. If the
catalog currently contains records they will be listed in this view.
catalog currently contains records they will be listed in this
view.
Controls
'Update Catalog' -- Clicked, it will update the meta data for all of the objects in the
Catalog.
**Note:** The Catalog is not being updated after each transaction.
'[List of objects]' -- Each line describes one object. The
checkbox at the front of the line allows you to mark the
object for removal ('Remove') or for updating ('Update').
Next the object type is listed, followed by the object path
and the object title. Click the path and title link of an
object to examine its individual catalog record.
'Clear Catalog' -- Clicked, all objects are removed from the Catalog.
'Next/Previous XX entries' -- Allows you to see more of the
cataloged objects. These two links will only appear if you
have enough objects in the Catalog.
'Size' -- Indicates the amount of objects in the Catalog.
'Update' -- Clicked, Zope updates its indexes and metadata for
the object.
'[List of objects]' -- Each line describes one object. The checkbox at the front of the
line allows you to checkmark the object for deletion
('Delete') or to update the objects ('Update'). Next
the object type is listed, followed by the object path and the
object title. Clicking on the path and title of the object, you
can examine individual catalog records by clicking on the record
name links.
'Next/Previous XX entries' -- Allows you to see more of the cataloged objects. These two links
will only appear if you have enough objects in the Catalog.
'Update' -- Clicked, Zope greps the newest content for each checked object
and updates the meta data (the cataloged attributes of the object)
in the Catalog.
'Remove' -- Clicked, it removes all the selected objects from the Catalog.
**Note:** No objects are deleted from the database.
'Remove' -- Clicked, it removes all the selected objects from
the Catalog. **Note:** No objects are deleted from the
database.
......@@ -85,7 +85,7 @@
"""Simple column indices"""
__version__='$Revision: 1.20 $'[11:-2]
__version__='$Revision: 1.21 $'[11:-2]
from Globals import Persistent
......@@ -154,6 +154,17 @@ class UnIndex(Persistent, Implicit):
def __len__(self):
return len(self._unindex)
def histogram(self):
    """Summarise how the forward index rows are distributed by size.

    Returns a dictionary mapping a row length (the len() of a value
    stored in self._index) to the number of rows having that length.
    """
    counts = {}
    for members in self._index.values():
        size = len(members)
        try:
            counts[size] = counts[size] + 1
        except KeyError:
            # First row seen with this length.
            counts[size] = 1
    return counts
def removeForwardIndexEntry(self, entry, documentId):
"""Take the entry provided and remove any reference to documentId
in its entry in the index."""
......
......@@ -92,7 +92,7 @@ is no longer known.
"""
__version__='$Revision: 1.31 $'[11:-2]
__version__='$Revision: 1.32 $'[11:-2]
from Globals import Persistent
......@@ -200,10 +200,72 @@ class UnTextIndex(Persistent, Implicit):
self._unindex = IOBTree()
def histogram(self):
    """Return a mapping which provides a histogram of the number of
    elements found at each point in the index.

    The result maps "number of documents in a row" to "number of rows
    with that many documents".

    A row that holds a single document is stored by
    insertForwardIndexEntry as a bare (documentId, score) tuple, so it
    is counted as one element; taking len() of it would wrongly report
    two.  Every other row is a mapping and is counted with len().
    """
    tupleType = type(())
    histogram = {}
    for (key, value) in self._index.items():
        if type(value) is tupleType:
            # Single posting stored as (documentId, score).
            entry = 1
        else:
            entry = len(value)
        histogram[entry] = histogram.get(entry, 0) + 1

    return histogram
def insertForwardIndexEntry(self, entry, documentId, score=1):
    """Record that 'documentId' matches 'entry' with the given score.

    The row kept in self._index for 'entry' changes representation as
    it grows:

      no row yet     -> a bare (documentId, score) tuple is stored
      tuple          -> promoted to a dictionary and stored back
      dictionary     -> updated in place; if it already holds more
                        than four entries when the new one arrives it
                        is first copied into an IIBucket, which is
                        stored back
      anything else  -> treated as an IIBucket and updated in place

    Always returns 1.
    """
    row = self._index.get(entry, None)

    if row is None:
        # Nothing recorded for this entry yet: a tuple is the most
        # compact way to hold the first posting.
        self._index[entry] = (documentId, score)
        return 1

    rowType = type(row)
    if rowType is TupleType:
        # A tuple can only describe one document, so switch to a
        # dictionary now that a second posting is being added.
        promoted = { row[0]: row[1] }
        promoted[documentId] = score
        self._index[entry] = promoted
    elif rowType is DictType and len(row) > 4:
        # The dictionary has outgrown its welcome; move everything
        # into a bucket.
        bucket = IIBucket()
        for (docId, docScore) in row.items():
            bucket[docId] = docScore
        bucket[documentId] = score
        self._index[entry] = bucket
    else:
        # Either a still-small dictionary or an existing IIBucket.
        row[documentId] = score

    return 1
def insertReverseIndexEntry(self, entry, documentId):
    """Insert the correct entry into the reverse indexes for future
    unindexing.

    Appends 'entry' to the sequence stored in self._unindex under
    'documentId' (creating it when missing) and stores the result back.

    index_object freezes a document's reverse entry into a tuple once
    it has finished, so an existing value may be a tuple rather than a
    list.  It is converted back to a list first; appending to the tuple
    directly would raise AttributeError.
    """
    newEntry = self._unindex.get(documentId, None)
    if newEntry is None:
        newEntry = []
    elif type(newEntry) is type(()):
        # Only copy when the stored value is immutable, so repeated
        # calls while indexing a document keep appending to one list.
        newEntry = list(newEntry)
    newEntry.append(entry)
    self._unindex[documentId] = newEntry
def index_object(self, documentId, obj, threshold=None):
""" Index an object:
'documentId' is the integer id of the document
'obj' is the objects to be indexed
......@@ -244,7 +306,7 @@ class UnTextIndex(Persistent, Implicit):
index = self._index
unindex = self._unindex
lexicon = self.getLexicon(self._lexicon)
unindex[documentId] = [] # XXX this should be more intellegent
unindex[documentId] = [] # XXX
wordCount = 0
for word, score in wordList.items():
......@@ -256,39 +318,10 @@ class UnTextIndex(Persistent, Implicit):
self._p_jar.cacheFullSweep(1)
wordId = lexicon.set(word)
indexRow = index.get(wordId)
if indexRow is not None:
indexRow = index[wordId] # Duplicate?
if type(indexRow) is TupleType:
indexRow = {indexRow[0]:indexRow[1]}
indexRow[documentId] = score
index[wordId] = indexRow
unindex[documentId].append(wordId)
elif type(indexRow) is DictType:
if len(indexRow) > 4:
b = IIBucket()
for k, v in indexRow.items():
b[k] = v
indexRow = b
indexRow[documentId] = score
index[wordId] = indexRow
unindex[documentId].append(wordId)
else:
indexRow[documentId] = score
unindex[documentId].append(wordId)
else:
index[wordId] = documentId, score
unindex[documentId].append(wordId)
self.insertForwardIndexEntry(wordId, documentId, score)
self.insertReverseIndexEntry(wordId, documentId)
wordCount = wordCount + 1
unindex[documentId] = tuple(unindex[documentId])
## return the number of words you indexed
return wordCount
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment