Commit b279f61c authored by Jim Fulton's avatar Jim Fulton

added dcprotect

parent 4eafbd2e
......@@ -11,7 +11,7 @@
static char BTree_module_documentation[] =
""
"\n$Id: BTree.c,v 1.8 1997/11/03 15:17:53 jim Exp $"
"\n$Id: BTree.c,v 1.9 1997/11/13 20:38:35 jim Exp $"
;
#define PERSISTENT
......@@ -1720,7 +1720,7 @@ initBTree()
#endif
{
PyObject *m, *d;
char *rev="$Revision: 1.8 $";
char *rev="$Revision: 1.9 $";
UNLESS(PyExtensionClassCAPI=PyCObject_Import("ExtensionClass","CAPI"))
return;
......@@ -1765,6 +1765,8 @@ initBTree()
PyDict_SetItemString(d, "__version__",
PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
#include "dcprotect.h"
/* Check for errors */
if (PyErr_Occurred())
......@@ -1780,6 +1782,9 @@ initBTree()
Revision Log:
$Log: BTree.c,v $
Revision 1.9 1997/11/13 20:38:35 jim
added dcprotect
Revision 1.8 1997/11/03 15:17:53 jim
Fixed stupid bug in has_key methods.
......
......@@ -10,7 +10,7 @@
static char intSet_module_documentation[] =
""
"\n$Id: intSet.c,v 1.3 1997/10/01 02:45:58 jim Exp $"
"\n$Id: intSet.c,v 1.4 1997/11/13 20:38:39 jim Exp $"
;
#include <limits.h>
......@@ -527,7 +527,7 @@ void
initintSet()
{
PyObject *m, *d;
char *rev="$Revision: 1.3 $";
char *rev="$Revision: 1.4 $";
UNLESS(ExtensionClassImported) return;
......@@ -555,6 +555,8 @@ initintSet()
PyDict_SetItemString(d, "__version__",
PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
#include "dcprotect.h"
/* Check for errors */
if (PyErr_Occurred())
......@@ -566,6 +568,9 @@ initintSet()
Revision Log:
$Log: intSet.c,v $
Revision 1.4 1997/11/13 20:38:39 jim
added dcprotect
Revision 1.3 1997/10/01 02:45:58 jim
Minor reformat.
......
......@@ -11,7 +11,7 @@
static char BTree_module_documentation[] =
""
"\n$Id: BTree.c,v 1.8 1997/11/03 15:17:53 jim Exp $"
"\n$Id: BTree.c,v 1.9 1997/11/13 20:38:35 jim Exp $"
;
#define PERSISTENT
......@@ -1720,7 +1720,7 @@ initBTree()
#endif
{
PyObject *m, *d;
char *rev="$Revision: 1.8 $";
char *rev="$Revision: 1.9 $";
UNLESS(PyExtensionClassCAPI=PyCObject_Import("ExtensionClass","CAPI"))
return;
......@@ -1765,6 +1765,8 @@ initBTree()
PyDict_SetItemString(d, "__version__",
PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
#include "dcprotect.h"
/* Check for errors */
if (PyErr_Occurred())
......@@ -1780,6 +1782,9 @@ initBTree()
Revision Log:
$Log: BTree.c,v $
Revision 1.9 1997/11/13 20:38:35 jim
added dcprotect
Revision 1.8 1997/11/03 15:17:53 jim
Fixed stupid bug in has_key methods.
......
This diff is collapsed.
__doc__='''\
Simple query parser for InvertedIndex.
Usage:
InvertedIndexQuery.query(query_string, InvertedIndex_Index_instance)
Query strings should be of the form
"keyword1 [and | or | and not] [keyword2 [and | or | and not] ... keywordN]"
Parentheses may be used to alter association rules.
A series of keywords may be surrounded by quotes to perform a proximity
search (InvertedIndex.ResultList.near)
$Id: InvertedIndexQuery.py,v 1.1 1997/09/15 17:25:47 jim Exp $'''
import string, regsub, regex
from types import *
QueryError = 'InvertedIndexQuery.QueryError'
AndNot = 'andnot'
And = 'and'
Or = 'or'
Proximity = 'proximity'
operator_dict = {
AndNot : AndNot,
And : And,
Or : Or,
Proximity : Proximity
}
ws = (string.whitespace,)
class Word:
'''Holds a parsed word in a query'''
def __init__(self, word):
self._word = word
def word(self):
return self._word
def __repr__(self):
return 'Word(%s)' % repr(self._word)
parens_regex = regex.compile("(\|)")
def parens(s):
'''Find the beginning and end of the first set of parentheses in string, s'''
if (parens_regex.search(s) < 0):
return None
if (parens_regex.group(0) == ")"):
raise QueryError, "Mismatched parentheses"
open = parens_regex.regs[0][0] + 1
start = parens_regex.regs[0][1]
p = 1
while (parens_regex.search(s, start) >= 0):
if (parens_regex.group(0) == ")"):
p = p - 1
else:
p = p + 1
start = parens_regex.regs[0][1]
if (p == 0):
return (open, parens_regex.regs[0][0])
raise QueryError, "Mismatched parentheses"
def parse2(q, default_operator):
'''Find operators and operands'''
i = 0
while (i < len(q)):
if (type(q[i]) is ListType):
q[i] = parse2(q[i], default_operator)
# every other item, starting with the first, should be an operand
if ((i % 2) == 0):
if (type(q[i]) is StringType):
q[i] = Word(q[i])
else:
# This word should be an operator; if it is not, splice in
# the default operator.
try:
q[i] = operator_dict[q[i]]
except:
q[i : i] = [ default_operator ]
i = i + 1
return q
def quotes(s):
# split up quoted regions
split = regsub.split(s, '[%s]*\"[%s]*' % (ws * 2))
if (len(split) > 1):
if ((len(split) % 2) == 0):
raise QueryError, "Mismatched quotes"
for i in range(1, len(split), 2):
# split the quoted region into words and make Word instances out of them
split[i] = map(Word, filter(None, regsub.split(split[i], '[%s]+' % ws)))
# put the Proxmity operator in between quoted words
for j in range(1, len(split[i])):
split[i][j : j] = [ Proximity ]
split = filter(None, split)
else:
# No quotes, so just split the string into words
split = filter(None, regsub.split(s, '[%s]+' % ws))
return split
def parse(s):
'''Parse parentheses and quotes'''
l = []
tmp = string.lower(s)
while (1):
p = parens(tmp)
if (p is None):
# No parentheses found. Look for quotes then exit.
l = l + quotes(tmp)
break
else:
# Look for quotes in the section of the string before
# the parentheses, then parse the string inside the parens
l = l + quotes(tmp[:(p[0] - 1)])
l.append(parse(tmp[p[0] : p[1]]))
# continue looking through the rest of the string
tmp = tmp[(p[1] + 1):]
return l
def get_operands(q, i, index):
'''Evaluate and return the left and right operands for an operator'''
try:
left = q[i - 1]
right = q[i + 1]
except IndexError:
raise QueryError, "Malformed query"
# A list is a parenthesized section, so evaluate those
# before anything else
if (type(left) is ListType):
left = evaluate(left, index)
if (type(right) is ListType):
right = evaluate(right, index)
# Do a search on each Word
try:
left = left.word()
except AttributeError:
pass
else:
left = index[left]
try:
right = right.word()
except AttributeError:
pass
else:
right = index[right]
# Return a pair of ResultLists
return (left, right)
def evaluate(q, index):
'''Evaluate a parsed query'''
if (len(q) == 1):
if (type(q[0]) is ListType):
return evaluate(q[0], index)
return index[q[0].word()]
i = 0
while (i < len(q)):
if q[i] is AndNot:
left, right = get_operands(q, i, index)
val = left.and_not(right)
q[(i - 1) : (i + 2)] = [ val ]
else:
i = i + 1
i = 0
while (i < len(q)):
if q[i] is And:
left, right = get_operands(q, i, index)
val = left & right
q[(i - 1) : (i + 2)] = [ val ]
else:
i = i + 1
i = 0
while (i < len(q)):
if q[i] is Or:
left, right = get_operands(q, i, index)
val = left | right
q[(i - 1) : (i + 2)] = [ val ]
else:
i = i + 1
i = 0
while (i < len(q)):
if q[i] is Proximity:
left, right = get_operands(q, i, index)
val = left.near(right)
q[(i - 1) : (i + 2)] = [ val ]
else:
i = i + 1
if (len(q) != 1):
raise QueryError, "Malformed query"
return q[0]
def query(s, index, default_operator = Or):
# First replace any occurences of " and not " with " andnot "
s = regsub.gsub('[%s]+and[%s]*not[%s]+' % (ws * 3), ' andnot ', s)
q = parse(s)
q = parse2(q, default_operator)
return evaluate(q, index)
# $Log: InvertedIndexQuery.py,v $
# Revision 1.1 1997/09/15 17:25:47 jim
# *** empty log message ***
#
# Revision 1.8 1997/05/15 14:10:46 jim
# Fixed bug in handling of or.
# Also added some quote escapes to make hilit19 happier.
#
# Revision 1.7 1997/04/30 16:27:39 jim
# Changed default operator to Or.
#
# Revision 1.6 1997/04/10 19:11:52 chris
# Added some documentation
#
/*
$Id: Query.c,v 1.4 1997/03/23 01:33:50 jim Exp $
$Id: Query.c,v 1.5 1997/11/13 20:38:37 jim Exp $
Query objects for building tests without python code generation.
......@@ -890,14 +890,14 @@ static char Query_module_documentation[] =
"against objects. Each object type define objects that can be called\n"
"with a single argument or with 'getitem' to check whether an object,\n"
"such as a database record or a collection item satisfies a query.\n"
"\n$Id: Query.c,v 1.4 1997/03/23 01:33:50 jim Exp $"
"\n$Id: Query.c,v 1.5 1997/11/13 20:38:37 jim Exp $"
;
void
initQuery()
{
PyObject *m, *d, *regex, *string;
char *rev="$Revision: 1.4 $";
char *rev="$Revision: 1.5 $";
AttrTesttype.ob_type =&PyType_Type;
CompAttrTesttype.ob_type =&PyType_Type;
......@@ -931,11 +931,20 @@ initQuery()
PyDict_SetItemString(d, "__version__",
PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
#include "dcprotect.h"
/* Check for errors */
if (PyErr_Occurred())
Py_FatalError("can't initialize module BTree");
}
/****************************************************************************
$Log: Query.c,v $
Revision 1.5 1997/11/13 20:38:37 jim
added dcprotect
Revision 1.4 1997/03/23 01:33:50 jim
Added repr operations.
......
*shared*
intSet intSet.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence
Splitter Splitter.c
intSet intSet.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence -I/projects/_/python
Splitter Splitter.c -I/projects/_/python
#Trie Trie.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence
BTree BTree.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence
IIBTree IIBTree.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence
IOBTree IOBTree.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence
OIBTree OIBTree.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence
BTree BTree.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence -I/projects/_/python
IIBTree IIBTree.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence -I/projects/_/python
IOBTree IOBTree.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence -I/projects/_/python
OIBTree OIBTree.c -I/projects/_/ExtensionClass -I/projects/_/cPersistence -I/projects/_/python
Query Query.c
Query Query.c -I/projects/_/python
# install Index.py
# install TextIndex.py
......@@ -388,14 +388,14 @@ static char Splitter_module_documentation[] =
"\n"
"for use in an inverted index\n"
"\n"
"$Id: Splitter.c,v 1.2 1997/11/03 19:00:44 jim Exp $\n"
"$Id: Splitter.c,v 1.3 1997/11/13 20:38:38 jim Exp $\n"
;
void
initSplitter()
{
PyObject *m, *d;
char *rev="$Revision: 1.2 $";
char *rev="$Revision: 1.3 $";
/* Create the module and add the functions */
m = Py_InitModule4("Splitter", Splitter_module_methods,
......@@ -406,6 +406,8 @@ initSplitter()
d = PyModule_GetDict(m);
PyDict_SetItemString(d, "__version__",
PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
#include "dcprotect.h"
if (PyErr_Occurred()) Py_FatalError("can't initialize module Splitter");
}
......@@ -10,7 +10,7 @@
static char intSet_module_documentation[] =
""
"\n$Id: intSet.c,v 1.3 1997/10/01 02:45:58 jim Exp $"
"\n$Id: intSet.c,v 1.4 1997/11/13 20:38:39 jim Exp $"
;
#include <limits.h>
......@@ -527,7 +527,7 @@ void
initintSet()
{
PyObject *m, *d;
char *rev="$Revision: 1.3 $";
char *rev="$Revision: 1.4 $";
UNLESS(ExtensionClassImported) return;
......@@ -555,6 +555,8 @@ initintSet()
PyDict_SetItemString(d, "__version__",
PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
#include "dcprotect.h"
/* Check for errors */
if (PyErr_Occurred())
......@@ -566,6 +568,9 @@ initintSet()
Revision Log:
$Log: intSet.c,v $
Revision 1.4 1997/11/13 20:38:39 jim
added dcprotect
Revision 1.3 1997/10/01 02:45:58 jim
Minor reformat.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment