Commit 22c494a0 authored by Jeremy Hylton

A few small cleanups.

Simplify initZopeSplitter() and remove unnecessary PyErr_Occurred().

Use string macros for objects that are guaranteed to be strings.

Remove unnecessary \ at end of line.

In innermost loop of splitter function, replace ASSIGN() macro with
Py_DECREF() and simple assignment.  The macro was doing more work
than necessary because it called XDECREF on an object that was
guaranteed not to be NULL.

Use less horizontal whitespace in next_word().
parent 328d8bdd
...@@ -101,10 +101,11 @@ check_synstop(Splitter *self, PyObject *word) ...@@ -101,10 +101,11 @@ check_synstop(Splitter *self, PyObject *word)
char *cword; char *cword;
int len; int len;
cword = PyString_AsString(word); cword = PyString_AS_STRING(word);
len = PyString_Size(word); len = PyString_GET_SIZE(word);
if(len < 2 && ! self->allow_single_chars) /* Single-letter words are stop words! */ if (len < 2 && !self->allow_single_chars)
/* Single-letter words are stop words! */
{ {
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
...@@ -114,7 +115,6 @@ check_synstop(Splitter *self, PyObject *word) ...@@ -114,7 +115,6 @@ check_synstop(Splitter *self, PyObject *word)
Test whether a word has any letters. * Test whether a word has any letters. *
*/ */
for (; --len >= 0 && ! isalpha((unsigned char)cword[len]); ) for (; --len >= 0 && ! isalpha((unsigned char)cword[len]); )
; ;
if (len < 0 && ! self->index_numbers) { if (len < 0 && ! self->index_numbers) {
Py_INCREF(Py_None); Py_INCREF(Py_None);
...@@ -130,11 +130,13 @@ check_synstop(Splitter *self, PyObject *word) ...@@ -130,11 +130,13 @@ check_synstop(Splitter *self, PyObject *word)
if (self->synstop == NULL) if (self->synstop == NULL)
return word; return word;
len = 0;
while ((value = PyObject_GetItem(self->synstop, word)) && while ((value = PyObject_GetItem(self->synstop, word)) &&
PyString_Check(value)) { PyString_Check(value)) {
ASSIGN(word,value); Py_DECREF(word);
word = value;
if(len++ > 100) if (len++ > 100)
break; /* Avoid infinite recurssion */ break; /* Avoid infinite recurssion */
} }
...@@ -177,64 +179,52 @@ next_word(Splitter *self, char **startpos, char **endpos) ...@@ -177,64 +179,52 @@ next_word(Splitter *self, char **startpos, char **endpos)
c = (unsigned char) *here; c = (unsigned char) *here;
/* Check to see if this character is part of a word */ /* Check to see if this character is part of a word */
if (isalnum((unsigned char)c) || c == '/' || c == '_') {
/* Found a word character */
if(isalnum((unsigned char)c) || c=='/' || c=='_') { /* Found a word character */ if (startpos && i == 0)
*startpos = here;
if(startpos && i==0) if (i++ < self->max_len)
*startpos=here;
if(i++ < self->max_len)
*b++ = c; *b++ = c;
} else if (i != 0) { /* We've found the end of a word */ } else if (i != 0) { /* We've found the end of a word */
if (i >= self->max_len)
if(i >= self->max_len) i =self->max_len; /* "stem" the long word */
i=self->max_len; /* "stem" the long word */
UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) { UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) {
self->here=here; self->here = here;
return NULL; return NULL;
} }
UNLESS(res = check_synstop(self, pyword)) { UNLESS(res = check_synstop(self, pyword)) {
self->here=here; self->here = here;
Py_DECREF(pyword); Py_DECREF(pyword);
return NULL; return NULL;
} }
if (res != Py_None) { if (res != Py_None) {
if(endpos) if (endpos)
*endpos=here; *endpos = here;
self->here = here;
self->here=here;
Py_DECREF(pyword); Py_DECREF(pyword);
self->index++; self->index++;
return res; return res;
} }
/* The word is a stopword, so ignore it */ /* The word is a stopword, so ignore it */
Py_DECREF(res); Py_DECREF(res);
Py_DECREF(pyword); Py_DECREF(pyword);
i = 0; i = 0;
b = wbuf;
b=wbuf;
} }
here++; here++;
} }
self->here=here; self->here=here;
/* We've reached the end of the string */ /* We've reached the end of the string */
if(i >= self->max_len) if (i >= self->max_len)
i=self->max_len; /* "stem" the long word */ i = self->max_len; /* "stem" the long word */
if (i == 0) { if (i == 0) {
/* No words */ /* No words */
...@@ -249,10 +239,9 @@ next_word(Splitter *self, char **startpos, char **endpos) ...@@ -249,10 +239,9 @@ next_word(Splitter *self, char **startpos, char **endpos)
*endpos=here; *endpos=here;
res = check_synstop(self, pyword); res = check_synstop(self, pyword);
Py_DECREF(pyword); Py_DECREF(pyword);
if(PyString_Check(res)) if (PyString_Check(res))
self->index++; self->index++;
return res; return res;
...@@ -295,7 +284,7 @@ Splitter_split(Splitter*self) ...@@ -295,7 +284,7 @@ Splitter_split(Splitter*self)
while (1) { while (1) {
Py_XDECREF(word); Py_XDECREF(word);
UNLESS(word = next_word(self,NULL,NULL)) return NULL; UNLESS(word = next_word(self, NULL, NULL)) return NULL;
if (word == Py_None) { if (word == Py_None) {
return list; return list;
...@@ -453,7 +442,7 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds) ...@@ -453,7 +442,7 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
int max_len= 64; int max_len= 64;
int casefolding = 1; int casefolding = 1;
UNLESS(PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiiii",splitter_args, \ UNLESS(PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiiii",splitter_args,
&doc, &doc,
&synstop, &synstop,
&encoding, &encoding,
...@@ -495,9 +484,9 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds) ...@@ -495,9 +484,9 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
UNLESS(self->text = PyObject_Str(doc)) goto err; UNLESS(self->text = PyObject_Str(doc)) goto err;
UNLESS(self->here=PyString_AsString(self->text)) goto err; UNLESS(self->here = PyString_AS_STRING(self->text)) goto err;
self->end = self->here + PyString_Size(self->text); self->end = self->here + PyString_GET_SIZE(self->text);
self->index = -1; self->index = -1;
self->allow_single_chars = single_char; self->allow_single_chars = single_char;
...@@ -527,23 +516,14 @@ static char Splitter_module_documentation[] = ...@@ -527,23 +516,14 @@ static char Splitter_module_documentation[] =
"\n" "\n"
"for use in an inverted index\n" "for use in an inverted index\n"
"\n" "\n"
"$Id: ZopeSplitter.c,v 1.8 2002/03/21 15:48:54 htrd Exp $\n" "$Id: ZopeSplitter.c,v 1.9 2002/04/30 04:15:12 jeremy Exp $\n"
; ;
void void
initZopeSplitter(void) initZopeSplitter(void)
{ {
PyObject *m, *d;
/* Create the module and add the functions */ /* Create the module and add the functions */
m = Py_InitModule4("ZopeSplitter", Splitter_module_methods, Py_InitModule4("ZopeSplitter", Splitter_module_methods,
Splitter_module_documentation, Splitter_module_documentation, NULL, PYTHON_API_VERSION);
(PyObject*)NULL,PYTHON_API_VERSION);
/* Add some symbolic constants to the module */
d = PyModule_GetDict(m);
if (PyErr_Occurred())
Py_FatalError("can't initialize module Splitter");
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment