Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
a989ae56
Commit
a989ae56
authored
Jan 21, 2002
by
Andreas Jung
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
introducing new 'casefolding' parameter
parent
97f1e13c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
66 additions
and
26 deletions
+66
-26
lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
...ex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
+21
-9
lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
.../TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
+20
-10
lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
...ndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
+25
-7
No files found.
lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
View file @
a989ae56
...
...
@@ -35,6 +35,7 @@ typedef struct
int
allow_single_chars
;
int
index_numbers
;
int
max_len
;
int
casefolding
;
}
Splitter
;
...
...
@@ -251,7 +252,10 @@ next_word(Splitter *self, char **startpos, char **endpos)
continue
;
}
c
=
mytolower
(
*
here
);
if
(
self
->
casefolding
)
c
=
mytolower
(
*
here
);
else
c
=
(
*
here
);
/* Check to see if this character is part of a word */
...
...
@@ -490,7 +494,7 @@ static PyTypeObject SplitterType = {
SplitterType__doc__
/* Documentation string */
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"singlechar"
,
"indexnumbers"
,
"maxlen"
,
NULL
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"singlechar"
,
"indexnumbers"
,
"maxlen"
,
"casefolding"
,
NULL
};
static
PyObject
*
get_Splitter
(
PyObject
*
modinfo
,
PyObject
*
args
,
PyObject
*
keywds
)
...
...
@@ -501,8 +505,9 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
int
single_char
=
0
;
int
index_numbers
=
0
;
int
max_len
=
64
;
int
casefolding
=
1
;
UNLESS
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiii
"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
,
&
single_char
,
&
index_numbers
,
&
max_len
))
return
NULL
;
UNLESS
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiii
i"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
,
&
single_char
,
&
index_numbers
,
&
max_len
,
&
casefolding
))
return
NULL
;
if
(
index_numbers
<
0
||
index_numbers
>
1
)
{
...
...
@@ -510,6 +515,11 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
return
NULL
;
}
if
(
casefolding
<
0
||
casefolding
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"casefolding must be 0 or 1"
);
return
NULL
;
}
if
(
single_char
<
0
||
single_char
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"singlechar must be 0 or 1"
);
return
NULL
;
...
...
@@ -521,7 +531,6 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
}
UNLESS
(
self
=
PyObject_NEW
(
Splitter
,
&
SplitterType
))
return
NULL
;
if
(
synstop
)
{
...
...
@@ -539,6 +548,7 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
self
->
allow_single_chars
=
single_char
;
self
->
index_numbers
=
index_numbers
;
self
->
max_len
=
max_len
;
self
->
casefolding
=
casefolding
;
self
->
index
=
-
1
;
...
...
@@ -553,7 +563,7 @@ err:
static
struct
PyMethodDef
Splitter_module_methods
[]
=
{
{
"ISO_8859_1_Splitter"
,
(
PyCFunction
)
get_Splitter
,
METH_VARARGS
|
METH_KEYWORDS
,
"ISO_8859_1_Splitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen]) -- Return a word splitter"
"ISO_8859_1_Splitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen]
[,casefolding]
) -- Return a word splitter"
},
{
NULL
,
NULL
}
...
...
@@ -564,7 +574,7 @@ static char Splitter_module_documentation[] =
"
\n
"
"for use in an inverted index
\n
"
"
\n
"
"$Id: ISO_8859_1_Splitter.c,v 1.
6 2002/01/09 15:17:34
andreasjung Exp $
\n
"
"$Id: ISO_8859_1_Splitter.c,v 1.
7 2002/01/21 19:28:55
andreasjung Exp $
\n
"
;
...
...
@@ -573,20 +583,22 @@ void
initISO_8859_1_Splitter
(
void
)
{
PyObject
*
m
,
*
d
;
char
*
rev
=
"$Revision: 1.
6
$"
;
char
*
rev
=
"$Revision: 1.
7
$"
;
/* Create the module and add the functions */
initSplitterTrtabs
();
if
(
PyErr_Occurred
())
Py_FatalError
(
"can't initialize module Splitter 1"
);
m
=
Py_InitModule4
(
"ISO_8859_1_Splitter"
,
Splitter_module_methods
,
Splitter_module_documentation
,
(
PyObject
*
)
NULL
,
PYTHON_API_VERSION
);
if
(
PyErr_Occurred
())
Py_FatalError
(
"can't initialize module Splitter 2"
);
/* Add some symbolic constants to the module */
d
=
PyModule_GetDict
(
m
);
if
(
PyErr_Occurred
())
Py_FatalError
(
"can't initialize module Splitter 3"
);
PyDict_SetItemString
(
d
,
"__version__"
,
PyString_FromStringAndSize
(
rev
+
11
,
strlen
(
rev
+
11
)
-
2
));
if
(
PyErr_Occurred
())
Py_FatalError
(
"can't initialize module Splitter 4"
);
if
(
PyErr_Occurred
())
Py_FatalError
(
"can't initialize module Splitter"
);
}
lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
View file @
a989ae56
...
...
@@ -25,11 +25,12 @@ typedef struct
int
max_len
;
int
allow_single_chars
;
int
index_numbers
;
int
casefolding
;
}
Splitter
;
static
PyUnicodeObject
*
prepareString
(
PyUnicodeObject
*
o
);
PyUnicodeObject
*
prepareString
(
Splitter
*
self
,
PyUnicodeObject
*
o
);
static
PyObject
*
checkSynword
(
Splitter
*
self
,
PyObject
*
word
)
{
...
...
@@ -201,7 +202,7 @@ static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
int
i
=
0
;
int
start
=
0
;
doc1
=
prepareString
(
doc
);
doc1
=
prepareString
(
self
,
doc
);
if
(
doc1
==
NULL
)
return
-
1
;
...
...
@@ -297,18 +298,20 @@ void fixlower(PyUnicodeObject *self)
static
PyUnicodeObject
*
prepareString
(
PyUnicodeObject
*
o
)
PyUnicodeObject
*
prepareString
(
Splitter
*
self
,
PyUnicodeObject
*
o
)
{
PyUnicodeObject
*
u
;
u
=
(
PyUnicodeObject
*
)
PyUnicode_FromUnicode
(
o
->
str
,
o
->
length
);
if
(
u
!=
NULL
)
fixlower
(
u
);
if
(
u
!=
NULL
){
if
(
self
->
casefolding
)
fixlower
(
u
);
}
return
u
;
}
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"indexnumbers"
,
"singlechar"
,
"maxlen"
,
NULL
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"indexnumbers"
,
"singlechar"
,
"maxlen"
,
"casefolding"
,
NULL
};
static
PyObject
*
...
...
@@ -320,8 +323,9 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
int
index_numbers
=
0
;
int
max_len
=
64
;
int
single_char
=
0
;
int
casefolding
=
1
;
if
(
!
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiii
"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
,
&
index_numbers
,
&
single_char
,
&
max_len
)))
return
NULL
;
if
(
!
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiii
i"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
,
&
index_numbers
,
&
single_char
,
&
max_len
,
&
casefolding
)))
return
NULL
;
#ifdef DEBUG
puts
(
"got text"
);
...
...
@@ -334,6 +338,11 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
return
NULL
;
}
if
(
casefolding
<
0
||
casefolding
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"casefolding must be 0 or 1"
);
return
NULL
;
}
if
(
single_char
<
0
||
single_char
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"singlechar must be 0 or 1"
);
return
NULL
;
...
...
@@ -371,6 +380,7 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
self
->
index_numbers
=
index_numbers
;
self
->
max_len
=
max_len
;
self
->
allow_single_chars
=
single_char
;
self
->
casefolding
=
casefolding
;
if
((
splitUnicodeString
(
self
,(
PyUnicodeObject
*
)
unicodedoc
))
<
0
)
goto
err
;
...
...
@@ -389,7 +399,7 @@ static struct PyMethodDef Splitter_module_methods[] =
{
{
"UnicodeSplitter"
,
(
PyCFunction
)
newSplitter
,
METH_VARARGS
|
METH_KEYWORDS
,
"UnicodeSplitter(doc[,synstop][,encoding='latin1']) "
"UnicodeSplitter(doc[,synstop][,encoding='latin1']
[,indexnumbers][,maxlen][,singlechar][,casefolding]
) "
"-- Return a word splitter"
},
{
NULL
,
NULL
}
...
...
@@ -400,7 +410,7 @@ static char Splitter_module_documentation[] =
"
\n
"
"for use in an inverted index
\n
"
"
\n
"
"$Id: UnicodeSplitter.c,v 1.1
3 2002/01/09 15:17:34
andreasjung Exp $
\n
"
"$Id: UnicodeSplitter.c,v 1.1
4 2002/01/21 19:28:55
andreasjung Exp $
\n
"
;
...
...
@@ -408,7 +418,7 @@ void
initUnicodeSplitter
(
void
)
{
PyObject
*
m
,
*
d
;
char
*
rev
=
"$Revision: 1.1
3
$"
;
char
*
rev
=
"$Revision: 1.1
4
$"
;
/* Create the module and add the functions */
m
=
Py_InitModule4
(
"UnicodeSplitter"
,
Splitter_module_methods
,
...
...
lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
View file @
a989ae56
...
...
@@ -28,6 +28,7 @@ typedef struct
int
allow_single_chars
;
int
index_numbers
;
int
max_len
;
int
casefolding
;
}
Splitter
;
...
...
@@ -170,7 +171,10 @@ next_word(Splitter *self, char **startpos, char **endpos)
continue
;
}
c
=
tolower
((
unsigned
char
)
*
here
);
if
(
self
->
casefolding
)
c
=
tolower
((
unsigned
char
)
*
here
);
else
c
=
(
unsigned
char
)
*
here
;
/* Check to see if this character is part of a word */
...
...
@@ -435,7 +439,7 @@ static PyTypeObject SplitterType = {
SplitterType__doc__
/* Documentation string */
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"singlechar"
,
"indexnumbers"
,
"maxlen"
,
NULL
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"singlechar"
,
"indexnumbers"
,
"maxlen"
,
"casefolding"
,
NULL
};
static
PyObject
*
...
...
@@ -447,9 +451,17 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
int
single_char
=
0
;
int
index_numbers
=
0
;
int
max_len
=
64
;
int
casefolding
=
1
;
UNLESS
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiii"
,
splitter_args
,
\
&
doc
,
&
synstop
,
&
encoding
,
&
single_char
,
&
index_numbers
,
&
max_len
))
return
NULL
;
UNLESS
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiiii"
,
splitter_args
,
\
&
doc
,
&
synstop
,
&
encoding
,
&
single_char
,
&
index_numbers
,
&
max_len
,
&
casefolding
))
return
NULL
;
if
(
index_numbers
<
0
||
index_numbers
>
1
)
{
...
...
@@ -457,6 +469,11 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
return
NULL
;
}
if
(
casefolding
<
0
||
casefolding
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"casefolding must be 0 or 1"
);
return
NULL
;
}
if
(
single_char
<
0
||
single_char
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"singlechar must be 0 or 1"
);
return
NULL
;
...
...
@@ -486,6 +503,7 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
self
->
allow_single_chars
=
single_char
;
self
->
index_numbers
=
index_numbers
;
self
->
max_len
=
max_len
;
self
->
casefolding
=
casefolding
;
return
(
PyObject
*
)
self
;
...
...
@@ -498,7 +516,7 @@ err:
static
struct
PyMethodDef
Splitter_module_methods
[]
=
{
{
"ZopeSplitter"
,
(
PyCFunction
)
get_Splitter
,
METH_VARARGS
|
METH_KEYWORDS
,
"ZopeSplitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen]) -- Return a word splitter"
"ZopeSplitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen]
[,casefolding]
) -- Return a word splitter"
},
{
NULL
,
NULL
}
...
...
@@ -509,7 +527,7 @@ static char Splitter_module_documentation[] =
"
\n
"
"for use in an inverted index
\n
"
"
\n
"
"$Id: ZopeSplitter.c,v 1.
6 2002/01/09 15:17:34
andreasjung Exp $
\n
"
"$Id: ZopeSplitter.c,v 1.
7 2002/01/21 19:28:55
andreasjung Exp $
\n
"
;
...
...
@@ -517,7 +535,7 @@ void
initZopeSplitter
(
void
)
{
PyObject
*
m
,
*
d
;
char
*
rev
=
"$Revision: 1.
6
$"
;
char
*
rev
=
"$Revision: 1.
7
$"
;
/* Create the module and add the functions */
m
=
Py_InitModule4
(
"ZopeSplitter"
,
Splitter_module_methods
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment