Commit 6293667d authored by Andreas Jung

added new 'encoding' parameter to change default encoding

parent e54094a2
...@@ -278,15 +278,19 @@ PyUnicodeObject *prepareString(PyUnicodeObject *o) ...@@ -278,15 +278,19 @@ PyUnicodeObject *prepareString(PyUnicodeObject *o)
return u; return u;
} }
/* Keyword names passed to PyArg_ParseTupleAndKeywords in get_Splitter.
 * The list must contain one name per argument in the "O|Os" format string,
 * in the same order (doc, synstop, encoding), and be NULL-terminated.
 * Listing only "encoding" leaves the keyword list shorter than the format
 * string, so the names no longer line up with the parsed arguments. */
static char *splitter_args[]={"doc","synstop","encoding",NULL};
static PyObject * static PyObject *
get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds) get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
{ {
Splitter *self; Splitter *self=NULL;
PyObject *doc, *unicodedoc,*synstop=NULL; PyObject *doc=NULL, *unicodedoc=NULL,*synstop=NULL;
char *encoding = "latin1";
if (! (self = PyObject_NEW(Splitter, &SplitterType))) return NULL; if (! (self = PyObject_NEW(Splitter, &SplitterType))) return NULL;
if (! (PyArg_ParseTuple(args,"O|O",&doc,&synstop))) return NULL; if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|Os",splitter_args,&doc,&synstop,&encoding))) return NULL;
#ifdef DEBUG #ifdef DEBUG
puts("got text"); puts("got text");
...@@ -294,11 +298,10 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds) ...@@ -294,11 +298,10 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
fflush(stdout); fflush(stdout);
#endif #endif
if (PyString_Check(doc)) { if (PyString_Check(doc)) {
// This sux a bit. The default encoding should be ascii or latin1.
// But there must be better support to pass an optional encoding parameter
unicodedoc = PyUnicode_FromEncodedObject(doc,"latin1","strict"); unicodedoc = PyUnicode_FromEncodedObject(doc,encoding,"strict");
if (! unicodedoc) goto err; if (! unicodedoc) goto err;
...@@ -325,8 +328,8 @@ err: ...@@ -325,8 +328,8 @@ err:
static struct PyMethodDef Splitter_module_methods[] = static struct PyMethodDef Splitter_module_methods[] =
{ {
{ "UnicodeSplitter", (PyCFunction)get_Splitter, METH_VARARGS, { "UnicodeSplitter", (PyCFunction)get_Splitter, METH_VARARGS|METH_KEYWORDS,
"UnicodeSplitter(doc[,synstop]) -- Return a word splitter" "UnicodeSplitter(doc[,synstop][,encoding='latin1']) -- Return a word splitter"
}, },
{ NULL, NULL } { NULL, NULL }
}; };
...@@ -336,7 +339,7 @@ static char Splitter_module_documentation[] = ...@@ -336,7 +339,7 @@ static char Splitter_module_documentation[] =
"\n" "\n"
"for use in an inverted index\n" "for use in an inverted index\n"
"\n" "\n"
"$Id: UnicodeSplitter.c,v 1.3 2001/10/17 14:49:23 andreasjung Exp $\n" "$Id: UnicodeSplitter.c,v 1.4 2001/10/17 15:29:50 andreasjung Exp $\n"
; ;
...@@ -344,7 +347,7 @@ void ...@@ -344,7 +347,7 @@ void
initUnicodeSplitter(void) initUnicodeSplitter(void)
{ {
PyObject *m, *d; PyObject *m, *d;
char *rev="$Revision: 1.3 $"; char *rev="$Revision: 1.4 $";
/* Create the module and add the functions */ /* Create the module and add the functions */
m = Py_InitModule4("UnicodeSplitter", Splitter_module_methods, m = Py_InitModule4("UnicodeSplitter", Splitter_module_methods,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment