Commit dca49499 authored by Andrew McNamara's avatar Andrew McNamara

Improved the implementation of the internal "dialect" type. The new

implementation features better error reporting, and better compliance
with the PEP.
parent a6fe31ae
......@@ -68,7 +68,7 @@ class Dialect:
elif not isinstance(self.lineterminator, str):
errors.append("lineterminator must be a string")
if self.doublequote not in (True, False):
if self.doublequote not in (True, False) and self.quoting != QUOTE_NONE:
errors.append("doublequote parameter must be True or False")
if self.skipinitialspace not in (True, False):
......
......@@ -17,44 +17,112 @@ class Test_Csv(unittest.TestCase):
from the high level interface. Further tests of this nature are done
in TestDialectRegistry.
"""
def test_reader_arg_valid(self):
self.assertRaises(TypeError, csv.reader)
self.assertRaises(TypeError, csv.reader, None)
self.assertRaises(AttributeError, csv.reader, [], bad_attr = 0)
self.assertRaises(csv.Error, csv.reader, [], 'foo')
def _test_arg_valid(self, ctor, arg):
self.assertRaises(TypeError, ctor)
self.assertRaises(TypeError, ctor, None)
self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
self.assertRaises(csv.Error, ctor, arg, 'foo')
self.assertRaises(TypeError, ctor, arg, None)
self.assertRaises(TypeError, ctor, arg, delimiter=None)
self.assertRaises(TypeError, ctor, arg, delimiter=1)
self.assertRaises(TypeError, ctor, arg, quotechar=1)
self.assertRaises(TypeError, ctor, arg, lineterminator=None)
self.assertRaises(TypeError, ctor, arg, lineterminator=1)
self.assertRaises(TypeError, ctor, arg, quoting=None)
# We now allow this, only raising an exception if quoting is needed.
# self.assertRaises(TypeError, ctor, arg, quotechar=None)
# self.assertRaises(TypeError, ctor, arg,
# quoting=csv.QUOTE_NONE, escapechar=None)
# No longer complains about dialects with invalid attributes [AM]
# class BadDialect:
# bad_attr = 0
# self.assertRaises(AttributeError, csv.reader, [], BadDialect)
class BadClass:
def __init__(self):
raise IOError
self.assertRaises(IOError, csv.reader, [], BadClass)
self.assertRaises(TypeError, csv.reader, [], None)
class BadDialect:
bad_attr = 0
self.assertRaises(AttributeError, csv.reader, [], BadDialect)
def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
def test_writer_arg_valid(self):
self.assertRaises(TypeError, csv.writer)
self.assertRaises(TypeError, csv.writer, None)
self.assertRaises(AttributeError, csv.writer, StringIO(), bad_attr = 0)
self._test_arg_valid(csv.writer, StringIO())
def _test_attrs(self, obj):
def _test_default_attrs(self, ctor, *args):
obj = ctor(*args)
# Check defaults
self.assertEqual(obj.dialect.delimiter, ',')
obj.dialect.delimiter = '\t'
self.assertEqual(obj.dialect.delimiter, '\t')
self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter')
self.assertRaises(TypeError, setattr, obj.dialect,
'lineterminator', None)
obj.dialect.escapechar = None
self.assertEqual(obj.dialect.doublequote, True)
self.assertEqual(obj.dialect.escapechar, None)
self.assertEqual(obj.dialect.lineterminator, "\r\n")
self.assertEqual(obj.dialect.quotechar, '"')
self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
self.assertEqual(obj.dialect.skipinitialspace, False)
self.assertEqual(obj.dialect.strict, False)
# Try deleting or changing attributes (they are read-only)
self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter')
self.assertRaises(TypeError, setattr, obj.dialect, 'delimiter', ':')
self.assertRaises(TypeError, delattr, obj.dialect, 'quoting')
self.assertRaises(TypeError, setattr, obj.dialect, 'quoting', None)
obj.dialect.quoting = csv.QUOTE_MINIMAL
self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
def test_reader_attrs(self):
self._test_attrs(csv.reader([]))
self._test_default_attrs(csv.reader, [])
def test_writer_attrs(self):
self._test_attrs(csv.writer(StringIO()))
self._test_default_attrs(csv.writer, StringIO())
def _test_kw_attrs(self, ctor, *args):
# Now try with alternate options
kwargs = dict(delimiter=':', doublequote=False, escapechar='\\',
lineterminator='\r', quotechar='*',
quoting=csv.QUOTE_NONE, skipinitialspace=True,
strict=True)
obj = ctor(*args, **kwargs)
self.assertEqual(obj.dialect.delimiter, ':')
self.assertEqual(obj.dialect.doublequote, False)
self.assertEqual(obj.dialect.escapechar, '\\')
self.assertEqual(obj.dialect.lineterminator, "\r")
self.assertEqual(obj.dialect.quotechar, '*')
self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE)
self.assertEqual(obj.dialect.skipinitialspace, True)
self.assertEqual(obj.dialect.strict, True)
def test_reader_kw_attrs(self):
self._test_kw_attrs(csv.reader, [])
def test_writer_kw_attrs(self):
self._test_kw_attrs(csv.writer, StringIO())
def _test_dialect_attrs(self, ctor, *args):
# Now try with dialect-derived options
class dialect:
delimiter='-'
doublequote=False
escapechar='^'
lineterminator='$'
quotechar='#'
quoting=csv.QUOTE_ALL
skipinitialspace=True
strict=False
args = args + (dialect,)
obj = ctor(*args)
self.assertEqual(obj.dialect.delimiter, '-')
self.assertEqual(obj.dialect.doublequote, False)
self.assertEqual(obj.dialect.escapechar, '^')
self.assertEqual(obj.dialect.lineterminator, "$")
self.assertEqual(obj.dialect.quotechar, '#')
self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL)
self.assertEqual(obj.dialect.skipinitialspace, True)
self.assertEqual(obj.dialect.strict, False)
def test_reader_dialect_attrs(self):
self._test_dialect_attrs(csv.reader, [])
def test_writer_dialect_attrs(self):
self._test_dialect_attrs(csv.writer, StringIO())
def _write_test(self, fields, expect, **kwargs):
fd, name = tempfile.mkstemp()
......@@ -166,6 +234,13 @@ class Test_Csv(unittest.TestCase):
self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')
def test_read_quoting(self):
self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
quotechar=None, escapechar='\\')
self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
quoting=csv.QUOTE_NONE, escapechar='\\')
def test_read_bigfield(self):
# This exercises the buffer realloc functionality
bigstring = 'X' * 50000
......@@ -297,7 +372,7 @@ class TestDialectRegistry(unittest.TestCase):
def test_bad_dialect(self):
# Unknown parameter
self.assertRaises(AttributeError, csv.reader, [], bad_attr = 0)
self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
# Bad values
self.assertRaises(TypeError, csv.reader, [], delimiter = None)
self.assertRaises(TypeError, csv.reader, [], quoting = -1)
......
......@@ -73,7 +73,7 @@ typedef struct {
char escapechar; /* escape character */
int skipinitialspace; /* ignore spaces following delimiter? */
PyObject *lineterminator; /* string to write between records */
QuoteStyle quoting; /* style of quoting to write */
int quoting; /* style of quoting to write */
int strict; /* raise exception on bad CSV */
} DialectObj;
......@@ -130,17 +130,6 @@ get_dialect_from_registry(PyObject * name_obj)
return dialect_obj;
}
static int
check_delattr(PyObject *v)
{
if (v == NULL) {
PyErr_SetString(PyExc_TypeError,
"Cannot delete attribute");
return -1;
}
return 0;
}
static PyObject *
get_string(PyObject *str)
{
......@@ -148,25 +137,6 @@ get_string(PyObject *str)
return str;
}
static int
set_string(PyObject **str, PyObject *v)
{
if (check_delattr(v) < 0)
return -1;
if (!PyString_Check(v)
#ifdef Py_USING_UNICODE
&& !PyUnicode_Check(v)
#endif
) {
PyErr_BadArgument();
return -1;
}
Py_XDECREF(*str);
Py_INCREF(v);
*str = v;
return 0;
}
static PyObject *
get_nullchar_as_None(char c)
{
......@@ -178,48 +148,22 @@ get_nullchar_as_None(char c)
return PyString_FromStringAndSize((char*)&c, 1);
}
static int
set_None_as_nullchar(char * addr, PyObject *v)
{
if (check_delattr(v) < 0)
return -1;
if (v == Py_None)
*addr = '\0';
else if (!PyString_Check(v) || PyString_Size(v) != 1) {
PyErr_BadArgument();
return -1;
}
else {
char *s = PyString_AsString(v);
if (s == NULL)
return -1;
*addr = s[0];
}
return 0;
}
static PyObject *
Dialect_get_lineterminator(DialectObj *self)
{
return get_string(self->lineterminator);
}
static int
Dialect_set_lineterminator(DialectObj *self, PyObject *value)
{
return set_string(&self->lineterminator, value);
}
static PyObject *
Dialect_get_escapechar(DialectObj *self)
{
return get_nullchar_as_None(self->escapechar);
}
static int
Dialect_set_escapechar(DialectObj *self, PyObject *value)
static PyObject *
Dialect_get_quotechar(DialectObj *self)
{
return set_None_as_nullchar(&self->escapechar, value);
return get_nullchar_as_None(self->quotechar);
}
static PyObject *
......@@ -229,51 +173,109 @@ Dialect_get_quoting(DialectObj *self)
}
static int
Dialect_set_quoting(DialectObj *self, PyObject *v)
_set_bool(const char *name, int *target, PyObject *src, int dflt)
{
if (src == NULL)
*target = dflt;
else
*target = PyObject_IsTrue(src);
return 0;
}
static int
_set_int(const char *name, int *target, PyObject *src, int dflt)
{
if (src == NULL)
*target = dflt;
else {
if (!PyInt_Check(src)) {
PyErr_Format(PyExc_TypeError,
"\"%s\" must be an integer", name);
return -1;
}
*target = PyInt_AsLong(src);
}
return 0;
}
static int
_set_char(const char *name, char *target, PyObject *src, char dflt)
{
if (src == NULL)
*target = dflt;
else {
if (src == Py_None)
*target = '\0';
else if (!PyString_Check(src) || PyString_Size(src) != 1) {
PyErr_Format(PyExc_TypeError,
"\"%s\" must be an 1-character string",
name);
return -1;
}
else {
char *s = PyString_AsString(src);
if (s == NULL)
return -1;
*target = s[0];
}
}
return 0;
}
static int
_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
{
if (src == NULL)
*target = PyString_FromString(dflt);
else {
if (src == Py_None)
*target = NULL;
else if (!PyString_Check(src)
#ifdef Py_USING_UNICODE
&& !PyUnicode_Check(src)
#endif
) {
PyErr_Format(PyExc_TypeError,
"\"%s\" must be an string", name);
return -1;
} else {
Py_XDECREF(*target);
Py_INCREF(src);
*target = src;
}
}
return 0;
}
static int
dialect_check_quoting(int quoting)
{
int quoting;
StyleDesc *qs = quote_styles;
if (check_delattr(v) < 0)
return -1;
if (!PyInt_Check(v)) {
PyErr_BadArgument();
return -1;
}
quoting = PyInt_AsLong(v);
for (qs = quote_styles; qs->name; qs++) {
if (qs->style == quoting) {
self->quoting = quoting;
if (qs->style == quoting)
return 0;
}
}
PyErr_BadArgument();
PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
return -1;
}
static struct PyMethodDef Dialect_methods[] = {
{ NULL, NULL }
};
#define D_OFF(x) offsetof(DialectObj, x)
static struct PyMemberDef Dialect_memberlist[] = {
{ "quotechar", T_CHAR, D_OFF(quotechar) },
{ "delimiter", T_CHAR, D_OFF(delimiter) },
{ "skipinitialspace", T_INT, D_OFF(skipinitialspace) },
{ "doublequote", T_INT, D_OFF(doublequote) },
{ "strict", T_INT, D_OFF(strict) },
{ "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
{ "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
{ "doublequote", T_INT, D_OFF(doublequote), READONLY },
{ "strict", T_INT, D_OFF(strict), READONLY },
{ NULL }
};
static PyGetSetDef Dialect_getsetlist[] = {
{ "escapechar", (getter)Dialect_get_escapechar,
(setter)Dialect_set_escapechar },
{ "lineterminator", (getter)Dialect_get_lineterminator,
(setter)Dialect_set_lineterminator },
{ "quoting", (getter)Dialect_get_quoting,
(setter)Dialect_set_quoting },
{NULL},
{ "escapechar", (getter)Dialect_get_escapechar},
{ "lineterminator", (getter)Dialect_get_lineterminator},
{ "quotechar", (getter)Dialect_get_quotechar},
{ "quoting", (getter)Dialect_get_quoting},
{NULL},
};
static void
......@@ -283,107 +285,155 @@ Dialect_dealloc(DialectObj *self)
self->ob_type->tp_free((PyObject *)self);
}
/*
* Return a new reference to a dialect instance
*
* If given a string, looks up the name in our dialect registry
* If given a class, instantiate (which runs python validity checks)
* If given an instance, return a new reference to the instance
*/
static PyObject *
dialect_instantiate(PyObject *dialect)
{
Py_INCREF(dialect);
/* If dialect is a string, look it up in our registry */
if (PyString_Check(dialect)
#ifdef Py_USING_UNICODE
|| PyUnicode_Check(dialect)
#endif
) {
PyObject * new_dia;
new_dia = get_dialect_from_registry(dialect);
Py_DECREF(dialect);
return new_dia;
}
/* A class rather than an instance? Instantiate */
if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
PyObject * new_dia;
new_dia = PyObject_CallFunction(dialect, "");
Py_DECREF(dialect);
return new_dia;
}
/* Make sure we finally have an instance */
if (!PyInstance_Check(dialect)) {
PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
Py_DECREF(dialect);
return NULL;
}
return dialect;
}
static char *dialect_kws[] = {
"dialect",
"delimiter",
"doublequote",
"escapechar",
"lineterminator",
"quotechar",
"quoting",
"skipinitialspace",
"strict",
NULL
};
static int
dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
{
PyObject *dialect = NULL, *name_obj, *value_obj;
self->quotechar = '"';
self->delimiter = ',';
self->escapechar = '\0';
self->skipinitialspace = 0;
Py_XDECREF(self->lineterminator);
self->lineterminator = PyString_FromString("\r\n");
if (self->lineterminator == NULL)
int ret = -1;
PyObject *dialect = NULL;
PyObject *delimiter = NULL;
PyObject *doublequote = NULL;
PyObject *escapechar = NULL;
PyObject *lineterminator = NULL;
PyObject *quotechar = NULL;
PyObject *quoting = NULL;
PyObject *skipinitialspace = NULL;
PyObject *strict = NULL;
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
"|OOOOOOOOO", dialect_kws,
&dialect,
&delimiter,
&doublequote,
&escapechar,
&lineterminator,
&quotechar,
&quoting,
&skipinitialspace,
&strict))
return -1;
self->quoting = QUOTE_MINIMAL;
self->doublequote = 1;
self->strict = 0;
if (!PyArg_UnpackTuple(args, "", 0, 1, &dialect))
return -1;
Py_XINCREF(dialect);
if (kwargs != NULL) {
PyObject * key = PyString_FromString("dialect");
PyObject * d;
d = PyDict_GetItem(kwargs, key);
if (d) {
Py_INCREF(d);
Py_XDECREF(dialect);
PyDict_DelItem(kwargs, key);
dialect = d;
}
Py_DECREF(key);
}
if (dialect != NULL) {
int i;
PyObject * dir_list;
Py_XINCREF(delimiter);
Py_XINCREF(doublequote);
Py_XINCREF(escapechar);
Py_XINCREF(lineterminator);
Py_XINCREF(quotechar);
Py_XINCREF(quoting);
Py_XINCREF(skipinitialspace);
Py_XINCREF(strict);
if (dialect != NULL) {
dialect = dialect_instantiate(dialect);
if (dialect == NULL)
goto err;
#define DIALECT_GETATTR(v, n) \
if (v == NULL) \
v = PyObject_GetAttrString(dialect, n)
DIALECT_GETATTR(delimiter, "delimiter");
DIALECT_GETATTR(doublequote, "doublequote");
DIALECT_GETATTR(escapechar, "escapechar");
DIALECT_GETATTR(lineterminator, "lineterminator");
DIALECT_GETATTR(quotechar, "quotechar");
DIALECT_GETATTR(quoting, "quoting");
DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
DIALECT_GETATTR(strict, "strict");
PyErr_Clear();
Py_DECREF(dialect);
}
/* If dialect is a string, look it up in our registry */
if (PyString_Check(dialect)
#ifdef Py_USING_UNICODE
|| PyUnicode_Check(dialect)
#endif
) {
PyObject * new_dia;
new_dia = get_dialect_from_registry(dialect);
Py_DECREF(dialect);
if (new_dia == NULL)
return -1;
dialect = new_dia;
}
/* A class rather than an instance? Instantiate */
if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
PyObject * new_dia;
new_dia = PyObject_CallFunction(dialect, "");
Py_DECREF(dialect);
if (new_dia == NULL)
return -1;
dialect = new_dia;
}
/* Make sure we finally have an instance */
if (!PyInstance_Check(dialect) ||
(dir_list = PyObject_Dir(dialect)) == NULL) {
PyErr_SetString(PyExc_TypeError,
"dialect must be an instance");
Py_DECREF(dialect);
return -1;
}
/* And extract the attributes */
for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) {
char *s;
name_obj = PyList_GET_ITEM(dir_list, i);
s = PyString_AsString(name_obj);
if (s == NULL)
return -1;
if (s[0] == '_')
continue;
value_obj = PyObject_GetAttr(dialect, name_obj);
if (value_obj) {
if (PyObject_SetAttr((PyObject *)self,
name_obj, value_obj)) {
Py_DECREF(value_obj);
Py_DECREF(dir_list);
Py_DECREF(dialect);
return -1;
}
Py_DECREF(value_obj);
}
}
Py_DECREF(dir_list);
Py_DECREF(dialect);
}
if (kwargs != NULL) {
int pos = 0;
/* check types and convert to C values */
#define DIASET(meth, name, target, src, dflt) \
if (meth(name, target, src, dflt)) \
goto err
DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
DIASET(_set_bool, "strict", &self->strict, strict, 0);
/* validate options */
if (dialect_check_quoting(self->quoting))
goto err;
if (self->delimiter == 0) {
PyErr_SetString(PyExc_TypeError, "delimiter must be set");
goto err;
}
if (quotechar == Py_None && self->quoting != QUOTE_NONE)
self->quoting = QUOTE_NONE;
if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
PyErr_SetString(PyExc_TypeError,
"quotechar must be set if quoting enabled");
goto err;
}
if (self->lineterminator == 0) {
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
goto err;
}
while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) {
if (PyObject_SetAttr((PyObject *)self,
name_obj, value_obj))
return -1;
}
}
return 0;
ret = 0;
err:
Py_XDECREF(delimiter);
Py_XDECREF(doublequote);
Py_XDECREF(escapechar);
Py_XDECREF(lineterminator);
Py_XDECREF(quotechar);
Py_XDECREF(quoting);
Py_XDECREF(skipinitialspace);
Py_XDECREF(strict);
return ret;
}
static PyObject *
......@@ -433,7 +483,7 @@ static PyTypeObject Dialect_Type = {
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Dialect_methods, /* tp_methods */
0, /* tp_methods */
Dialect_memberlist, /* tp_members */
Dialect_getsetlist, /* tp_getset */
0, /* tp_base */
......@@ -509,7 +559,8 @@ parse_process_char(ReaderObj *self, char c)
parse_save_field(self);
self->state = START_RECORD;
}
else if (c == dialect->quotechar) {
else if (c == dialect->quotechar &&
dialect->quoting != QUOTE_NONE) {
/* start quoted field */
self->state = IN_QUOTED_FIELD;
}
......@@ -572,7 +623,8 @@ parse_process_char(ReaderObj *self, char c)
/* Possible escape character */
self->state = ESCAPE_IN_QUOTED_FIELD;
}
else if (c == dialect->quotechar) {
else if (c == dialect->quotechar &&
dialect->quoting != QUOTE_NONE) {
if (dialect->doublequote) {
/* doublequote; " represented by "" */
self->state = QUOTE_IN_QUOTED_FIELD;
......@@ -1332,7 +1384,7 @@ csv_register_dialect(PyObject *module, PyObject *args)
return NULL;
}
Py_INCREF(dialect_obj);
/* A class rather than an instance? Instanciate */
/* A class rather than an instance? Instantiate */
if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
PyObject * new_dia;
new_dia = PyObject_CallFunction(dialect_obj, "");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment