Commit c89f284d authored by Andrew McNamara

When using QUOTE_NONNUMERIC, we now test for "numericness" with

PyNumber_Check, rather than trying to convert to a float.  Reimplemented
writer - now raises exceptions when it sees a quotechar but neither
doublequote nor escapechar is set. Doublequote results are now more
consistent (e.g., a single quote should generate """", rather than "",
which is ambiguous).
parent 31d8896e
......@@ -152,25 +152,35 @@ class Test_Csv(unittest.TestCase):
(bigstring, bigstring))
def test_write_quoting(self):
self._write_test(['a','1','p,q'], 'a,1,"p,q"')
self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
self.assertRaises(csv.Error,
self._write_test,
['a','1','p,q'], 'a,1,"p,q"',
['a',1,'p,q'], 'a,1,p,q',
quoting = csv.QUOTE_NONE)
self._write_test(['a','1','p,q'], 'a,1,"p,q"',
self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
quoting = csv.QUOTE_MINIMAL)
self._write_test(['a','1','p,q'], '"a",1,"p,q"',
self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
quoting = csv.QUOTE_NONNUMERIC)
self._write_test(['a','1','p,q'], '"a","1","p,q"',
self._write_test(['a',1,'p,q'], '"a","1","p,q"',
quoting = csv.QUOTE_ALL)
def test_write_escape(self):
self._write_test(['a','1','p,q'], 'a,1,"p,q"',
self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
escapechar='\\')
# FAILED - needs to be fixed [am]:
# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"',
# escapechar='\\', doublequote = 0)
self._write_test(['a','1','p,q'], 'a,1,p\\,q',
self.assertRaises(csv.Error,
self._write_test,
['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
escapechar=None, doublequote=False)
self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
escapechar='\\', doublequote = False)
self._write_test(['"'], '""""',
escapechar='\\', quoting = csv.QUOTE_MINIMAL)
self._write_test(['"'], '\\"',
escapechar='\\', quoting = csv.QUOTE_MINIMAL,
doublequote = False)
self._write_test(['"'], '\\"',
escapechar='\\', quoting = csv.QUOTE_NONE)
self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
escapechar='\\', quoting = csv.QUOTE_NONE)
def test_writerows(self):
......
......@@ -45,6 +45,9 @@ Library
+ quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
dictates.
+ the parser now removes the escapechar prefix from escaped characters.
+ QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting
to cast to float.
+ writer doublequote handling improved.
+ Dialect classes passed to the module are no longer instantiated by
the module before being parsed (the former validation scheme required
this, but the mechanism was unreliable).
......
......@@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
{
DialectObj *dialect = self->dialect;
int i, rec_len;
char *lineterm;
#define ADDCH(c) \
do {\
if (copy_phase) \
self->rec[rec_len] = c;\
rec_len++;\
} while(0)
lineterm = PyString_AsString(dialect->lineterminator);
if (lineterm == NULL)
return -1;
rec_len = self->rec_len;
/* If this is not the first field we need a field separator.
*/
if (self->num_fields > 0) {
if (copy_phase)
self->rec[rec_len] = dialect->delimiter;
rec_len++;
}
/* Handle preceding quote.
*/
switch (dialect->quoting) {
case QUOTE_ALL:
*quoted = 1;
if (copy_phase)
self->rec[rec_len] = dialect->quotechar;
rec_len++;
break;
case QUOTE_MINIMAL:
case QUOTE_NONNUMERIC:
/* We only know about quoted in the copy phase.
*/
if (copy_phase && *quoted) {
self->rec[rec_len] = dialect->quotechar;
rec_len++;
}
break;
case QUOTE_NONE:
break;
}
/* Copy/count field data.
*/
/* If this is not the first field we need a field separator */
if (self->num_fields > 0)
ADDCH(dialect->delimiter);
/* Handle preceding quote */
if (copy_phase && *quoted)
ADDCH(dialect->quotechar);
/* Copy/count field data */
for (i = 0;; i++) {
char c = field[i];
int want_escape = 0;
if (c == '\0')
break;
/* If in doublequote mode we escape quote chars with a
* quote.
*/
if (dialect->quoting != QUOTE_NONE &&
c == dialect->quotechar && dialect->doublequote) {
if (copy_phase)
self->rec[rec_len] = dialect->quotechar;
*quoted = 1;
rec_len++;
}
/* Some special characters need to be escaped. If we have a
* quote character switch to quoted field instead of escaping
* individual characters.
*/
if (!*quoted
&& (c == dialect->delimiter ||
c == dialect->escapechar ||
c == '\n' || c == '\r')) {
if (dialect->quoting != QUOTE_NONE)
*quoted = 1;
else if (dialect->escapechar) {
if (copy_phase)
self->rec[rec_len] = dialect->escapechar;
rec_len++;
}
if (c == dialect->delimiter ||
c == dialect->escapechar ||
c == dialect->quotechar ||
strchr(lineterm, c)) {
if (dialect->quoting == QUOTE_NONE)
want_escape = 1;
else {
PyErr_Format(error_obj,
"delimiter must be quoted or escaped");
return -1;
if (c == dialect->quotechar) {
if (dialect->doublequote)
ADDCH(dialect->quotechar);
else
want_escape = 1;
}
if (!want_escape)
*quoted = 1;
}
if (want_escape) {
if (!dialect->escapechar) {
PyErr_Format(error_obj,
"need to escape, but no escapechar set");
return -1;
}
ADDCH(dialect->escapechar);
}
}
/* Copy field character into record buffer.
*/
if (copy_phase)
self->rec[rec_len] = c;
rec_len++;
ADDCH(c);
}
/* If field is empty check if it needs to be quoted.
......@@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
*quoted = 1;
}
/* Handle final quote character on field.
*/
if (*quoted) {
if (copy_phase)
self->rec[rec_len] = dialect->quotechar;
ADDCH(dialect->quotechar);
else
/* Didn't know about leading quote until we found it
* necessary in field data - compensate for it now.
*/
rec_len++;
rec_len++;
rec_len += 2;
}
return rec_len;
#undef ADDCH
}
static int
......@@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq)
if (field == NULL)
return NULL;
quoted = 0;
if (dialect->quoting == QUOTE_NONNUMERIC) {
PyObject *num;
num = PyNumber_Float(field);
if (num == NULL) {
quoted = 1;
PyErr_Clear();
}
else {
Py_DECREF(num);
}
switch (dialect->quoting) {
case QUOTE_NONNUMERIC:
quoted = !PyNumber_Check(field);
break;
case QUOTE_ALL:
quoted = 1;
break;
default:
quoted = 0;
break;
}
if (PyString_Check(field)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment