Commit c89f284d authored by Andrew McNamara

When using QUOTE_NONNUMERIC, we now test for "numericness" with
PyNumber_Check, rather than trying to convert to a float. Reimplemented
the writer - it now raises an exception when it sees a quotechar but
neither doublequote nor escapechar is set. Doublequote results are now
more consistent (e.g., a field consisting of a single quote character
should generate """", rather than "", which is ambiguous).
parent 31d8896e
...@@ -152,25 +152,35 @@ class Test_Csv(unittest.TestCase): ...@@ -152,25 +152,35 @@ class Test_Csv(unittest.TestCase):
(bigstring, bigstring)) (bigstring, bigstring))
def test_write_quoting(self): def test_write_quoting(self):
self._write_test(['a','1','p,q'], 'a,1,"p,q"') self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
self.assertRaises(csv.Error, self.assertRaises(csv.Error,
self._write_test, self._write_test,
['a','1','p,q'], 'a,1,"p,q"', ['a',1,'p,q'], 'a,1,p,q',
quoting = csv.QUOTE_NONE) quoting = csv.QUOTE_NONE)
self._write_test(['a','1','p,q'], 'a,1,"p,q"', self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
quoting = csv.QUOTE_MINIMAL) quoting = csv.QUOTE_MINIMAL)
self._write_test(['a','1','p,q'], '"a",1,"p,q"', self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
quoting = csv.QUOTE_NONNUMERIC) quoting = csv.QUOTE_NONNUMERIC)
self._write_test(['a','1','p,q'], '"a","1","p,q"', self._write_test(['a',1,'p,q'], '"a","1","p,q"',
quoting = csv.QUOTE_ALL) quoting = csv.QUOTE_ALL)
def test_write_escape(self): def test_write_escape(self):
self._write_test(['a','1','p,q'], 'a,1,"p,q"', self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
escapechar='\\') escapechar='\\')
# FAILED - needs to be fixed [am]: self.assertRaises(csv.Error,
# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"', self._write_test,
# escapechar='\\', doublequote = 0) ['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
self._write_test(['a','1','p,q'], 'a,1,p\\,q', escapechar=None, doublequote=False)
self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
escapechar='\\', doublequote = False)
self._write_test(['"'], '""""',
escapechar='\\', quoting = csv.QUOTE_MINIMAL)
self._write_test(['"'], '\\"',
escapechar='\\', quoting = csv.QUOTE_MINIMAL,
doublequote = False)
self._write_test(['"'], '\\"',
escapechar='\\', quoting = csv.QUOTE_NONE)
self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
escapechar='\\', quoting = csv.QUOTE_NONE) escapechar='\\', quoting = csv.QUOTE_NONE)
def test_writerows(self): def test_writerows(self):
......
...@@ -45,6 +45,9 @@ Library ...@@ -45,6 +45,9 @@ Library
+ quotechar=None and quoting=QUOTE_NONE now work the way PEP 305 + quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
dictates. dictates.
+ the parser now removes the escapechar prefix from escaped characters. + the parser now removes the escapechar prefix from escaped characters.
+ QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting
to cast to float.
+ writer doublequote handling improved.
+ Dialect classes passed to the module are no longer instantiated by + Dialect classes passed to the module are no longer instantiated by
the module before being parsed (the former validation scheme required the module before being parsed (the former validation scheme required
this, but the mechanism was unreliable). this, but the mechanism was unreliable).
......
...@@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty, ...@@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
{ {
DialectObj *dialect = self->dialect; DialectObj *dialect = self->dialect;
int i, rec_len; int i, rec_len;
char *lineterm;
#define ADDCH(c) \
do {\
if (copy_phase) \
self->rec[rec_len] = c;\
rec_len++;\
} while(0)
lineterm = PyString_AsString(dialect->lineterminator);
if (lineterm == NULL)
return -1;
rec_len = self->rec_len; rec_len = self->rec_len;
/* If this is not the first field we need a field separator. /* If this is not the first field we need a field separator */
*/ if (self->num_fields > 0)
if (self->num_fields > 0) { ADDCH(dialect->delimiter);
if (copy_phase)
self->rec[rec_len] = dialect->delimiter; /* Handle preceding quote */
rec_len++; if (copy_phase && *quoted)
} ADDCH(dialect->quotechar);
/* Handle preceding quote.
*/ /* Copy/count field data */
switch (dialect->quoting) {
case QUOTE_ALL:
*quoted = 1;
if (copy_phase)
self->rec[rec_len] = dialect->quotechar;
rec_len++;
break;
case QUOTE_MINIMAL:
case QUOTE_NONNUMERIC:
/* We only know about quoted in the copy phase.
*/
if (copy_phase && *quoted) {
self->rec[rec_len] = dialect->quotechar;
rec_len++;
}
break;
case QUOTE_NONE:
break;
}
/* Copy/count field data.
*/
for (i = 0;; i++) { for (i = 0;; i++) {
char c = field[i]; char c = field[i];
int want_escape = 0;
if (c == '\0') if (c == '\0')
break; break;
/* If in doublequote mode we escape quote chars with a
* quote.
*/
if (dialect->quoting != QUOTE_NONE &&
c == dialect->quotechar && dialect->doublequote) {
if (copy_phase)
self->rec[rec_len] = dialect->quotechar;
*quoted = 1;
rec_len++;
}
/* Some special characters need to be escaped. If we have a if (c == dialect->delimiter ||
* quote character switch to quoted field instead of escaping c == dialect->escapechar ||
* individual characters. c == dialect->quotechar ||
*/ strchr(lineterm, c)) {
if (!*quoted if (dialect->quoting == QUOTE_NONE)
&& (c == dialect->delimiter || want_escape = 1;
c == dialect->escapechar ||
c == '\n' || c == '\r')) {
if (dialect->quoting != QUOTE_NONE)
*quoted = 1;
else if (dialect->escapechar) {
if (copy_phase)
self->rec[rec_len] = dialect->escapechar;
rec_len++;
}
else { else {
PyErr_Format(error_obj, if (c == dialect->quotechar) {
"delimiter must be quoted or escaped"); if (dialect->doublequote)
return -1; ADDCH(dialect->quotechar);
else
want_escape = 1;
}
if (!want_escape)
*quoted = 1;
}
if (want_escape) {
if (!dialect->escapechar) {
PyErr_Format(error_obj,
"need to escape, but no escapechar set");
return -1;
}
ADDCH(dialect->escapechar);
} }
} }
/* Copy field character into record buffer. /* Copy field character into record buffer.
*/ */
if (copy_phase) ADDCH(c);
self->rec[rec_len] = c;
rec_len++;
} }
/* If field is empty check if it needs to be quoted. /* If field is empty check if it needs to be quoted.
...@@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty, ...@@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
*quoted = 1; *quoted = 1;
} }
/* Handle final quote character on field.
*/
if (*quoted) { if (*quoted) {
if (copy_phase) if (copy_phase)
self->rec[rec_len] = dialect->quotechar; ADDCH(dialect->quotechar);
else else
/* Didn't know about leading quote until we found it rec_len += 2;
* necessary in field data - compensate for it now.
*/
rec_len++;
rec_len++;
} }
return rec_len; return rec_len;
#undef ADDCH
} }
static int static int
...@@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq) ...@@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq)
if (field == NULL) if (field == NULL)
return NULL; return NULL;
quoted = 0; switch (dialect->quoting) {
if (dialect->quoting == QUOTE_NONNUMERIC) { case QUOTE_NONNUMERIC:
PyObject *num; quoted = !PyNumber_Check(field);
break;
num = PyNumber_Float(field); case QUOTE_ALL:
if (num == NULL) { quoted = 1;
quoted = 1; break;
PyErr_Clear(); default:
} quoted = 0;
else { break;
Py_DECREF(num);
}
} }
if (PyString_Check(field)) { if (PyString_Check(field)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment