Commit 4626458c authored by Guido van Rossum

SF patch# 1767398 by Adam Hupp.

Fix csv to read/write bytes from/to binary files.
Fix the unit tests to test this and to use with TemporaryFile().
parent dd766d53
This diff is collapsed.
...@@ -95,9 +95,9 @@ typedef struct { ...@@ -95,9 +95,9 @@ typedef struct {
PyObject_HEAD PyObject_HEAD
int doublequote; /* is " represented by ""? */ int doublequote; /* is " represented by ""? */
char delimiter; /* field separator */ Py_UNICODE delimiter; /* field separator */
char quotechar; /* quote character */ Py_UNICODE quotechar; /* quote character */
char escapechar; /* escape character */ Py_UNICODE escapechar; /* escape character */
int skipinitialspace; /* ignore spaces following delimiter? */ int skipinitialspace; /* ignore spaces following delimiter? */
PyObject *lineterminator; /* string to write between records */ PyObject *lineterminator; /* string to write between records */
int quoting; /* style of quoting to write */ int quoting; /* style of quoting to write */
...@@ -116,9 +116,9 @@ typedef struct { ...@@ -116,9 +116,9 @@ typedef struct {
PyObject *fields; /* field list for current record */ PyObject *fields; /* field list for current record */
ParserState state; /* current CSV parse state */ ParserState state; /* current CSV parse state */
char *field; /* build current field in here */ Py_UNICODE *field; /* build current field in here */
int field_size; /* size of allocated buffer */ int field_size; /* size of allocated buffer */
int field_len; /* length of current field */ Py_ssize_t field_len; /* length of current field */
int numeric_field; /* treat field as numeric */ int numeric_field; /* treat field as numeric */
unsigned long line_num; /* Source-file line number */ unsigned long line_num; /* Source-file line number */
} ReaderObj; } ReaderObj;
...@@ -134,9 +134,9 @@ typedef struct { ...@@ -134,9 +134,9 @@ typedef struct {
DialectObj *dialect; /* parsing dialect */ DialectObj *dialect; /* parsing dialect */
char *rec; /* buffer for parser.join */ Py_UNICODE *rec; /* buffer for parser.join */
int rec_size; /* size of allocated record */ int rec_size; /* size of allocated record */
int rec_len; /* length of record */ Py_ssize_t rec_len; /* length of record */
int num_fields; /* number of fields in record */ int num_fields; /* number of fields in record */
} WriterObj; } WriterObj;
...@@ -176,7 +176,7 @@ get_nullchar_as_None(char c) ...@@ -176,7 +176,7 @@ get_nullchar_as_None(char c)
return Py_None; return Py_None;
} }
else else
return PyString_FromStringAndSize((char*)&c, 1); return PyUnicode_DecodeASCII((char*)&c, 1, NULL);
} }
static PyObject * static PyObject *
...@@ -230,17 +230,18 @@ _set_int(const char *name, int *target, PyObject *src, int dflt) ...@@ -230,17 +230,18 @@ _set_int(const char *name, int *target, PyObject *src, int dflt)
} }
static int static int
_set_char(const char *name, char *target, PyObject *src, char dflt) _set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
{ {
if (src == NULL) if (src == NULL)
*target = dflt; *target = dflt;
else { else {
*target = '\0'; *target = '\0';
if (src != Py_None) { if (src != Py_None) {
const char *buf; Py_UNICODE *buf;
Py_ssize_t len; Py_ssize_t len;
if (PyObject_AsCharBuffer(src, &buf, &len) < 0 || buf = PyUnicode_AsUnicode(src);
len > 1) { len = PyUnicode_GetSize(src);
if (buf == NULL || len > 1) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"\"%s\" must be an 1-character string", "\"%s\" must be an 1-character string",
name); name);
...@@ -257,7 +258,7 @@ static int ...@@ -257,7 +258,7 @@ static int
_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
{ {
if (src == NULL) if (src == NULL)
*target = PyString_FromString(dflt); *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
else { else {
if (src == Py_None) if (src == Py_None)
*target = NULL; *target = NULL;
...@@ -528,7 +529,7 @@ parse_save_field(ReaderObj *self) ...@@ -528,7 +529,7 @@ parse_save_field(ReaderObj *self)
{ {
PyObject *field; PyObject *field;
field = PyString_FromStringAndSize(self->field, self->field_len); field = PyUnicode_FromUnicode(self->field, self->field_len);
if (field == NULL) if (field == NULL)
return -1; return -1;
self->field_len = 0; self->field_len = 0;
...@@ -556,11 +557,12 @@ parse_grow_buff(ReaderObj *self) ...@@ -556,11 +557,12 @@ parse_grow_buff(ReaderObj *self)
self->field_size = 4096; self->field_size = 4096;
if (self->field != NULL) if (self->field != NULL)
PyMem_Free(self->field); PyMem_Free(self->field);
self->field = PyMem_Malloc(self->field_size); self->field = PyMem_New(Py_UNICODE, self->field_size);
} }
else { else {
self->field_size *= 2; self->field_size *= 2;
self->field = PyMem_Realloc(self->field, self->field_size); self->field = PyMem_Resize(self->field, Py_UNICODE,
self->field_size);
} }
if (self->field == NULL) { if (self->field == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
...@@ -570,7 +572,7 @@ parse_grow_buff(ReaderObj *self) ...@@ -570,7 +572,7 @@ parse_grow_buff(ReaderObj *self)
} }
static int static int
parse_add_char(ReaderObj *self, char c) parse_add_char(ReaderObj *self, Py_UNICODE c)
{ {
if (self->field_len >= field_limit) { if (self->field_len >= field_limit) {
PyErr_Format(error_obj, "field larger than field limit (%ld)", PyErr_Format(error_obj, "field larger than field limit (%ld)",
...@@ -584,7 +586,7 @@ parse_add_char(ReaderObj *self, char c) ...@@ -584,7 +586,7 @@ parse_add_char(ReaderObj *self, char c)
} }
static int static int
parse_process_char(ReaderObj *self, char c) parse_process_char(ReaderObj *self, Py_UNICODE c)
{ {
DialectObj *dialect = self->dialect; DialectObj *dialect = self->dialect;
...@@ -771,8 +773,8 @@ Reader_iternext(ReaderObj *self) ...@@ -771,8 +773,8 @@ Reader_iternext(ReaderObj *self)
{ {
PyObject *lineobj; PyObject *lineobj;
PyObject *fields = NULL; PyObject *fields = NULL;
char *line, c; Py_UNICODE *line, c;
int linelen; Py_ssize_t linelen;
if (parse_reset(self) < 0) if (parse_reset(self) < 0)
return NULL; return NULL;
...@@ -786,10 +788,8 @@ Reader_iternext(ReaderObj *self) ...@@ -786,10 +788,8 @@ Reader_iternext(ReaderObj *self)
return NULL; return NULL;
} }
++self->line_num; ++self->line_num;
line = PyUnicode_AsUnicode(lineobj);
line = PyString_AsString(lineobj); linelen = PyUnicode_GetSize(lineobj);
linelen = PyString_Size(lineobj);
if (line == NULL || linelen < 0) { if (line == NULL || linelen < 0) {
Py_DECREF(lineobj); Py_DECREF(lineobj);
return NULL; return NULL;
...@@ -962,12 +962,13 @@ join_reset(WriterObj *self) ...@@ -962,12 +962,13 @@ join_reset(WriterObj *self)
* record length. * record length.
*/ */
static int static int
join_append_data(WriterObj *self, char *field, int quote_empty, join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
int *quoted, int copy_phase) int *quoted, int copy_phase)
{ {
DialectObj *dialect = self->dialect; DialectObj *dialect = self->dialect;
int i, rec_len; int i;
char *lineterm; int rec_len;
Py_UNICODE *lineterm;
#define ADDCH(c) \ #define ADDCH(c) \
do {\ do {\
...@@ -976,7 +977,7 @@ join_append_data(WriterObj *self, char *field, int quote_empty, ...@@ -976,7 +977,7 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
rec_len++;\ rec_len++;\
} while(0) } while(0)
lineterm = PyString_AsString(dialect->lineterminator); lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
if (lineterm == NULL) if (lineterm == NULL)
return -1; return -1;
...@@ -991,8 +992,9 @@ join_append_data(WriterObj *self, char *field, int quote_empty, ...@@ -991,8 +992,9 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
ADDCH(dialect->quotechar); ADDCH(dialect->quotechar);
/* Copy/count field data */ /* Copy/count field data */
for (i = 0;; i++) { /* If field is null just pass over */
char c = field[i]; for (i = 0; field; i++) {
Py_UNICODE c = field[i];
int want_escape = 0; int want_escape = 0;
if (c == '\0') if (c == '\0')
...@@ -1001,7 +1003,7 @@ join_append_data(WriterObj *self, char *field, int quote_empty, ...@@ -1001,7 +1003,7 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
if (c == dialect->delimiter || if (c == dialect->delimiter ||
c == dialect->escapechar || c == dialect->escapechar ||
c == dialect->quotechar || c == dialect->quotechar ||
strchr(lineterm, c)) { Py_UNICODE_strchr(lineterm, c)) {
if (dialect->quoting == QUOTE_NONE) if (dialect->quoting == QUOTE_NONE)
want_escape = 1; want_escape = 1;
else { else {
...@@ -1058,13 +1060,14 @@ join_check_rec_size(WriterObj *self, int rec_len) ...@@ -1058,13 +1060,14 @@ join_check_rec_size(WriterObj *self, int rec_len)
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
if (self->rec != NULL) if (self->rec != NULL)
PyMem_Free(self->rec); PyMem_Free(self->rec);
self->rec = PyMem_Malloc(self->rec_size); self->rec = PyMem_New(Py_UNICODE, self->rec_size);
} }
else { else {
char *old_rec = self->rec; Py_UNICODE* old_rec = self->rec;
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
self->rec = PyMem_Realloc(self->rec, self->rec_size); self->rec = PyMem_Resize(self->rec, Py_UNICODE,
self->rec_size);
if (self->rec == NULL) if (self->rec == NULL)
PyMem_Free(old_rec); PyMem_Free(old_rec);
} }
...@@ -1077,7 +1080,7 @@ join_check_rec_size(WriterObj *self, int rec_len) ...@@ -1077,7 +1080,7 @@ join_check_rec_size(WriterObj *self, int rec_len)
} }
static int static int
join_append(WriterObj *self, char *field, int *quoted, int quote_empty) join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
{ {
int rec_len; int rec_len;
...@@ -1099,9 +1102,9 @@ static int ...@@ -1099,9 +1102,9 @@ static int
join_append_lineterminator(WriterObj *self) join_append_lineterminator(WriterObj *self)
{ {
int terminator_len; int terminator_len;
char *terminator; Py_UNICODE *terminator;
terminator_len = PyString_Size(self->dialect->lineterminator); terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
if (terminator_len == -1) if (terminator_len == -1)
return 0; return 0;
...@@ -1109,10 +1112,11 @@ join_append_lineterminator(WriterObj *self) ...@@ -1109,10 +1112,11 @@ join_append_lineterminator(WriterObj *self)
if (!join_check_rec_size(self, self->rec_len + terminator_len)) if (!join_check_rec_size(self, self->rec_len + terminator_len))
return 0; return 0;
terminator = PyString_AsString(self->dialect->lineterminator); terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
if (terminator == NULL) if (terminator == NULL)
return 0; return 0;
memmove(self->rec + self->rec_len, terminator, terminator_len); memmove(self->rec + self->rec_len, terminator,
sizeof(Py_UNICODE)*terminator_len);
self->rec_len += terminator_len; self->rec_len += terminator_len;
return 1; return 1;
...@@ -1161,25 +1165,26 @@ csv_writerow(WriterObj *self, PyObject *seq) ...@@ -1161,25 +1165,26 @@ csv_writerow(WriterObj *self, PyObject *seq)
break; break;
} }
if (PyString_Check(field)) { if (PyUnicode_Check(field)) {
append_ok = join_append(self, append_ok = join_append(self,
PyString_AS_STRING(field), PyUnicode_AS_UNICODE(field),
&quoted, len == 1); &quoted, len == 1);
Py_DECREF(field); Py_DECREF(field);
} }
else if (field == Py_None) { else if (field == Py_None) {
append_ok = join_append(self, "", &quoted, len == 1); append_ok = join_append(self, NULL,
&quoted, len == 1);
Py_DECREF(field); Py_DECREF(field);
} }
else { else {
PyObject *str; PyObject *str;
str = PyObject_Str(field); str = PyObject_Unicode(field);
Py_DECREF(field); Py_DECREF(field);
if (str == NULL) if (str == NULL)
return NULL; return NULL;
append_ok = join_append(self,
append_ok = join_append(self, PyString_AS_STRING(str), PyUnicode_AS_UNICODE(str),
&quoted, len == 1); &quoted, len == 1);
Py_DECREF(str); Py_DECREF(str);
} }
...@@ -1193,7 +1198,8 @@ csv_writerow(WriterObj *self, PyObject *seq) ...@@ -1193,7 +1198,8 @@ csv_writerow(WriterObj *self, PyObject *seq)
return 0; return 0;
return PyObject_CallFunction(self->writeline, return PyObject_CallFunction(self->writeline,
"(s#)", self->rec, self->rec_len); "(u#)", self->rec,
self->rec_len);
} }
PyDoc_STRVAR(csv_writerows_doc, PyDoc_STRVAR(csv_writerows_doc,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment