Commit f09ca140 authored by Guido van Rossum

Checkpoint: half-fixed the bz2 module. 'U' is no longer supported.

parent da5b8f2d
...@@ -96,16 +96,12 @@ typedef fpos_t Py_off_t; ...@@ -96,16 +96,12 @@ typedef fpos_t Py_off_t;
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
PyObject *file; FILE *rawfp;
char* f_buf; /* Allocated readahead buffer */ char* f_buf; /* Allocated readahead buffer */
char* f_bufend; /* Points after last occupied position */ char* f_bufend; /* Points after last occupied position */
char* f_bufptr; /* Current buffer position */ char* f_bufptr; /* Current buffer position */
int f_univ_newline; /* Handle any newline convention */
int f_newlinetypes; /* Types of newlines seen */
int f_skipnextlf; /* Skip next \n */
BZFILE *fp; BZFILE *fp;
int mode; int mode;
Py_off_t pos; Py_off_t pos;
...@@ -233,9 +229,6 @@ Util_GetLine(BZ2FileObject *f, int n) ...@@ -233,9 +229,6 @@ Util_GetLine(BZ2FileObject *f, int n)
size_t increment; /* amount to increment the buffer */ size_t increment; /* amount to increment the buffer */
PyObject *v; PyObject *v;
int bzerror; int bzerror;
int newlinetypes = f->f_newlinetypes;
int skipnextlf = f->f_skipnextlf;
int univ_newline = f->f_univ_newline;
total_v_size = n > 0 ? n : 100; total_v_size = n > 0 ? n : 100;
v = PyBytes_FromStringAndSize((char *)NULL, total_v_size); v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
...@@ -247,47 +240,12 @@ Util_GetLine(BZ2FileObject *f, int n) ...@@ -247,47 +240,12 @@ Util_GetLine(BZ2FileObject *f, int n)
for (;;) { for (;;) {
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
if (univ_newline) { do {
while (1) { BZ2_bzRead(&bzerror, f->fp, &c, 1);
BZ2_bzRead(&bzerror, f->fp, &c, 1); f->pos++;
f->pos++; *buf++ = c;
if (bzerror != BZ_OK || buf == end) } while (bzerror == BZ_OK && c != '\n' && buf != end);
break;
if (skipnextlf) {
skipnextlf = 0;
if (c == '\n') {
/* Seeing a \n here with
* skipnextlf true means we
* saw a \r before.
*/
newlinetypes |= NEWLINE_CRLF;
BZ2_bzRead(&bzerror, f->fp,
&c, 1);
if (bzerror != BZ_OK)
break;
} else {
newlinetypes |= NEWLINE_CR;
}
}
if (c == '\r') {
skipnextlf = 1;
c = '\n';
} else if ( c == '\n')
newlinetypes |= NEWLINE_LF;
*buf++ = c;
if (c == '\n') break;
}
if (bzerror == BZ_STREAM_END && skipnextlf)
newlinetypes |= NEWLINE_CR;
} else /* If not universal newlines use the normal loop */
do {
BZ2_bzRead(&bzerror, f->fp, &c, 1);
f->pos++;
*buf++ = c;
} while (bzerror == BZ_OK && c != '\n' && buf != end);
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
f->f_newlinetypes = newlinetypes;
f->f_skipnextlf = skipnextlf;
if (bzerror == BZ_STREAM_END) { if (bzerror == BZ_STREAM_END) {
f->size = f->pos; f->size = f->pos;
f->mode = MODE_READ_EOF; f->mode = MODE_READ_EOF;
...@@ -329,74 +287,6 @@ Util_GetLine(BZ2FileObject *f, int n) ...@@ -329,74 +287,6 @@ Util_GetLine(BZ2FileObject *f, int n)
return v; return v;
} }
/* This is a hacked version of Python's
* fileobject.c:Py_UniversalNewlineFread().
*
* Read up to n bytes from the bzip2 stream into buf, translating newlines
* when the file object has f_univ_newline set: every '\r' is stored as
* '\n', and a '\n' that directly follows a '\r' is dropped.
*
* bzerror - out-parameter handed straight to BZ2_bzRead().
* stream  - bzip2 read handle; must not be NULL (asserted).
* buf     - destination buffer; must not be NULL (asserted).
* n       - number of bytes requested.
* f       - file object supplying f_univ_newline and carrying the
*           f_newlinetypes / f_skipnextlf state from one call to the next.
*
* Returns the number of bytes stored in buf (dst - buf). When
* f_univ_newline is not set, the call is forwarded unchanged to
* BZ2_bzRead() and its return value is returned.
*/
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
char* buf, size_t n, BZ2FileObject *f)
{
char *dst = buf;
int newlinetypes, skipnextlf;
assert(buf != NULL);
assert(stream != NULL);
/* Fast path: no translation requested, read straight into buf. */
if (!f->f_univ_newline)
return BZ2_bzRead(bzerror, stream, buf, n);
/* Work on local copies of the newline state; written back on exit. */
newlinetypes = f->f_newlinetypes;
skipnextlf = f->f_skipnextlf;
/* Invariant: n is the number of bytes remaining to be filled
* in the buffer.
*/
while (n) {
size_t nread;
int shortread;
/* The raw bytes land at dst and are compacted in place: src walks
* the freshly read bytes while dst trails behind it (or level). */
char *src = dst;
nread = BZ2_bzRead(bzerror, stream, dst, n);
assert(nread <= n);
n -= nread; /* assuming 1 byte out for each in; will adjust */
shortread = n != 0; /* true iff EOF or error */
while (nread--) {
char c = *src++;
if (c == '\r') {
/* Save as LF and set flag to skip next LF. */
/* NOTE(review): this branch does not look at skipnextlf, so a
* '\r' that follows another '\r' does not record NEWLINE_CR
* for the first one - confirm whether that is intended. */
*dst++ = '\n';
skipnextlf = 1;
}
else if (skipnextlf && c == '\n') {
/* Skip LF, and remember we saw CR LF. */
skipnextlf = 0;
newlinetypes |= NEWLINE_CRLF;
/* Nothing was stored for this input byte, so one more slot
* of the buffer is still free. */
++n;
}
else {
/* Normal char to be stored in buffer. Also
* update the newlinetypes flag if either this
* is an LF or the previous char was a CR.
*/
if (c == '\n')
newlinetypes |= NEWLINE_LF;
else if (skipnextlf)
newlinetypes |= NEWLINE_CR;
*dst++ = c;
skipnextlf = 0;
}
}
if (shortread) {
/* If this is EOF, update type flags. */
/* A pending '\r' at end of stream was a lone CR. */
if (skipnextlf && *bzerror == BZ_STREAM_END)
newlinetypes |= NEWLINE_CR;
break;
}
}
/* Persist the state so the next call can finish a CR LF pair that
* straddles two reads. */
f->f_newlinetypes = newlinetypes;
f->f_skipnextlf = skipnextlf;
return dst - buf;
}
/* This is a hacked version of Python's fileobject.c:drop_readahead(). */ /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
static void static void
Util_DropReadAhead(BZ2FileObject *f) Util_DropReadAhead(BZ2FileObject *f)
...@@ -429,8 +319,7 @@ Util_ReadAhead(BZ2FileObject *f, int bufsize) ...@@ -429,8 +319,7 @@ Util_ReadAhead(BZ2FileObject *f, int bufsize)
return -1; return -1;
} }
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf, chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
bufsize, f);
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
f->pos += chunksize; f->pos += chunksize;
if (bzerror == BZ_STREAM_END) { if (bzerror == BZ_STREAM_END) {
...@@ -548,10 +437,9 @@ BZ2File_read(BZ2FileObject *self, PyObject *args) ...@@ -548,10 +437,9 @@ BZ2File_read(BZ2FileObject *self, PyObject *args)
for (;;) { for (;;) {
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
chunksize = Util_UnivNewlineRead(&bzerror, self->fp, chunksize = BZ2_bzRead(&bzerror, self->fp,
BUF(ret)+bytesread, BUF(ret)+bytesread,
buffersize-bytesread, buffersize-bytesread);
self);
self->pos += chunksize; self->pos += chunksize;
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
bytesread += chunksize; bytesread += chunksize;
...@@ -685,9 +573,8 @@ BZ2File_readlines(BZ2FileObject *self, PyObject *args) ...@@ -685,9 +573,8 @@ BZ2File_readlines(BZ2FileObject *self, PyObject *args)
for (;;) { for (;;) {
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
nread = Util_UnivNewlineRead(&bzerror, self->fp, nread = BZ2_bzRead(&bzerror, self->fp,
buffer+nfilled, buffer+nfilled, buffersize-nfilled);
buffersize-nfilled, self);
self->pos += nread; self->pos += nread;
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
if (bzerror == BZ_STREAM_END) { if (bzerror == BZ_STREAM_END) {
...@@ -1043,10 +930,8 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args) ...@@ -1043,10 +930,8 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args)
assert(self->mode != MODE_READ_EOF); assert(self->mode != MODE_READ_EOF);
for (;;) { for (;;) {
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
chunksize = Util_UnivNewlineRead( chunksize = BZ2_bzRead(&bzerror, self->fp,
&bzerror, self->fp, buffer, buffersize);
buffer, buffersize,
self);
self->pos += chunksize; self->pos += chunksize;
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
...@@ -1075,19 +960,14 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args) ...@@ -1075,19 +960,14 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args)
offset -= self->pos; offset -= self->pos;
} else { } else {
/* we cannot move back, so rewind the stream */ /* we cannot move back, so rewind the stream */
FILE *fp = NULL; /* XXX temporary!!! */
BZ2_bzReadClose(&bzerror, self->fp); BZ2_bzReadClose(&bzerror, self->fp);
if (bzerror != BZ_OK) { if (bzerror != BZ_OK) {
Util_CatchBZ2Error(bzerror); Util_CatchBZ2Error(bzerror);
goto cleanup; goto cleanup;
} }
ret = PyObject_CallMethod(self->file, "seek", "(i)", 0); rewind(self->rawfp);
if (!ret)
goto cleanup;
Py_DECREF(ret);
ret = NULL;
self->pos = 0; self->pos = 0;
self->fp = BZ2_bzReadOpen(&bzerror, fp, self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
0, 0, NULL, 0); 0, 0, NULL, 0);
if (bzerror != BZ_OK) { if (bzerror != BZ_OK) {
Util_CatchBZ2Error(bzerror); Util_CatchBZ2Error(bzerror);
...@@ -1110,8 +990,7 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args) ...@@ -1110,8 +990,7 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args)
* condition above). buffersize is 8192. */ * condition above). buffersize is 8192. */
readsize = (size_t)(offset-bytesread); readsize = (size_t)(offset-bytesread);
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
chunksize = Util_UnivNewlineRead(&bzerror, self->fp, chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
buffer, readsize, self);
self->pos += chunksize; self->pos += chunksize;
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
bytesread += chunksize; bytesread += chunksize;
...@@ -1177,6 +1056,10 @@ BZ2File_close(BZ2FileObject *self) ...@@ -1177,6 +1056,10 @@ BZ2File_close(BZ2FileObject *self)
PyObject *ret = NULL; PyObject *ret = NULL;
int bzerror = BZ_OK; int bzerror = BZ_OK;
if (self->mode == MODE_CLOSED) {
Py_RETURN_NONE;
}
ACQUIRE_LOCK(self); ACQUIRE_LOCK(self);
switch (self->mode) { switch (self->mode) {
case MODE_READ: case MODE_READ:
...@@ -1189,11 +1072,14 @@ BZ2File_close(BZ2FileObject *self) ...@@ -1189,11 +1072,14 @@ BZ2File_close(BZ2FileObject *self)
break; break;
} }
self->mode = MODE_CLOSED; self->mode = MODE_CLOSED;
ret = PyObject_CallMethod(self->file, "close", NULL); fclose(self->rawfp);
if (bzerror != BZ_OK) { self->rawfp = NULL;
if (bzerror == BZ_OK) {
Py_INCREF(Py_None);
ret = Py_None;
}
else {
Util_CatchBZ2Error(bzerror); Util_CatchBZ2Error(bzerror);
Py_XDECREF(ret);
ret = NULL;
} }
RELEASE_LOCK(self); RELEASE_LOCK(self);
...@@ -1218,63 +1104,15 @@ static PyMethodDef BZ2File_methods[] = { ...@@ -1218,63 +1104,15 @@ static PyMethodDef BZ2File_methods[] = {
/* ===================================================================== */ /* ===================================================================== */
/* Getters and setters of BZ2File. */ /* Getters and setters of BZ2File. */
/* Getter for the "newlines" attribute; a hacked version of Python's
 * fileobject.c:get_newlines().
 *
 * Maps the NEWLINE_* bits accumulated in self->f_newlinetypes to a Python
 * value: None for NEWLINE_UNKNOWN, a bytes object when exactly one kind of
 * newline was recorded, and a tuple built with Py_BuildValue() when several
 * kinds were recorded. Any other bit pattern sets SystemError and returns
 * NULL. The closure argument is unused.
 */
static PyObject *
BZ2File_get_newlines(BZ2FileObject *self, void *closure)
{
    const int seen = self->f_newlinetypes;

    /* Nothing recorded yet. */
    if (seen == NEWLINE_UNKNOWN) {
        Py_INCREF(Py_None);
        return Py_None;
    }

    /* Exactly one convention recorded: return it as a bytes object. */
    if (seen == NEWLINE_CR) {
        return PyBytes_FromStringAndSize("\r", 1);
    }
    if (seen == NEWLINE_LF) {
        return PyBytes_FromStringAndSize("\n", 1);
    }
    if (seen == NEWLINE_CRLF) {
        return PyBytes_FromStringAndSize("\r\n", 2);
    }

    /* Several conventions recorded: return them as a tuple. */
    if (seen == (NEWLINE_CR|NEWLINE_LF)) {
        return Py_BuildValue("(ss)", "\r", "\n");
    }
    if (seen == (NEWLINE_CR|NEWLINE_CRLF)) {
        return Py_BuildValue("(ss)", "\r", "\r\n");
    }
    if (seen == (NEWLINE_LF|NEWLINE_CRLF)) {
        return Py_BuildValue("(ss)", "\n", "\r\n");
    }
    if (seen == (NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF)) {
        return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
    }

    /* Bits outside the known set: report an internal error. */
    PyErr_Format(PyExc_SystemError,
                 "Unknown newlines value 0x%x\n",
                 seen);
    return NULL;
}
static PyObject * static PyObject *
BZ2File_get_closed(BZ2FileObject *self, void *closure) BZ2File_get_closed(BZ2FileObject *self, void *closure)
{ {
return PyInt_FromLong(self->mode == MODE_CLOSED); return PyInt_FromLong(self->mode == MODE_CLOSED);
} }
/* Getter for the "mode" attribute: looks up "mode" on the wrapped file
 * object and returns the result of PyObject_GetAttrString() unchanged.
 * The closure argument is unused.
 */
static PyObject *
BZ2File_get_mode(BZ2FileObject *self, void *closure)
{
    PyObject *wrapped = self->file;

    return PyObject_GetAttrString(wrapped, "mode");
}
/* Getter for the "name" attribute: looks up "name" on the wrapped file
 * object and returns the result of PyObject_GetAttrString() unchanged.
 * The closure argument is unused.
 */
static PyObject *
BZ2File_get_name(BZ2FileObject *self, void *closure)
{
    PyObject *wrapped = self->file;

    return PyObject_GetAttrString(wrapped, "name");
}
static PyGetSetDef BZ2File_getset[] = { static PyGetSetDef BZ2File_getset[] = {
{"closed", (getter)BZ2File_get_closed, NULL, {"closed", (getter)BZ2File_get_closed, NULL,
"True if the file is closed"}, "True if the file is closed"},
{"newlines", (getter)BZ2File_get_newlines, NULL,
"end-of-line convention used in this file"},
{"mode", (getter)BZ2File_get_mode, NULL,
"file mode ('r', 'w', or 'U')"},
{"name", (getter)BZ2File_get_name, NULL,
"file name"},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
...@@ -1286,9 +1124,8 @@ static int ...@@ -1286,9 +1124,8 @@ static int
BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
{ {
static char *kwlist[] = {"filename", "mode", "buffering", static char *kwlist[] = {"filename", "mode", "buffering",
"compresslevel", 0}; "compresslevel", 0};
FILE *fp = NULL; /* XXX temporary!!! */ char *name;
PyObject *name;
char *mode = "r"; char *mode = "r";
int buffering = -1; int buffering = -1;
int compresslevel = 9; int compresslevel = 9;
...@@ -1297,7 +1134,7 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) ...@@ -1297,7 +1134,7 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
self->size = -1; self->size = -1;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File", if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sii:BZ2File",
kwlist, &name, &mode, &buffering, kwlist, &name, &mode, &buffering,
&compresslevel)) &compresslevel))
return -1; return -1;
...@@ -1321,14 +1158,6 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) ...@@ -1321,14 +1158,6 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
case 'b': case 'b':
break; break;
case 'U':
#ifdef __VMS
self->f_univ_newline = 0;
#else
self->f_univ_newline = 1;
#endif
break;
default: default:
error = 1; error = 1;
break; break;
...@@ -1349,10 +1178,12 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) ...@@ -1349,10 +1178,12 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
mode = (mode_char == 'r') ? "rb" : "wb"; mode = (mode_char == 'r') ? "rb" : "wb";
self->file = NULL; /* XXX io.open(name, mode, buffering); */ self->rawfp = fopen(name, mode);
PyErr_SetString(PyExc_RuntimeError, "can't open bz2 files yet"); if (self->rawfp == NULL) {
if (self->file == NULL) PyErr_SetFromErrno(PyExc_IOError);
return -1; return -1;
}
/* XXX Ignore buffering */
/* From now on, we have stuff to dealloc, so jump to error label /* From now on, we have stuff to dealloc, so jump to error label
* instead of returning */ * instead of returning */
...@@ -1366,12 +1197,10 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) ...@@ -1366,12 +1197,10 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
#endif #endif
if (mode_char == 'r') if (mode_char == 'r')
self->fp = BZ2_bzReadOpen(&bzerror, self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
fp,
0, 0, NULL, 0); 0, 0, NULL, 0);
else else
self->fp = BZ2_bzWriteOpen(&bzerror, self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
fp,
compresslevel, 0, 0); compresslevel, 0, 0);
if (bzerror != BZ_OK) { if (bzerror != BZ_OK) {
...@@ -1384,7 +1213,8 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) ...@@ -1384,7 +1213,8 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
return 0; return 0;
error: error:
Py_CLEAR(self->file); fclose(self->rawfp);
self->rawfp = NULL;
#ifdef WITH_THREAD #ifdef WITH_THREAD
if (self->lock) { if (self->lock) {
PyThread_free_lock(self->lock); PyThread_free_lock(self->lock);
...@@ -1413,7 +1243,8 @@ BZ2File_dealloc(BZ2FileObject *self) ...@@ -1413,7 +1243,8 @@ BZ2File_dealloc(BZ2FileObject *self)
break; break;
} }
Util_DropReadAhead(self); Util_DropReadAhead(self);
Py_XDECREF(self->file); if (self->rawfp != NULL)
fclose(self->rawfp);
self->ob_type->tp_free((PyObject *)self); self->ob_type->tp_free((PyObject *)self);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment