Commit d340f383 authored by Tim Peters

Use the "MS" getline hack (fgets()) by default on non-getc_unlocked

platforms.  See NEWS for details.
parent 5fa72939
...@@ -25,11 +25,23 @@ Core language, builtins, and interpreter ...@@ -25,11 +25,23 @@ Core language, builtins, and interpreter
- Even if you don't use file.xreadlines(), you may expect a speedup on - Even if you don't use file.xreadlines(), you may expect a speedup on
line-by-line input. The file.readline() method has been optimized line-by-line input. The file.readline() method has been optimized
quite a bit in platform-specific ways, both on Windows (using an quite a bit in platform-specific ways: on systems (like Linux) that
incredibly complex, but nevertheless thread-safe), and on systems support flockfile(), getc_unlocked(), and funlockfile(), those are
(like Linux) that support flockfile(), getc_unlocked(), and used by default. On systems (like Windows) without getc_unlocked(),
funlockfile(). In addition, the fileinput module, while still slow, a complicated (but still thread-safe) method using fgets() is used by
has been sped up too, by using file.readlines(sizehint). default.
You can force use of the fgets() method by #define'ing
USE_FGETS_IN_GETLINE at build time (it may be faster than
getc_unlocked()).
You can force fgets() not to be used by #define'ing
DONT_USE_FGETS_IN_GETLINE (this is the first thing to try if std test
test_bufio.py fails -- and let us know if it does!).
- In addition, the fileinput module, while still slower than the other
methods on most platforms, has been sped up too, by using
file.readlines(sizehint).
- Support for run-time warnings has been added, including a new - Support for run-time warnings has been added, including a new
command line option (-W) to specify the disposition of warnings. command line option (-W) to specify the disposition of warnings.
......
...@@ -635,7 +635,7 @@ file_readinto(PyFileObject *f, PyObject *args) ...@@ -635,7 +635,7 @@ file_readinto(PyFileObject *f, PyObject *args)
} }
/************************************************************************** /**************************************************************************
Win32 MS routine to get next line. Routine to get next line using platform fgets().
Under MSVC 6: Under MSVC 6:
...@@ -651,23 +651,41 @@ So we use fgets for speed(!), despite that it's painful. ...@@ -651,23 +651,41 @@ So we use fgets for speed(!), despite that it's painful.
MS realloc is also slow. MS realloc is also slow.
In the usual case, we have one pleasantly small line already sitting in a Reports from other platforms on this method vs getc_unlocked (which MS doesn't
stdio buffer, and we optimize heavily for that case. have):
Linux a wash
CAUTION: This routine cheats, relying on that MSVC 6 fgets doesn't overwrite Solaris a wash
any buffer positions to the right of the terminating null byte. Seems Tru64 Unix getline_via_fgets significantly faster
unlikely that will change in the future, but ... std test test_bufio should
catch it if that changes. CAUTION: The C std isn't clear about this: in those cases where fgets
writes something into the buffer, can it write into any position beyond the
required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
known on which it does; and it would be a strange way to code fgets. Still,
getline_via_fgets may not work correctly if it does. The std test
test_bufio.py should fail if platform fgets() routinely writes beyond the
trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
**************************************************************************/ **************************************************************************/
/* if Win32 and MS's compiler */ /* Use this routine if told to, or by default on non-getc_unlocked()
#if defined(MS_WIN32) && defined(_MSC_VER) * platforms unless told not to. Yikes! Let's spell that out:
#define USE_MS_GETLINE_HACK * On a platform with getc_unlocked():
* By default, use getc_unlocked().
* If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
* On a platform without getc_unlocked():
* By default, use fgets().
* If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
*/
#if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
#define USE_FGETS_IN_GETLINE
#endif
#if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#endif #endif
#ifdef USE_MS_GETLINE_HACK #ifdef USE_FGETS_IN_GETLINE
static PyObject* static PyObject*
ms_getline_hack(FILE *fp) getline_via_fgets(FILE *fp)
{ {
/* INITBUFSIZE is the maximum line length that lets us get away with the fast /* INITBUFSIZE is the maximum line length that lets us get away with the fast
* no-realloc path. get_line uses 100 for its initial size, but isn't trying * no-realloc path. get_line uses 100 for its initial size, but isn't trying
...@@ -686,14 +704,14 @@ ms_getline_hack(FILE *fp) ...@@ -686,14 +704,14 @@ ms_getline_hack(FILE *fp)
char* pvfree; /* address of next free slot */ char* pvfree; /* address of next free slot */
char* pvend; /* address one beyond last free slot */ char* pvend; /* address one beyond last free slot */
char* p; /* temp */ char* p; /* temp */
char msbuf[INITBUFSIZE]; char buf[INITBUFSIZE];
/* Optimize for normal case: avoid _PyString_Resize if at all /* Optimize for normal case: avoid _PyString_Resize if at all
* possible via first reading into auto msbuf. * possible via first reading into auto buf.
*/ */
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
memset(msbuf, '\n', INITBUFSIZE); memset(buf, '\n', INITBUFSIZE);
p = fgets(msbuf, INITBUFSIZE, fp); p = fgets(buf, INITBUFSIZE, fp);
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
if (p == NULL) { if (p == NULL) {
...@@ -704,7 +722,7 @@ ms_getline_hack(FILE *fp) ...@@ -704,7 +722,7 @@ ms_getline_hack(FILE *fp)
return v; return v;
} }
/* fgets read *something* */ /* fgets read *something* */
p = memchr(msbuf, '\n', INITBUFSIZE); p = memchr(buf, '\n', INITBUFSIZE);
if (p != NULL) { if (p != NULL) {
/* Did the \n come from fgets or from us? /* Did the \n come from fgets or from us?
* Since fgets stops at the first \n, and then writes \0, if * Since fgets stops at the first \n, and then writes \0, if
...@@ -712,34 +730,34 @@ ms_getline_hack(FILE *fp) ...@@ -712,34 +730,34 @@ ms_getline_hack(FILE *fp)
* could not have come from us, since the \n's we filled the * could not have come from us, since the \n's we filled the
* buffer with have only more \n's to the right. * buffer with have only more \n's to the right.
*/ */
pvend = msbuf + INITBUFSIZE; pvend = buf + INITBUFSIZE;
if (p+1 < pvend && *(p+1) == '\0') { if (p+1 < pvend && *(p+1) == '\0') {
/* It's from fgets: we win! In particular, we /* It's from fgets: we win! In particular, we
* haven't done any mallocs yet, and can build the * haven't done any mallocs yet, and can build the
* final result on the first try. * final result on the first try.
*/ */
v = PyString_FromStringAndSize(msbuf, p - msbuf + 1); v = PyString_FromStringAndSize(buf, p - buf + 1);
return v; return v;
} }
/* Must be from us: fgets didn't fill the buffer and didn't /* Must be from us: fgets didn't fill the buffer and didn't
* find a newline, so it must be the last and newline-free * find a newline, so it must be the last and newline-free
* line of the file. * line of the file.
*/ */
assert(p > msbuf && *(p-1) == '\0'); assert(p > buf && *(p-1) == '\0');
v = PyString_FromStringAndSize(msbuf, p - msbuf - 1); v = PyString_FromStringAndSize(buf, p - buf - 1);
return v; return v;
} }
/* yuck: fgets overwrote all the newlines, i.e. the entire buffer. /* yuck: fgets overwrote all the newlines, i.e. the entire buffer.
* So this line isn't over yet, or maybe it is but we're exactly at * So this line isn't over yet, or maybe it is but we're exactly at
* EOF; in either case, we're tired <wink>. * EOF; in either case, we're tired <wink>.
*/ */
assert(msbuf[INITBUFSIZE-1] == '\0'); assert(buf[INITBUFSIZE-1] == '\0');
total_v_size = INITBUFSIZE + INCBUFSIZE; total_v_size = INITBUFSIZE + INCBUFSIZE;
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size); v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
if (v == NULL) if (v == NULL)
return v; return v;
/* copy over everything except the last null byte */ /* copy over everything except the last null byte */
memcpy(BUF(v), msbuf, INITBUFSIZE-1); memcpy(BUF(v), buf, INITBUFSIZE-1);
pvfree = BUF(v) + INITBUFSIZE - 1; pvfree = BUF(v) + INITBUFSIZE - 1;
/* Keep reading stuff into v; if it ever ends successfully, break /* Keep reading stuff into v; if it ever ends successfully, break
...@@ -798,7 +816,7 @@ ms_getline_hack(FILE *fp) ...@@ -798,7 +816,7 @@ ms_getline_hack(FILE *fp)
#undef INITBUFSIZE #undef INITBUFSIZE
#undef INCBUFSIZE #undef INCBUFSIZE
} }
#endif /* ifdef USE_MS_GETLINE_HACK */ #endif /* ifdef USE_FGETS_IN_GETLINE */
/* Internal routine to get a line. /* Internal routine to get a line.
Size argument interpretation: Size argument interpretation:
...@@ -825,10 +843,9 @@ get_line(PyFileObject *f, int n) ...@@ -825,10 +843,9 @@ get_line(PyFileObject *f, int n)
size_t n1, n2; size_t n1, n2;
PyObject *v; PyObject *v;
#ifdef USE_MS_GETLINE_HACK #ifdef USE_FGETS_IN_GETLINE
if (n <= 0) if (n <= 0)
return ms_getline_hack(fp); return getline_via_fgets(fp);
#endif #endif
n2 = n > 0 ? n : 100; n2 = n > 0 ? n : 100;
v = PyString_FromStringAndSize((char *)NULL, n2); v = PyString_FromStringAndSize((char *)NULL, n2);
...@@ -967,10 +984,10 @@ static PyObject * ...@@ -967,10 +984,10 @@ static PyObject *
file_xreadlines(PyFileObject *f, PyObject *args) file_xreadlines(PyFileObject *f, PyObject *args)
{ {
static PyObject* xreadlines_function = NULL; static PyObject* xreadlines_function = NULL;
if (!PyArg_ParseTuple(args, ":xreadlines")) if (!PyArg_ParseTuple(args, ":xreadlines"))
return NULL; return NULL;
if (!xreadlines_function) { if (!xreadlines_function) {
PyObject *xreadlines_module = PyObject *xreadlines_module =
PyImport_ImportModule("xreadlines"); PyImport_ImportModule("xreadlines");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment