Commit 26447c09 authored by Antoine Pitrou

Merged revisions 77461 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r77461 | antoine.pitrou | 2010-01-13 08:55:48 +0100 (mer., 13 janv. 2010) | 5 lines

  Issue #7622: Improve the split(), rsplit(), splitlines() and replace()
  methods of bytes, bytearray and unicode objects by using a common
  implementation based on stringlib's fast search.  Patch by Florent Xicluna.
........
parent 67a22e9d
...@@ -582,6 +582,7 @@ BYTESTR_DEPS = \ ...@@ -582,6 +582,7 @@ BYTESTR_DEPS = \
$(srcdir)/Objects/stringlib/fastsearch.h \ $(srcdir)/Objects/stringlib/fastsearch.h \
$(srcdir)/Objects/stringlib/find.h \ $(srcdir)/Objects/stringlib/find.h \
$(srcdir)/Objects/stringlib/partition.h \ $(srcdir)/Objects/stringlib/partition.h \
$(srcdir)/Objects/stringlib/split.h \
$(srcdir)/Objects/stringlib/stringdefs.h \ $(srcdir)/Objects/stringlib/stringdefs.h \
$(srcdir)/Objects/stringlib/string_format.h \ $(srcdir)/Objects/stringlib/string_format.h \
$(srcdir)/Objects/stringlib/transmogrify.h \ $(srcdir)/Objects/stringlib/transmogrify.h \
......
...@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1? ...@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #7622: Improve the split(), rsplit(), splitlines() and replace()
methods of bytes, bytearray and unicode objects by using a common
implementation based on stringlib's fast search. Patch by Florent Xicluna.
- Issue #7632: Fix a crash in dtoa.c that occurred in debug builds - Issue #7632: Fix a crash in dtoa.c that occurred in debug builds
when parsing certain long numeric strings corresponding to subnormal when parsing certain long numeric strings corresponding to subnormal
values. Also fix a number of bugs in dtoa.c that could lead to values. Also fix a number of bugs in dtoa.c that could lead to
......
This diff is collapsed.
This diff is collapsed.
...@@ -9,28 +9,22 @@ ...@@ -9,28 +9,22 @@
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len, stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len) const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t maxcount)
{ {
Py_ssize_t count; Py_ssize_t count;
if (str_len < 0) if (str_len < 0)
return 0; /* start > len(str) */ return 0; /* start > len(str) */
if (sub_len == 0) if (sub_len == 0)
return str_len + 1; return (str_len < maxcount) ? str_len + 1 : maxcount;
count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT); count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
if (count < 0) if (count < 0)
count = 0; /* no match */ return 0; /* no match */
return count; return count;
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/
...@@ -107,4 +107,3 @@ stringlib_swapcase(PyObject *self) ...@@ -107,4 +107,3 @@ stringlib_swapcase(PyObject *self)
STRINGLIB_LEN(self)); STRINGLIB_LEN(self));
return newobj; return newobj;
} }
...@@ -18,10 +18,13 @@ ...@@ -18,10 +18,13 @@
#define FAST_SEARCH 1 #define FAST_SEARCH 1
#define FAST_RSEARCH 2 #define FAST_RSEARCH 2
/* Bloom-filter helpers for the compressed Boyer-Moore delta table.
   Each character is mapped to the bit numbered (ch & (LONG_BIT - 1)) of
   `mask`, so distinct characters may share a bit: BLOOM() can report a
   false positive but never a false negative for a character that was
   added with BLOOM_ADD().

   The shifted constant is 1UL rather than a plain int 1: the shift count
   can be as large as LONG_BIT - 1, and shifting an int by at least its own
   width is undefined behaviour on platforms where long is wider than int.

   BLOOM_ADD(mask, ch) sets the bit for `ch` in the lvalue `mask`.
   BLOOM(mask, ch) is non-zero iff the bit for `ch` is set in `mask`. */
#define BLOOM_ADD(mask, ch) \
    ((mask) |= (1UL << ((ch) & (LONG_BIT - 1))))
#define BLOOM(mask, ch) \
    ((mask) & (1UL << ((ch) & (LONG_BIT - 1))))
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m, const STRINGLIB_CHAR* p, Py_ssize_t m,
int mode) Py_ssize_t maxcount, int mode)
{ {
long mask; long mask;
Py_ssize_t skip, count = 0; Py_ssize_t skip, count = 0;
...@@ -29,7 +32,7 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -29,7 +32,7 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
w = n - m; w = n - m;
if (w < 0) if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
return -1; return -1;
/* look for special cases */ /* look for special cases */
...@@ -39,8 +42,11 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -39,8 +42,11 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* use special case for 1-character strings */ /* use special case for 1-character strings */
if (mode == FAST_COUNT) { if (mode == FAST_COUNT) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
if (s[i] == p[0]) if (s[i] == p[0]) {
count++; count++;
if (count == maxcount)
return maxcount;
}
return count; return count;
} else if (mode == FAST_SEARCH) { } else if (mode == FAST_SEARCH) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
...@@ -56,19 +62,20 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -56,19 +62,20 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
mlast = m - 1; mlast = m - 1;
skip = mlast - 1; skip = mlast - 1;
mask = 0;
if (mode != FAST_RSEARCH) { if (mode != FAST_RSEARCH) {
/* create compressed boyer-moore delta 1 table */ /* create compressed boyer-moore delta 1 table */
/* process pattern[:-1] */ /* process pattern[:-1] */
for (mask = i = 0; i < mlast; i++) { for (i = 0; i < mlast; i++) {
mask |= (1 << (p[i] & 0x1F)); BLOOM_ADD(mask, p[i]);
if (p[i] == p[mlast]) if (p[i] == p[mlast])
skip = mlast - i - 1; skip = mlast - i - 1;
} }
/* process pattern[-1] outside the loop */ /* process pattern[-1] outside the loop */
mask |= (1 << (p[mlast] & 0x1F)); BLOOM_ADD(mask, p[mlast]);
for (i = 0; i <= w; i++) { for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */ /* note: using mlast in the skip path slows things down on x86 */
...@@ -82,17 +89,19 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -82,17 +89,19 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (mode != FAST_COUNT) if (mode != FAST_COUNT)
return i; return i;
count++; count++;
if (count == maxcount)
return maxcount;
i = i + mlast; i = i + mlast;
continue; continue;
} }
/* miss: check if next character is part of pattern */ /* miss: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F)))) if (!BLOOM(mask, s[i+m]))
i = i + m; i = i + m;
else else
i = i + skip; i = i + skip;
} else { } else {
/* skip: check if next character is part of pattern */ /* skip: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F)))) if (!BLOOM(mask, s[i+m]))
i = i + m; i = i + m;
} }
} }
...@@ -101,10 +110,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -101,10 +110,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* create compressed boyer-moore delta 1 table */ /* create compressed boyer-moore delta 1 table */
/* process pattern[0] outside the loop */ /* process pattern[0] outside the loop */
mask = (1 << (p[0] & 0x1F)); BLOOM_ADD(mask, p[0]);
/* process pattern[:0:-1] */ /* process pattern[:0:-1] */
for (i = mlast; i > 0; i--) { for (i = mlast; i > 0; i--) {
mask |= (1 << (p[i] & 0x1F)); BLOOM_ADD(mask, p[i]);
if (p[i] == p[0]) if (p[i] == p[0])
skip = i - 1; skip = i - 1;
} }
...@@ -119,13 +128,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -119,13 +128,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* got a match! */ /* got a match! */
return i; return i;
/* miss: check if previous character is part of pattern */ /* miss: check if previous character is part of pattern */
if (!(mask & (1 << (s[i-1] & 0x1F)))) if (!BLOOM(mask, s[i-1]))
i = i - m; i = i - m;
else else
i = i - skip; i = i - skip;
} else { } else {
/* skip: check if previous character is part of pattern */ /* skip: check if previous character is part of pattern */
if (!(mask & (1 << (s[i-1] & 0x1F)))) if (!BLOOM(mask, s[i-1]))
i = i - m; i = i - m;
} }
} }
...@@ -137,10 +146,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, ...@@ -137,10 +146,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/
...@@ -19,7 +19,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -19,7 +19,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0) if (sub_len == 0)
return offset; return offset;
pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH); pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
if (pos >= 0) if (pos >= 0)
pos += offset; pos += offset;
...@@ -39,7 +39,7 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -39,7 +39,7 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0) if (sub_len == 0)
return str_len + offset; return str_len + offset;
pos = fastsearch(str, str_len, sub, sub_len, FAST_RSEARCH); pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
if (pos >= 0) if (pos >= 0)
pos += offset; pos += offset;
...@@ -47,22 +47,27 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -47,22 +47,27 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos; return pos;
} }
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; they and `len` are
   evaluated more than once, so pass plain variables only.

   - end greater than len is clamped to len;
   - a negative end is offset by len, then clamped to 0 if still negative;
   - a negative start is offset by len, then clamped to 0 if still negative.
   A start larger than len is left untouched.

   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and can be used safely in an unbraced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
if (start < 0) ADJUST_INDICES(start, end, str_len);
start += str_len;
if (start < 0)
start = 0;
if (end > str_len)
end = str_len;
if (end < 0)
end += str_len;
if (end < 0)
end = 0;
return stringlib_find(str + start, end - start, sub, sub_len, start); return stringlib_find(str + start, end - start, sub, sub_len, start);
} }
...@@ -71,17 +76,7 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, ...@@ -71,17 +76,7 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
if (start < 0) ADJUST_INDICES(start, end, str_len);
start += str_len;
if (start < 0)
start = 0;
if (end > str_len)
end = str_len;
if (end < 0)
end += str_len;
if (end < 0)
end = 0;
return stringlib_rfind(str + start, end - start, sub, sub_len, start); return stringlib_rfind(str + start, end - start, sub, sub_len, start);
} }
...@@ -96,9 +91,9 @@ stringlib_contains_obj(PyObject* str, PyObject* sub) ...@@ -96,9 +91,9 @@ stringlib_contains_obj(PyObject* str, PyObject* sub)
) != -1; ) != -1;
} }
#endif /* STRINGLIB_STR */ #endif /* STRINGLIB_WANT_CONTAINS_OBJ */
#ifdef FROM_UNICODE #if STRINGLIB_IS_UNICODE
/* /*
This function is a helper for the "find" family (find, rfind, index, This function is a helper for the "find" family (find, rfind, index,
...@@ -146,13 +141,6 @@ _ParseTupleFinds (PyObject *args, PyObject **substring, ...@@ -146,13 +141,6 @@ _ParseTupleFinds (PyObject *args, PyObject **substring,
return 1; return 1;
} }
#endif /* FROM_UNICODE */ #endif /* STRINGLIB_IS_UNICODE */
#endif /* STRINGLIB_FIND_H */ #endif /* STRINGLIB_FIND_H */
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/
...@@ -8,33 +8,39 @@ ...@@ -8,33 +8,39 @@
#endif #endif
Py_LOCAL_INLINE(PyObject*) Py_LOCAL_INLINE(PyObject*)
stringlib_partition( stringlib_partition(PyObject* str_obj,
PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len PyObject* sep_obj,
) const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{ {
PyObject* out; PyObject* out;
Py_ssize_t pos; Py_ssize_t pos;
if (sep_len == 0) { if (sep_len == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator"); PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL; return NULL;
} }
out = PyTuple_New(3); out = PyTuple_New(3);
if (!out) if (!out)
return NULL; return NULL;
pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH); pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);
if (pos < 0) { if (pos < 0) {
Py_INCREF(str_obj); #if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
Py_INCREF(STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
Py_INCREF(STRINGLIB_EMPTY); #else
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); Py_INCREF(str_obj);
return out; PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
#endif
return out;
} }
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
...@@ -44,41 +50,47 @@ stringlib_partition( ...@@ -44,41 +50,47 @@ stringlib_partition(
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
Py_DECREF(out); Py_DECREF(out);
return NULL; return NULL;
} }
return out; return out;
} }
Py_LOCAL_INLINE(PyObject*) Py_LOCAL_INLINE(PyObject*)
stringlib_rpartition( stringlib_rpartition(PyObject* str_obj,
PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len PyObject* sep_obj,
) const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{ {
PyObject* out; PyObject* out;
Py_ssize_t pos; Py_ssize_t pos;
if (sep_len == 0) { if (sep_len == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator"); PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL; return NULL;
} }
out = PyTuple_New(3); out = PyTuple_New(3);
if (!out) if (!out)
return NULL; return NULL;
pos = fastsearch(str, str_len, sep, sep_len, FAST_RSEARCH); pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
if (pos < 0) { if (pos < 0) {
Py_INCREF(STRINGLIB_EMPTY); #if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
Py_INCREF(STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
Py_INCREF(str_obj); #else
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj); Py_INCREF(STRINGLIB_EMPTY);
return out; PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
#endif
return out;
} }
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
...@@ -88,18 +100,11 @@ stringlib_rpartition( ...@@ -88,18 +100,11 @@ stringlib_rpartition(
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
Py_DECREF(out); Py_DECREF(out);
return NULL; return NULL;
} }
return out; return out;
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/
This diff is collapsed.
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#define STRINGLIB_TYPE_NAME "string" #define STRINGLIB_TYPE_NAME "string"
#define STRINGLIB_PARSE_CODE "S" #define STRINGLIB_PARSE_CODE "S"
#define STRINGLIB_EMPTY nullstring #define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISSPACE Py_ISSPACE
/* True for the two byte-string line-break characters, '\n' and '\r'.
   The argument is parenthesized so expression arguments parse correctly;
   note that it is still evaluated twice. */
#define STRINGLIB_ISLINEBREAK(x) (((x) == '\n') || ((x) == '\r'))
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9')) #define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1) #define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
#define STRINGLIB_TOUPPER Py_TOUPPER #define STRINGLIB_TOUPPER Py_TOUPPER
......
/* NOTE: this API is -ONLY- for use with single byte character strings. */ /* NOTE: this API is -ONLY- for use with single byte character strings. */
/* Do not use it with Unicode. */ /* Do not use it with Unicode. */
#include "bytes_methods.h"
#ifndef STRINGLIB_MUTABLE
#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
#define STRINGLIB_MUTABLE 0
#endif
/* the more complicated methods. parts of these should be pulled out into the /* the more complicated methods. parts of these should be pulled out into the
shared code in bytes_methods.c to cut down on duplicate code bloat. */ shared code in bytes_methods.c to cut down on duplicate code bloat. */
...@@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args) ...@@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args)
return (PyObject*) s; return (PyObject*) s;
} }
/* Append the slice data[left:right] to the list being built.

   This macro is not self-contained: the expansion site must provide
     - `str`     : a PyObject * scratch variable (overwritten here),
     - `list`    : the list object receiving the new item,
     - `onError` : a label to jump to when anything fails.

   The object created by STRINGLIB_NEW is released with Py_DECREF after
   the PyList_Append call in both the success and the failure case, so no
   local reference to it survives the macro.

   Wrapped in do { } while (0) so the expansion is a single statement and
   the trailing semicolon at the call site is consumed cleanly. */
#define _STRINGLIB_SPLIT_APPEND(data, left, right)              \
    do {                                                        \
        str = STRINGLIB_NEW((data) + (left),                    \
                            (right) - (left));                  \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
\n\
Return a list of the lines in B, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");

/* Implementation of B.splitlines([keepends]).

   self: the object whose buffer (STRINGLIB_STR / STRINGLIB_LEN) is split.
   args: argument tuple holding an optional int `keepends` (default 0).

   Returns a new list with one item per line, or NULL with an exception
   set if argument parsing, list creation, item creation or appending
   fails.  A line ends at '\n', '\r' or the pair "\r\n", which counts as a
   single break; the break is part of the item only when keepends is
   non-zero. */
static PyObject*
stringlib_splitlines(PyObject *self, PyObject *args)
{
    Py_ssize_t i;               /* scan position */
    Py_ssize_t j;               /* start of the line being collected */
    Py_ssize_t len;
    int keepends = 0;
    PyObject *list;
    PyObject *str;              /* scratch slot used by _STRINGLIB_SPLIT_APPEND */
    char *data;

    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
        return NULL;

    data = STRINGLIB_STR(self);
    len = STRINGLIB_LEN(self);

    /* This does not use the preallocated list because splitlines is
       usually run with hundreds of newlines.  The overhead of
       switching between PyList_SET_ITEM and append causes about a
       2-3% slowdown for that common case.  A smarter implementation
       could move the if check out, so the SET_ITEMs are done first
       and the appends only done when the prealloc buffer is full.
       That's too much work for little gain. */
    list = PyList_New(0);
    if (!list)
        goto onError;

    for (i = j = 0; i < len; ) {
        Py_ssize_t eol;

        /* Find the end of the current line */
        while (i < len && data[i] != '\n' && data[i] != '\r')
            i++;

        /* Skip the line break, reading CRLF as one line break */
        eol = i;
        if (i < len) {
            if (data[i] == '\r' && i + 1 < len &&
                data[i+1] == '\n')
                i += 2;
            else
                i++;
            if (keepends)
                eol = i;
        }
        _STRINGLIB_SPLIT_APPEND(data, j, eol);
        j = i;
    }
    /* No trailing append is needed: every iteration ends with j = i and
       the loop only exits once i has reached len, so a final unterminated
       line has already been appended inside the loop. */
    return list;

 onError:
    Py_XDECREF(list);
    return NULL;
}
#undef _STRINGLIB_SPLIT_APPEND
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#define STRINGLIB_TYPE_NAME "unicode" #define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U" #define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty #define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL #define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL #define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER #define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
......
This diff is collapsed.
...@@ -1490,6 +1490,10 @@ ...@@ -1490,6 +1490,10 @@
RelativePath="..\..\Objects\sliceobject.c" RelativePath="..\..\Objects\sliceobject.c"
> >
</File> </File>
<File
RelativePath="..\..\Objects\stringlib\split.h"
>
</File>
<File <File
RelativePath="..\..\Objects\structseq.c" RelativePath="..\..\Objects\structseq.c"
> >
......
...@@ -1495,6 +1495,10 @@ ...@@ -1495,6 +1495,10 @@
RelativePath="..\Objects\sliceobject.c" RelativePath="..\Objects\sliceobject.c"
> >
</File> </File>
<File
RelativePath="..\Objects\stringlib\split.h"
>
</File>
<File <File
RelativePath="..\Objects\structseq.c" RelativePath="..\Objects\structseq.c"
> >
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment