In text I/O, optimize scanning for newlines in strings of 1-byte Unicode characters

c28e2e53 · Antoine Pitrou · f364e7b5 · c28e2e53
Commit c28e2e53 authored Nov 13, 2011 by Antoine Pitrou
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 20 additions and 12 deletions

Modules/_io/textio.c Modules/_io/textio.c +20 -12

No files found.
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -365,6 +365,9 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
+                if (kind == PyUnicode_1BYTE_KIND)
+                    seennl |= SEEN_LF;
+                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
@@ -380,6 +383,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
                            break;
                    }
                }
+            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
@@ -1597,6 +1601,10 @@ textiowrapper_read(textio *self, PyObject *args)
 static char *
 find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
 {
+    if (kind == PyUnicode_1BYTE_KIND) {
+        assert(ch < 256);
+        return (char *) memchr((void *) s, (char) ch, end - s);
+    }
    for (;;) {
        while (PyUnicode_READ(kind, s, 0) > ch)
            s += kind;