New I/O code from Tony Lownds implement newline feature correctly,

and implements .newlines attribute in a 2.x-compatible fashion.

New I/O code from Tony Lownds implement newline feature correctly,
and implements .newlines attribute in a 2.x-compatible fashion.
17596665 · Guido van Rossum · 384283f1 · 17596665 · 17596665 · 17596665
Commit 17596665 authored Aug 18, 2007 by Guido van Rossum
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 248 additions and 73 deletions

Lib/io.py Lib/io.py +160 -50

Lib/test/test_io.py Lib/test/test_io.py +85 -12

Lib/test/test_univnewlines.py Lib/test/test_univnewlines.py +3 -11

No files found.
--- a/Lib/io.py
+++ b/Lib/io.py
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
 """Unit tests for io.py."""

+import os
 import sys
 import time
 import array
@@ -481,30 +482,61 @@ class TextIOWrapperTest(unittest.TestCase):
    def tearDown(self):
        test_support.unlink(test_support.TESTFN)

+    def testNewlinesInput(self):
+        testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
+        normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
+        for newline, expected in [
+            (None, normalized.decode("ASCII").splitlines(True)),
+            ("", testdata.decode("ASCII").splitlines(True)),
+            ("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
+            ("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
+            ("\r",  ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]),
+            ]:
+            buf = io.BytesIO(testdata)
+            txt = io.TextIOWrapper(buf, encoding="ASCII", newline=newline)
+            self.assertEquals(txt.readlines(), expected)
+            txt.seek(0)
+            self.assertEquals(txt.read(), "".join(expected))
+
+    def testNewlinesOutput(self):
+        testdict = {
+            "": b"AAA\nBBB\nCCC\nX\rY\r\nZ",
+            "\n": b"AAA\nBBB\nCCC\nX\rY\r\nZ",
+            "\r": b"AAA\rBBB\rCCC\rX\rY\r\rZ",
+            "\r\n": b"AAA\r\nBBB\r\nCCC\r\nX\rY\r\r\nZ",
+            }
+        tests = [(None, testdict[os.linesep])] + sorted(testdict.items())
+        for newline, expected in tests:
+            buf = io.BytesIO()
+            txt = io.TextIOWrapper(buf, encoding="ASCII", newline=newline)
+            txt.write("AAA\nB")
+            txt.write("BB\nCCC\n")
+            txt.write("X\rY\r\nZ")
+            txt.flush()
+            self.assertEquals(buf.getvalue(), expected)
+
    def testNewlines(self):
        input_lines = [ "unix\n", "windows\r\n", "os9\r", "last\n", "nonl" ]

        tests = [
            [ None, [ 'unix\n', 'windows\n', 'os9\n', 'last\n', 'nonl' ] ],
-            [ '\n', input_lines ],
-            [ '\r\n', input_lines ],
+            [ '', input_lines ],
+            [ '\n', [ "unix\n", "windows\r\n", "os9\rlast\n", "nonl" ] ],
+            [ '\r\n', [ "unix\nwindows\r\n", "os9\rlast\nnonl" ] ],
+            [ '\r', [ "unix\nwindows\r", "\nos9\r", "last\nnonl" ] ],
        ]

        encodings = ('utf-8', 'latin-1')

-        # Try a range of pad sizes to test the case where \r is the last
+        # Try a range of buffer sizes to test the case where \r is the last
        # character in TextIOWrapper._pending_line.
        for encoding in encodings:
+            # XXX: str.encode() should return bytes
+            data = bytes(''.join(input_lines).encode(encoding))
            for do_reads in (False, True):
-                for padlen in chain(range(10), range(50, 60)):
-                    pad = '.' * padlen
-                    data_lines = [ pad + line for line in input_lines ]
-                    # XXX: str.encode() should return bytes
-                    data = bytes(''.join(data_lines).encode(encoding))
-
-                    for newline, exp_line_ends in tests:
-                        exp_lines = [ pad + line for line in exp_line_ends ]
-                        bufio = io.BufferedReader(io.BytesIO(data))
+                for bufsize in range(1, 10):
+                    for newline, exp_lines in tests:
+                        bufio = io.BufferedReader(io.BytesIO(data), bufsize)
                        textio = io.TextIOWrapper(bufio, newline=newline,
                                                  encoding=encoding)
                        if do_reads:
@@ -522,6 +554,47 @@ class TextIOWrapperTest(unittest.TestCase):
                            self.assertEquals(got_line, exp_line)
                        self.assertEquals(len(got_lines), len(exp_lines))

+    def testNewlinesInput(self):
+        testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
+        normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
+        for newline, expected in [
+            (None, normalized.decode("ASCII").splitlines(True)),
+            ("", testdata.decode("ASCII").splitlines(True)),
+            ("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
+            ("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
+            ("\r",  ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]),
+            ]:
+            buf = io.BytesIO(testdata)
+            txt = io.TextIOWrapper(buf, encoding="ASCII", newline=newline)
+            self.assertEquals(txt.readlines(), expected)
+            txt.seek(0)
+            self.assertEquals(txt.read(), "".join(expected))
+
+    def testNewlinesOutput(self):
+        import os
+        orig_linesep = os.linesep
+        data = "AAA\nBBB\rCCC\n"
+        data_lf = b"AAA\nBBB\rCCC\n"
+        data_cr = b"AAA\rBBB\rCCC\r"
+        data_crlf = b"AAA\r\nBBB\rCCC\r\n"
+        for os.linesep, newline, expected in [
+            ("\n", None, data_lf),
+            ("\r\n", None, data_crlf),
+            ("\n", "", data_lf),
+            ("\r\n", "", data_lf),
+            ("\n", "\n", data_lf),
+            ("\r\n", "\n", data_lf),
+            ("\n", "\r", data_cr),
+            ("\r\n", "\r", data_cr),
+            ("\n", "\r\n", data_crlf),
+            ("\r\n", "\r\n", data_crlf),
+            ]:
+            buf = io.BytesIO()
+            txt = io.TextIOWrapper(buf, encoding="ASCII", newline=newline)
+            txt.write(data)
+            txt.close()
+            self.assertEquals(buf.getvalue(), expected)
+
    # Systematic tests of the text I/O API

    def testBasicIO(self):

--- a/Lib/test/test_univnewlines.py
+++ b/Lib/test/test_univnewlines.py
@@ -12,9 +12,8 @@ FATX = 'x' * (2**14)

 DATA_TEMPLATE = [
    "line1=1",
-    "line2='this is a very long line designed to go past the magic " +
-        "hundred character limit that is inside fileobject.c and which " +
-        "is meant to speed up the common case, but we also want to test " +
+    "line2='this is a very long line designed to go past any default " +
+        "buffer limits that exist in io.py but we also want to test " +
        "the uncommon case, naturally.'",
    "def line3():pass",
    "line4 = '%s'" % FATX,
@@ -32,7 +31,7 @@ DATA_SPLIT = [x + "\n" for x in DATA_TEMPLATE]
 class TestGenericUnivNewlines(unittest.TestCase):
    # use a class variable DATA to define the data to write to the file
    # and a class variable NEWLINE to set the expected newlines value
-    READMODE = 'U'
+    READMODE = 'r'
    WRITEMODE = 'wb'

    def setUp(self):
@@ -79,12 +78,6 @@ class TestGenericUnivNewlines(unittest.TestCase):
        self.assertEqual(data, DATA_SPLIT[1:])


-class TestNativeNewlines(TestGenericUnivNewlines):
-    NEWLINE = None
-    DATA = DATA_LF
-    READMODE = 'r'
-    WRITEMODE = 'w'
-
 class TestCRNewlines(TestGenericUnivNewlines):
    NEWLINE = '\r'
    DATA = DATA_CR
@@ -104,7 +97,6 @@ class TestMixedNewlines(TestGenericUnivNewlines):

 def test_main():
    test_support.run_unittest(
-        TestNativeNewlines,
        TestCRNewlines,
        TestLFNewlines,
        TestCRLFNewlines,