Make all the multibyte codec tests pass.

Changes to io.py, necessary to make this work: - Redid io.StringIO as a TextIOWrapper on top of a BytesIO instance. - Got rid of _MemoryIOMixin, folding it into BytesIO instead. - The read() functions that take -1 to mean "everything" now also take None. - Added readline() support to BufferedIOBase. :-(

Make all the multibyte codec tests pass.
Changes to io.py, necessary to make this work: - Redid io.StringIO as a TextIOWrapper on top of a BytesIO instance. - Got rid of _MemoryIOMixin, folding it into BytesIO instead. - The read() functions that take -1 to mean "everything" now also take None. - Added readline() support to BufferedIOBase. :-(
024da5c2 · Guido van Rossum · f4cfc8f6 · 024da5c2 · 024da5c2 · 024da5c2
Commit 024da5c2 authored May 17, 2007 by Guido van Rossum
10 changed files
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -415,8 +415,8 @@ class BufferedIOBase(IOBase):
    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

-        If the argument is omitted, or negative, reads and returns all
-        data until EOF.
+        If the argument is omitted, None, or negative, reads and
+        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
@@ -450,6 +450,20 @@ class BufferedIOBase(IOBase):
        b[:n] = data
        return n

+    def readline(self, sizehint: int = -1) -> bytes:
+        """For backwards compatibility, a (slow) readline()."""
+        if sizehint is None:
+            sizehint = -1
+        res = b""
+        while sizehint < 0 or len(res) < sizehint:
+            b = self.read(1)
+            if not b:
+                break
+            res += b
+            if b == b"\n":
+                break
+        return res
+
    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

@@ -518,19 +532,25 @@ class _BufferedIOMixin(BufferedIOBase):
        return self.raw.isatty()


-class _MemoryIOMixin(BufferedIOBase):
+class BytesIO(BufferedIOBase):

-    # XXX docstring
+    """Buffered I/O implementation using an in-memory bytes buffer."""

-    def __init__(self, buffer):
+    # XXX More docs
+
+    def __init__(self, initial_bytes=None):
+        buffer = b""
+        if initial_bytes is not None:
+            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        return self._buffer

-    def read(self, n=-1):
-        assert n is not None
+    def read(self, n=None):
+        if n is None:
+            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
@@ -538,6 +558,9 @@ class _MemoryIOMixin(BufferedIOBase):
        self._pos = newpos
        return b

+    def read1(self, n):
+        return self.read(n)
+
    def write(self, b):
        n = len(b)
        newpos = self._pos + n
@@ -575,65 +598,6 @@ class _MemoryIOMixin(BufferedIOBase):
        return True


-class BytesIO(_MemoryIOMixin):
-
-    """Buffered I/O implementation using a bytes buffer, like StringIO."""
-
-    # XXX More docs
-
-    def __init__(self, initial_bytes=None):
-        buffer = b""
-        if initial_bytes is not None:
-            buffer += initial_bytes
-        _MemoryIOMixin.__init__(self, buffer)
-
-
-# XXX This should inherit from TextIOBase
-class StringIO(_MemoryIOMixin):
-
-    """Buffered I/O implementation using a string buffer, like StringIO."""
-
-    # XXX More docs
-
-    # Reuses the same code as BytesIO, but encode strings on the way in
-    # and decode them on the way out.
-
-    charsize = len("!".encode("unicode-internal"))
-
-    def __init__(self, initial_string=None):
-        if initial_string is not None:
-            buffer = initial_string.encode("unicode-internal")
-        else:
-            buffer = b""
-        _MemoryIOMixin.__init__(self, buffer)
-
-    def getvalue(self):
-        return self._buffer.encode("unicode-internal")
-
-    def read(self, n=-1):
-        return super(StringIO, self).read(n*self.charsize) \
-                                    .decode("unicode-internal")
-
-    def write(self, s):
-        return super(StringIO, self).write(s.encode("unicode-internal")) \
-                                    //self.charsize
-
-    def seek(self, pos, whence=0):
-        return super(StringIO, self).seek(self.charsize*pos, whence) \
-                                    //self.charsize
-
-    def tell(self):
-        return super(StringIO, self).tell()//self.charsize
-
-    def truncate(self, pos=None):
-        if pos is not None:
-            pos *= self.charsize
-        return super(StringIO, self).truncate(pos)//self.charsize
-
-    def readinto(self, b: bytes) -> int:
-        self._unsupported("readinto")
-
-
 class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""
@@ -646,7 +610,7 @@ class BufferedReader(_BufferedIOMixin):
        self._read_buf = b""
        self.buffer_size = buffer_size

-    def read(self, n=-1):
+    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
@@ -654,7 +618,8 @@ class BufferedReader(_BufferedIOMixin):
        mode. If n is negative, read until EOF or until read() would
        block.
        """
-        assert n is not None
+        if n is None:
+            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            to_read = max(self.buffer_size,
@@ -801,7 +766,9 @@ class BufferedRWPair(BufferedIOBase):
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

-    def read(self, n=-1):
+    def read(self, n=None):
+        if n is None:
+            n = -1
        return self.reader.read(n)

    def readinto(self, b):
@@ -861,7 +828,9 @@ class BufferedRandom(BufferedWriter, BufferedReader):
        else:
            return self.raw.tell() - len(self._read_buf)

-    def read(self, n=-1):
+    def read(self, n=None):
+        if n is None:
+            n = -1
        self.flush()
        return BufferedReader.read(self, n)

@@ -1129,7 +1098,9 @@ class TextIOWrapper(TextIOBase):
        except UnicodeEncodeError:
            return u

-    def read(self, n: int = -1):
+    def read(self, n=None):
+        if n is None:
+            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
@@ -1146,7 +1117,7 @@ class TextIOWrapper(TextIOBase):
            self._pending = res[n:]
            return self._simplify(res[:n])

-    def __next__(self) -> str:
+    def __next__(self):
        self._telling = False
        line = self.readline()
        if not line:
@@ -1218,3 +1189,17 @@ class TextIOWrapper(TextIOBase):
            return self._simplify(line[:endpos] + "\n")
        else:
            return self._simplify(line[:nextpos])
+
+
+class StringIO(TextIOWrapper):
+
+    # XXX This is really slow, but fully functional
+
+    def __init__(self, initial_value=""):
+        super(StringIO, self).__init__(BytesIO(), "utf-8")
+        if initial_value:
+            self.write(initial_value)
+            self.seek(0)
+
+    def getvalue(self):
+        return self.buffer.getvalue().decode("utf-8")
--- a/Lib/test/cjkencodings_test.py
+++ b/Lib/test/cjkencodings_test.py
--- a/Lib/test/test_codecencodings_cn.py
+++ b/Lib/test/test_codecencodings_cn.py
@@ -13,12 +13,12 @@ class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # invalid bytes
-        ("abc\x81\x81\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
-        ("abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
-        ("abc\x81\x81\xc1\xc4", "ignore",  "abc\u804a"),
-        ("\xc1\x64", "strict", None),
+        (b"abc\x81\x81\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        (b"abc\x81\x81\xc1\xc4", "ignore",  "abc\u804a"),
+        (b"\xc1\x64", "strict", None),
    )

 class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
@@ -26,12 +26,12 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('gbk')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
-        ("\x83\x34\x83\x31", "strict", None),
+        (b"abc\x80\x80\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
+        (b"\x83\x34\x83\x31", "strict", None),
        ("\u30fb", "strict", None),
    )

@@ -40,13 +40,13 @@ class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('gb18030')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
-        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
-        ("\u30fb", "strict", "\x819\xa79"),
+        (b"abc\x80\x80\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
+        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        ("\u30fb", "strict", b"\x819\xa79"),
    )
    has_iso10646 = True


--- a/Lib/test/test_codecencodings_hk.py
+++ b/Lib/test/test_codecencodings_hk.py
@@ -13,11 +13,11 @@ class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('big5hkscs')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
+        (b"abc\x80\x80\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
    )

 def test_main():

--- a/Lib/test/test_codecencodings_jp.py
+++ b/Lib/test/test_codecencodings_jp.py
@@ -13,14 +13,14 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
-        ("abc\x81\x00\x81\x00\x82\x84", "strict",  None),
-        ("abc\xf8", "strict",  None),
-        ("abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
-        ("abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
-        ("abc\x81\x00\x82\x84", "ignore",  "abc\uff44"),
+        (b"abc\x81\x00\x81\x00\x82\x84", "strict",  None),
+        (b"abc\xf8", "strict",  None),
+        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
+        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
+        (b"abc\x81\x00\x82\x84", "ignore",  "abc\uff44"),
        # sjis vs cp932
-        ("\\\x7e", "replace", "\\\x7e"),
-        ("\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
+        (b"\\\x7e", "replace", "\\\x7e"),
+        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
    )

 class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
@@ -29,28 +29,28 @@ class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
-        ("abc\x8f\x83\x83", "replace", "abc\ufffd"),
-        ("\xc1\x64", "strict", None),
-        ("\xa1\xc0", "strict", "\uff3c"),
+        (b"abc\x80\x80\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
+        (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
+        (b"\xc1\x64", "strict", None),
+        (b"\xa1\xc0", "strict", "\uff3c"),
    )
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
-        "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
+        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )

 eucjp_commontests = (
-    ("abc\x80\x80\xc1\xc4", "strict",  None),
-    ("abc\xc8", "strict",  None),
-    ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
-    ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
-    ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
-    ("abc\x8f\x83\x83", "replace", "abc\ufffd"),
-    ("\xc1\x64", "strict", None),
+    (b"abc\x80\x80\xc1\xc4", "strict",  None),
+    (b"abc\xc8", "strict",  None),
+    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
+    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
+    (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
+    (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
+    (b"\xc1\x64", "strict", None),
 )

 class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
@@ -58,25 +58,25 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    codectests = eucjp_commontests + (
-        ("\xa1\xc0\\", "strict", "\uff3c\\"),
-        ("\xa5", "strict", "\x5c"),
-        ("\u203e", "strict", "\x7e"),
+        (b"\xa1\xc0\\", "strict", "\uff3c\\"),
+        ("\xa5", "strict", b"\x5c"),
+        ("\u203e", "strict", b"\x7e"),
    )

 shiftjis_commonenctests = (
-    ("abc\x80\x80\x82\x84", "strict",  None),
-    ("abc\xf8", "strict",  None),
-    ("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
-    ("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
-    ("abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
+    (b"abc\x80\x80\x82\x84", "strict",  None),
+    (b"abc\xf8", "strict",  None),
+    (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
+    (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
+    (b"abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
 )

 class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
-        ("\\\x7e", "strict", "\\\x7e"),
-        ("\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
+        (b"\\\x7e", "strict", "\\\x7e"),
+        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
    )

 class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
@@ -84,18 +84,18 @@ class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\x82\x84", "strict",  None),
-        ("abc\xf8", "strict",  None),
-        ("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
-        ("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
-        ("abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
+        (b"abc\x80\x80\x82\x84", "strict",  None),
+        (b"abc\xf8", "strict",  None),
+        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
+        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
+        (b"abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
        # sjis vs cp932
-        ("\\\x7e", "replace", "\xa5\u203e"),
-        ("\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
+        (b"\\\x7e", "replace", "\xa5\u203e"),
+        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
-        "\x85G&real;\x85Q = &lang;&#4660;&rang;"
+        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )

 def test_main():

--- a/Lib/test/test_codecencodings_kr.py
+++ b/Lib/test/test_codecencodings_kr.py
@@ -13,11 +13,11 @@ class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('cp949')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\uc894"),
+        (b"abc\x80\x80\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\uc894"),
    )

 class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
@@ -25,11 +25,11 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('euc_kr')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\uc894"),
+        (b"abc\x80\x80\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\uc894"),
    )

 class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
@@ -37,11 +37,11 @@ class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('johab')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\ucd27"),
+        (b"abc\x80\x80\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\ucd27"),
    )

 def test_main():

--- a/Lib/test/test_codecencodings_tw.py
+++ b/Lib/test/test_codecencodings_tw.py
@@ -13,11 +13,11 @@ class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    tstring = test_multibytecodec_support.load_teststring('big5')
    codectests = (
        # invalid bytes
-        ("abc\x80\x80\xc1\xc4", "strict",  None),
-        ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
+        (b"abc\x80\x80\xc1\xc4", "strict",  None),
+        (b"abc\xc8", "strict",  None),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
    )

 def test_main():

--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
--- a/Lib/test/test_multibytecodec_support.py
+++ b/Lib/test/test_multibytecodec_support.py
@@ -7,12 +7,12 @@
 import sys, codecs, os.path
 import unittest
 from test import test_support
-from StringIO import StringIO
+from io import BytesIO

 class TestBase:
    encoding        = ''   # codec name
    codec           = None # codec tuple (with 4 elements)
-    tstring         = ''   # string to test StreamReader
+    tstring         = None # must set. 2 strings to test StreamReader

    codectests      = None # must set. codec test tuple
    roundtriptest   = 1    # set if roundtrip is possible with unicode
@@ -31,7 +31,7 @@ class TestBase:
        self.incrementaldecoder = self.codec.incrementaldecoder

    def test_chunkcoding(self):
-        for native, utf8 in zip(*[StringIO(f).readlines()
+        for native, utf8 in zip(*[map(bytes, str8(f).splitlines(1))
                                  for f in self.tstring]):
            u = self.decode(native)[0]
            self.assertEqual(u, utf8.decode('utf-8'))
@@ -40,7 +40,7 @@ class TestBase:

    def test_errorhandle(self):
        for source, scheme, expected in self.codectests:
-            if type(source) == type(''):
+            if isinstance(source, bytes):
                func = self.decode
            else:
                func = self.encode
@@ -57,7 +57,7 @@ class TestBase:
        s = "\u0b13\u0b23\u0b60 nd eggs"
        self.assertEqual(
            self.encode(s, "xmlcharrefreplace")[0],
-            "&#2835;&#2851;&#2912; nd eggs"
+            b"&#2835;&#2851;&#2912; nd eggs"
        )

    def test_customreplace_encode(self):
@@ -83,7 +83,7 @@ class TestBase:
            sin, sout = self.xmlcharnametest
        else:
            sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
-            sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
+            sout = b"&laquo;&real;&raquo; = &lang;&#4660;&rang;"
        self.assertEqual(self.encode(sin,
                                    "test.xmlcharnamereplace")[0], sout)

@@ -92,7 +92,7 @@ class TestBase:
            return (ret, exc.end)
        codecs.register_error("test.cjktest", myreplace)

-        for ret in ([1, 2, 3], [], None, object(), 'string', ''):
+        for ret in ([1, 2, 3], [], None, object(), b'string', b''):
            self.assertRaises(TypeError, self.encode, self.unmappedunicode,
                              'test.cjktest')

@@ -101,7 +101,7 @@ class TestBase:
            return ('x', int(exc.end))
        codecs.register_error("test.cjktest", myreplace)
        self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
-                                     'test.cjktest'), ('abcdxefgh', 9))
+                                     'test.cjktest'), (b'abcdxefgh', 9))

        def myreplace(exc):
            return ('x', sys.maxint + 1)
@@ -127,14 +127,14 @@ class TestBase:
        codecs.register_error("test.cjktest", myreplace)
        self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
                                     'test.cjktest'),
-                ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
+                (b'abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))

    def test_callback_forward_index(self):
        def myreplace(exc):
            return ('REPLACED', exc.end + 2)
        codecs.register_error("test.cjktest", myreplace)
        self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
-                                     'test.cjktest'), ('abcdREPLACEDgh', 9))
+                                     'test.cjktest'), (b'abcdREPLACEDgh', 9))

    def test_callback_index_outofbound(self):
        def myreplace(exc):
@@ -147,8 +147,8 @@ class TestBase:
        UTF8Reader = codecs.getreader('utf-8')
        for sizehint in [None] + list(range(1, 33)) + \
                        [64, 128, 256, 512, 1024]:
-            istream = UTF8Reader(StringIO(self.tstring[1]))
-            ostream = StringIO()
+            istream = UTF8Reader(BytesIO(self.tstring[1]))
+            ostream = BytesIO()
            encoder = self.incrementalencoder()
            while 1:
                if sizehint is not None:
@@ -167,8 +167,8 @@ class TestBase:
        UTF8Writer = codecs.getwriter('utf-8')
        for sizehint in [None, -1] + list(range(1, 33)) + \
                        [64, 128, 256, 512, 1024]:
-            istream = StringIO(self.tstring[0])
-            ostream = UTF8Writer(StringIO())
+            istream = BytesIO(self.tstring[0])
+            ostream = UTF8Writer(BytesIO())
            decoder = self.incrementaldecoder()
            while 1:
                data = istream.read(sizehint)
@@ -187,26 +187,26 @@ class TestBase:
        self.assertRaises(UnicodeEncodeError, e.encode, inv, True)

        e.errors = 'ignore'
-        self.assertEqual(e.encode(inv, True), '')
+        self.assertEqual(e.encode(inv, True), b'')

        e.reset()
        def tempreplace(exc):
            return ('called', exc.end)
        codecs.register_error('test.incremental_error_callback', tempreplace)
        e.errors = 'test.incremental_error_callback'
-        self.assertEqual(e.encode(inv, True), 'called')
+        self.assertEqual(e.encode(inv, True), b'called')

        # again
        e.errors = 'ignore'
-        self.assertEqual(e.encode(inv, True), '')
+        self.assertEqual(e.encode(inv, True), b'')

    def test_streamreader(self):
        UTF8Writer = codecs.getwriter('utf-8')
        for name in ["read", "readline", "readlines"]:
            for sizehint in [None, -1] + list(range(1, 33)) + \
                            [64, 128, 256, 512, 1024]:
-                istream = self.reader(StringIO(self.tstring[0]))
-                ostream = UTF8Writer(StringIO())
+                istream = self.reader(BytesIO(self.tstring[0]))
+                ostream = UTF8Writer(BytesIO())
                func = getattr(istream, name)
                while 1:
                    data = func(sizehint)
@@ -225,8 +225,8 @@ class TestBase:
        for name in readfuncs:
            for sizehint in [None] + list(range(1, 33)) + \
                            [64, 128, 256, 512, 1024]:
-                istream = UTF8Reader(StringIO(self.tstring[1]))
-                ostream = self.writer(StringIO())
+                istream = UTF8Reader(BytesIO(self.tstring[1]))
+                ostream = self.writer(BytesIO())
                func = getattr(istream, name)
                while 1:
                    if sizehint is not None:

--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -138,6 +138,11 @@ codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
 {
 	PyObject *cb;

+        if (PyUnicode_Check(value)) {
+		value = _PyUnicode_AsDefaultEncodedString(value, NULL);
+		if (value == NULL)
+			return -1;
+	}
 	if (!PyString_Check(value)) {
 		PyErr_SetString(PyExc_TypeError, "errors must be a string");
 		return -1;
@@ -322,11 +327,11 @@ multibytecodec_encerror(MultibyteCodec *codec,
 			goto errorexit;
 	}

-        assert(PyString_Check(retstr));
-	retstrsize = PyString_GET_SIZE(retstr);
+        assert(PyBytes_Check(retstr));
+	retstrsize = PyBytes_GET_SIZE(retstr);
 	REQUIRE_ENCODEBUFFER(buf, retstrsize);

-	memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
+	memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
 	buf->outbuf += retstrsize;

 	newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
@@ -1224,10 +1229,18 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
 		if (cres == NULL)
 			goto errorexit;

+		if (PyString_Check(cres)) {
+			PyObject *cres2 = PyBytes_FromObject(cres);
+			if (cres2 == NULL)
+				return NULL;
+			Py_DECREF(cres);
+			cres = cres2;
+		}
+
 		if (!PyBytes_Check(cres)) {
 			PyErr_Format(PyExc_TypeError,
                                     "stream function returned a "
-                                     "non-string object (%.100s)",
+                                     "non-bytes object (%.100s)",
                                     cres->ob_type->tp_name);
 			goto errorexit;
 		}
@@ -1596,8 +1609,8 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self)
 	if (pwrt == NULL)
 		return NULL;

-        assert(PyString_Check(pwrt));
-	if (PyString_Size(pwrt) > 0) {
+        assert(PyBytes_Check(pwrt));
+	if (PyBytes_Size(pwrt) > 0) {
 		PyObject *wr;
 		wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
 		if (wr == NULL) {