Commit 2fc8f773 authored by Georg Brandl

Issue #20404: reject non-text encodings early in TextIOWrapper.

parent 2658bad0
...@@ -104,7 +104,14 @@ PyAPI_FUNC(PyObject *) PyCodec_Decode( ...@@ -104,7 +104,14 @@ PyAPI_FUNC(PyObject *) PyCodec_Decode(
Please note that these APIs are internal and should not Please note that these APIs are internal and should not
be used in Python C extensions. be used in Python C extensions.
XXX (ncoghlan): should we make these, or something like them, public
in Python 3.5+?
*/ */
/* Look up the codec registered for `encoding` and return it only if it is
   marked as a text encoding (its `_is_text_encoding` attribute).  When the
   codec is rejected, NULL is returned and a LookupError is raised whose
   message suggests `alternate_command` (e.g. "codecs.open()") as the API
   to use for arbitrary codecs. */
PyAPI_FUNC(PyObject *) _PyCodec_LookupTextEncoding(
const char *encoding,
const char *alternate_command
);
PyAPI_FUNC(PyObject *) _PyCodec_EncodeText( PyAPI_FUNC(PyObject *) _PyCodec_EncodeText(
PyObject *object, PyObject *object,
...@@ -117,6 +124,19 @@ PyAPI_FUNC(PyObject *) _PyCodec_DecodeText( ...@@ -117,6 +124,19 @@ PyAPI_FUNC(PyObject *) _PyCodec_DecodeText(
const char *encoding, const char *encoding,
const char *errors const char *errors
); );
/* These two aren't actually text encoding specific, but _io.TextIOWrapper
* is the only current API consumer.
*/
/* Create an incremental decoder from an already looked-up codec info
   object, using its "incrementaldecoder" attribute.  `errors` names the
   error handling scheme and may be NULL. */
PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalDecoder(
PyObject *codec_info,
const char *errors
);
/* Create an incremental encoder from an already looked-up codec info
   object, using its "incrementalencoder" attribute.  `errors` names the
   error handling scheme and may be NULL. */
PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalEncoder(
PyObject *codec_info,
const char *errors
);
#endif #endif
......
...@@ -1495,6 +1495,11 @@ class TextIOWrapper(TextIOBase): ...@@ -1495,6 +1495,11 @@ class TextIOWrapper(TextIOBase):
if not isinstance(encoding, str): if not isinstance(encoding, str):
raise ValueError("invalid encoding: %r" % encoding) raise ValueError("invalid encoding: %r" % encoding)
if not codecs.lookup(encoding)._is_text_encoding:
msg = ("%r is not a text encoding; "
"use codecs.open() to handle arbitrary codecs")
raise LookupError(msg % encoding)
if errors is None: if errors is None:
errors = "strict" errors = "strict"
else: else:
......
...@@ -1955,6 +1955,15 @@ class TextIOWrapperTest(unittest.TestCase): ...@@ -1955,6 +1955,15 @@ class TextIOWrapperTest(unittest.TestCase):
self.assertRaises(TypeError, t.__init__, b, newline=42) self.assertRaises(TypeError, t.__init__, b, newline=42)
self.assertRaises(ValueError, t.__init__, b, newline='xyzzy') self.assertRaises(ValueError, t.__init__, b, newline='xyzzy')
def test_non_text_encoding_codecs_are_rejected(self):
    # The constructor must refuse a codec that is not flagged as a
    # text encoding (hex_codec is a bytes-to-bytes codec).
    # See http://bugs.python.org/issue20404
    raw = self.BytesIO()
    buffered = self.BufferedWriter(raw)
    expected_message = "is not a text encoding"
    with self.assertRaisesRegex(LookupError, expected_message):
        self.TextIOWrapper(buffered, encoding="hex_codec")
def test_detach(self): def test_detach(self):
r = self.BytesIO() r = self.BytesIO()
b = self.BufferedWriter(r) b = self.BufferedWriter(r)
...@@ -2607,15 +2616,22 @@ class TextIOWrapperTest(unittest.TestCase): ...@@ -2607,15 +2616,22 @@ class TextIOWrapperTest(unittest.TestCase):
def test_illegal_decoder(self): def test_illegal_decoder(self):
# Issue #17106 # Issue #17106
# Bypass the early encoding check added in issue 20404
def _make_illegal_wrapper():
    # Temporarily flag quopri_codec as a text encoding so the early check
    # in the constructor lets it through; the flag is reset to False
    # whether or not construction succeeds.
    codec_info = codecs.lookup("quopri_codec")
    codec_info._is_text_encoding = True
    try:
        return self.TextIOWrapper(self.BytesIO(b'aaaaaa'),
                                  newline='\n', encoding="quopri_codec")
    finally:
        codec_info._is_text_encoding = False
# Crash when decoder returns non-string # Crash when decoder returns non-string
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n', t = _make_illegal_wrapper()
encoding='quopri_codec')
self.assertRaises(TypeError, t.read, 1) self.assertRaises(TypeError, t.read, 1)
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n', t = _make_illegal_wrapper()
encoding='quopri_codec')
self.assertRaises(TypeError, t.readline) self.assertRaises(TypeError, t.readline)
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n', t = _make_illegal_wrapper()
encoding='quopri_codec')
self.assertRaises(TypeError, t.read) self.assertRaises(TypeError, t.read)
...@@ -3053,6 +3069,7 @@ class MiscIOTest(unittest.TestCase): ...@@ -3053,6 +3069,7 @@ class MiscIOTest(unittest.TestCase):
class CMiscIOTest(MiscIOTest): class CMiscIOTest(MiscIOTest):
io = io io = io
shutdown_error = "RuntimeError: could not find io module state"
def test_readinto_buffer_overflow(self): def test_readinto_buffer_overflow(self):
# Issue #18025 # Issue #18025
...@@ -3065,6 +3082,7 @@ class CMiscIOTest(MiscIOTest): ...@@ -3065,6 +3082,7 @@ class CMiscIOTest(MiscIOTest):
class PyMiscIOTest(MiscIOTest): class PyMiscIOTest(MiscIOTest):
io = pyio io = pyio
shutdown_error = "LookupError: unknown encoding: ascii"
@unittest.skipIf(os.name == 'nt', 'POSIX signals required for this test.') @unittest.skipIf(os.name == 'nt', 'POSIX signals required for this test.')
......
...@@ -836,7 +836,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) ...@@ -836,7 +836,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
char *kwlist[] = {"buffer", "encoding", "errors", char *kwlist[] = {"buffer", "encoding", "errors",
"newline", "line_buffering", "write_through", "newline", "line_buffering", "write_through",
NULL}; NULL};
PyObject *buffer, *raw; PyObject *buffer, *raw, *codec_info = NULL;
char *encoding = NULL; char *encoding = NULL;
char *errors = NULL; char *errors = NULL;
char *newline = NULL; char *newline = NULL;
...@@ -951,6 +951,17 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) ...@@ -951,6 +951,17 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
"could not determine default encoding"); "could not determine default encoding");
} }
/* Check we have been asked for a real text encoding */
codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
if (codec_info == NULL) {
Py_CLEAR(self->encoding);
goto error;
}
/* XXX: Failures beyond this point have the potential to leak elements
* of the partially constructed object (like self->encoding)
*/
if (errors == NULL) if (errors == NULL)
errors = "strict"; errors = "strict";
self->errors = PyBytes_FromString(errors); self->errors = PyBytes_FromString(errors);
...@@ -965,7 +976,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) ...@@ -965,7 +976,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if (newline) { if (newline) {
self->readnl = PyUnicode_FromString(newline); self->readnl = PyUnicode_FromString(newline);
if (self->readnl == NULL) if (self->readnl == NULL)
return -1; goto error;
} }
self->writetranslate = (newline == NULL || newline[0] != '\0'); self->writetranslate = (newline == NULL || newline[0] != '\0');
if (!self->readuniversal && self->readnl) { if (!self->readuniversal && self->readnl) {
...@@ -989,8 +1000,8 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) ...@@ -989,8 +1000,8 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if (r == -1) if (r == -1)
goto error; goto error;
if (r == 1) { if (r == 1) {
self->decoder = PyCodec_IncrementalDecoder( self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
encoding, errors); errors);
if (self->decoder == NULL) if (self->decoder == NULL)
goto error; goto error;
...@@ -1014,17 +1025,12 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) ...@@ -1014,17 +1025,12 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if (r == -1) if (r == -1)
goto error; goto error;
if (r == 1) { if (r == 1) {
PyObject *ci; self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
self->encoder = PyCodec_IncrementalEncoder( errors);
encoding, errors);
if (self->encoder == NULL) if (self->encoder == NULL)
goto error; goto error;
/* Get the normalized name of the codec */ /* Get the normalized name of the codec */
ci = _PyCodec_Lookup(encoding); res = _PyObject_GetAttrId(codec_info, &PyId_name);
if (ci == NULL)
goto error;
res = _PyObject_GetAttrId(ci, &PyId_name);
Py_DECREF(ci);
if (res == NULL) { if (res == NULL) {
if (PyErr_ExceptionMatches(PyExc_AttributeError)) if (PyErr_ExceptionMatches(PyExc_AttributeError))
PyErr_Clear(); PyErr_Clear();
...@@ -1044,6 +1050,9 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) ...@@ -1044,6 +1050,9 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Py_XDECREF(res); Py_XDECREF(res);
} }
/* Finished sorting out the codec details.  Py_CLEAR (rather than
   Py_DECREF) also resets codec_info to NULL, so a later jump to the
   `error` label, which does Py_XDECREF(codec_info), cannot release the
   reference a second time. */
Py_CLEAR(codec_info);
self->buffer = buffer; self->buffer = buffer;
Py_INCREF(buffer); Py_INCREF(buffer);
...@@ -1106,6 +1115,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) ...@@ -1106,6 +1115,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
return 0; return 0;
error: error:
Py_XDECREF(codec_info);
return -1; return -1;
} }
......
...@@ -243,20 +243,15 @@ PyObject *codec_getitem(const char *encoding, int index) ...@@ -243,20 +243,15 @@ PyObject *codec_getitem(const char *encoding, int index)
return v; return v;
} }
/* Helper function to create an incremental codec. */ /* Helper functions to create an incremental codec. */
static static
PyObject *codec_getincrementalcodec(const char *encoding, PyObject *codec_makeincrementalcodec(PyObject *codec_info,
const char *errors, const char *errors,
const char *attrname) const char *attrname)
{ {
PyObject *codecs, *ret, *inccodec; PyObject *ret, *inccodec;
codecs = _PyCodec_Lookup(encoding); inccodec = PyObject_GetAttrString(codec_info, attrname);
if (codecs == NULL)
return NULL;
inccodec = PyObject_GetAttrString(codecs, attrname);
Py_DECREF(codecs);
if (inccodec == NULL) if (inccodec == NULL)
return NULL; return NULL;
if (errors) if (errors)
...@@ -267,6 +262,21 @@ PyObject *codec_getincrementalcodec(const char *encoding, ...@@ -267,6 +262,21 @@ PyObject *codec_getincrementalcodec(const char *encoding,
return ret; return ret;
} }
/* Look up the codec registered for `encoding` and hand it to
   codec_makeincrementalcodec together with `errors` and `attrname`.
   Returns that helper's result, or NULL if the lookup itself fails. */
static
PyObject *codec_getincrementalcodec(const char *encoding,
                                    const char *errors,
                                    const char *attrname)
{
    PyObject *result = NULL;
    PyObject *info = _PyCodec_Lookup(encoding);

    if (info != NULL) {
        result = codec_makeincrementalcodec(info, errors, attrname);
        /* The looked-up codec info is only needed while building. */
        Py_DECREF(info);
    }
    return result;
}
/* Helper function to create a stream codec. */ /* Helper function to create a stream codec. */
static static
...@@ -290,6 +300,24 @@ PyObject *codec_getstreamcodec(const char *encoding, ...@@ -290,6 +300,24 @@ PyObject *codec_getstreamcodec(const char *encoding,
return streamcodec; return streamcodec;
} }
/* Helpers to work with the result of _PyCodec_Lookup
*/
/* Build an incremental decoder from a codec info object (the result of
   _PyCodec_Lookup) by delegating to codec_makeincrementalcodec with the
   "incrementaldecoder" attribute name. */
PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
                                             const char *errors)
{
    const char *factory_attr = "incrementaldecoder";
    PyObject *decoder;

    decoder = codec_makeincrementalcodec(codec_info, errors, factory_attr);
    return decoder;
}
/* Build an incremental encoder from a codec info object (the result of
   _PyCodec_Lookup) by delegating to codec_makeincrementalcodec with the
   "incrementalencoder" attribute name. */
PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
                                             const char *errors)
{
    const char *factory_attr = "incrementalencoder";
    PyObject *encoder;

    encoder = codec_makeincrementalcodec(codec_info, errors, factory_attr);
    return encoder;
}
/* Convenience APIs to query the Codec registry. /* Convenience APIs to query the Codec registry.
All APIs return a codec object with incremented refcount. All APIs return a codec object with incremented refcount.
...@@ -447,15 +475,12 @@ PyObject *PyCodec_Decode(PyObject *object, ...@@ -447,15 +475,12 @@ PyObject *PyCodec_Decode(PyObject *object,
} }
/* Text encoding/decoding API */ /* Text encoding/decoding API */
static PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
PyObject *codec_getitem_checked(const char *encoding, const char *alternate_command)
const char *operation_name,
int index)
{ {
_Py_IDENTIFIER(_is_text_encoding); _Py_IDENTIFIER(_is_text_encoding);
PyObject *codec; PyObject *codec;
PyObject *attr; PyObject *attr;
PyObject *v;
int is_text_codec; int is_text_codec;
codec = _PyCodec_Lookup(encoding); codec = _PyCodec_Lookup(encoding);
...@@ -482,27 +507,44 @@ PyObject *codec_getitem_checked(const char *encoding, ...@@ -482,27 +507,44 @@ PyObject *codec_getitem_checked(const char *encoding,
Py_DECREF(codec); Py_DECREF(codec);
PyErr_Format(PyExc_LookupError, PyErr_Format(PyExc_LookupError,
"'%.400s' is not a text encoding; " "'%.400s' is not a text encoding; "
"use codecs.%s() to handle arbitrary codecs", "use %s to handle arbitrary codecs",
encoding, operation_name); encoding, alternate_command);
return NULL; return NULL;
} }
} }
} }
/* This appears to be a valid text encoding */
return codec;
}
/* Return item `index` of the codec tuple registered for `encoding`, after
   _PyCodec_LookupTextEncoding has accepted it as a text encoding.
   `alternate_command` is forwarded to that lookup for use in its error
   message.  Returns NULL if the lookup fails. */
static
PyObject *codec_getitem_checked(const char *encoding,
                                const char *alternate_command,
                                int index)
{
    PyObject *codec;
    PyObject *v;

    codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
    if (codec == NULL)
        return NULL;

    /* PyTuple_GET_ITEM yields a borrowed reference: take our own reference
       before releasing the tuple that keeps the item alive, and release
       the tuple exactly once. */
    v = PyTuple_GET_ITEM(codec, index);
    Py_INCREF(v);
    Py_DECREF(codec);
    return v;
}
/* Fetch item 0 of the codec tuple for `encoding` via
   codec_getitem_checked.  The full "codecs.encode()" spelling is passed
   because the error message now interpolates the alternate command
   verbatim ("use %s to handle arbitrary codecs"). */
static PyObject * _PyCodec_TextEncoder(const char *encoding)
{
    return codec_getitem_checked(encoding, "codecs.encode()", 0);
}
/* Fetch item 1 of the codec tuple for `encoding` via
   codec_getitem_checked.  The full "codecs.decode()" spelling is passed
   because the error message now interpolates the alternate command
   verbatim ("use %s to handle arbitrary codecs"). */
static PyObject * _PyCodec_TextDecoder(const char *encoding)
{
    return codec_getitem_checked(encoding, "codecs.decode()", 1);
}
PyObject *_PyCodec_EncodeText(PyObject *object, PyObject *_PyCodec_EncodeText(PyObject *object,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment