Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
ad3e972a
Commit
ad3e972a
authored
Mar 02, 2014
by
Georg Brandl
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #20404: reject non-text encodings early in TextIOWrapper.
parent
ad51a118
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
134 additions
and
39 deletions
+134
-39
Include/codecs.h
Include/codecs.h
+20
-0
Lib/_pyio.py
Lib/_pyio.py
+5
-0
Lib/test/test_io.py
Lib/test/test_io.py
+24
-6
Modules/_io/textio.c
Modules/_io/textio.c
+22
-12
Python/codecs.c
Python/codecs.c
+63
-21
No files found.
Include/codecs.h
View file @
ad3e972a
...
...
@@ -104,7 +104,14 @@ PyAPI_FUNC(PyObject *) PyCodec_Decode(
Please note that these APIs are internal and should not
be used in Python C extensions.
XXX (ncoghlan): should we make these, or something like them, public
in Python 3.5+?
*/
PyAPI_FUNC
(
PyObject
*
)
_PyCodec_LookupTextEncoding
(
const
char
*
encoding
,
const
char
*
alternate_command
);
PyAPI_FUNC
(
PyObject
*
)
_PyCodec_EncodeText
(
PyObject
*
object
,
...
...
@@ -117,6 +124,19 @@ PyAPI_FUNC(PyObject *) _PyCodec_DecodeText(
const
char
*
encoding
,
const
char
*
errors
);
/* These two aren't actually text encoding specific, but _io.TextIOWrapper
* is the only current API consumer.
*/
PyAPI_FUNC
(
PyObject
*
)
_PyCodecInfo_GetIncrementalDecoder
(
PyObject
*
codec_info
,
const
char
*
errors
);
PyAPI_FUNC
(
PyObject
*
)
_PyCodecInfo_GetIncrementalEncoder
(
PyObject
*
codec_info
,
const
char
*
errors
);
#endif
...
...
Lib/_pyio.py
View file @
ad3e972a
...
...
@@ -1495,6 +1495,11 @@ class TextIOWrapper(TextIOBase):
if
not
isinstance
(
encoding
,
str
):
raise
ValueError
(
"invalid encoding: %r"
%
encoding
)
if
not
codecs
.
lookup
(
encoding
).
_is_text_encoding
:
msg
=
(
"%r is not a text encoding; "
"use codecs.open() to handle arbitrary codecs"
)
raise
LookupError
(
msg
%
encoding
)
if
errors
is
None
:
errors
=
"strict"
else
:
...
...
Lib/test/test_io.py
View file @
ad3e972a
...
...
@@ -1955,6 +1955,15 @@ class TextIOWrapperTest(unittest.TestCase):
self
.
assertRaises
(
TypeError
,
t
.
__init__
,
b
,
newline
=
42
)
self
.
assertRaises
(
ValueError
,
t
.
__init__
,
b
,
newline
=
'xyzzy'
)
def
test_non_text_encoding_codecs_are_rejected
(
self
):
# Ensure the constructor complains if passed a codec that isn't
# marked as a text encoding
# http://bugs.python.org/issue20404
r
=
self
.
BytesIO
()
b
=
self
.
BufferedWriter
(
r
)
with
self
.
assertRaisesRegex
(
LookupError
,
"is not a text encoding"
):
self
.
TextIOWrapper
(
b
,
encoding
=
"hex_codec"
)
def
test_detach
(
self
):
r
=
self
.
BytesIO
()
b
=
self
.
BufferedWriter
(
r
)
...
...
@@ -2607,15 +2616,22 @@ class TextIOWrapperTest(unittest.TestCase):
def
test_illegal_decoder
(
self
):
# Issue #17106
# Bypass the early encoding check added in issue 20404
def
_make_illegal_wrapper
():
quopri
=
codecs
.
lookup
(
"quopri_codec"
)
quopri
.
_is_text_encoding
=
True
try
:
t
=
self
.
TextIOWrapper
(
self
.
BytesIO
(
b'aaaaaa'
),
newline
=
'
\
n
'
,
encoding
=
"quopri_codec"
)
finally
:
quopri
.
_is_text_encoding
=
False
return
t
# Crash when decoder returns non-string
t
=
self
.
TextIOWrapper
(
self
.
BytesIO
(
b'aaaaaa'
),
newline
=
'
\
n
'
,
encoding
=
'quopri_codec'
)
t
=
_make_illegal_wrapper
()
self
.
assertRaises
(
TypeError
,
t
.
read
,
1
)
t
=
self
.
TextIOWrapper
(
self
.
BytesIO
(
b'aaaaaa'
),
newline
=
'
\
n
'
,
encoding
=
'quopri_codec'
)
t
=
_make_illegal_wrapper
()
self
.
assertRaises
(
TypeError
,
t
.
readline
)
t
=
self
.
TextIOWrapper
(
self
.
BytesIO
(
b'aaaaaa'
),
newline
=
'
\
n
'
,
encoding
=
'quopri_codec'
)
t
=
_make_illegal_wrapper
()
self
.
assertRaises
(
TypeError
,
t
.
read
)
...
...
@@ -3053,6 +3069,7 @@ class MiscIOTest(unittest.TestCase):
class
CMiscIOTest
(
MiscIOTest
):
io
=
io
shutdown_error
=
"RuntimeError: could not find io module state"
def
test_readinto_buffer_overflow
(
self
):
# Issue #18025
...
...
@@ -3065,6 +3082,7 @@ class CMiscIOTest(MiscIOTest):
class
PyMiscIOTest
(
MiscIOTest
):
io
=
pyio
shutdown_error
=
"LookupError: unknown encoding: ascii"
@
unittest
.
skipIf
(
os
.
name
==
'nt'
,
'POSIX signals required for this test.'
)
...
...
Modules/_io/textio.c
View file @
ad3e972a
...
...
@@ -836,7 +836,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
char
*
kwlist
[]
=
{
"buffer"
,
"encoding"
,
"errors"
,
"newline"
,
"line_buffering"
,
"write_through"
,
NULL
};
PyObject
*
buffer
,
*
raw
;
PyObject
*
buffer
,
*
raw
,
*
codec_info
=
NULL
;
char
*
encoding
=
NULL
;
char
*
errors
=
NULL
;
char
*
newline
=
NULL
;
...
...
@@ -951,6 +951,17 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
"could not determine default encoding"
);
}
/* Check we have been asked for a real text encoding */
codec_info
=
_PyCodec_LookupTextEncoding
(
encoding
,
"codecs.open()"
);
if
(
codec_info
==
NULL
)
{
Py_CLEAR
(
self
->
encoding
);
goto
error
;
}
/* XXX: Failures beyond this point have the potential to leak elements
* of the partially constructed object (like self->encoding)
*/
if
(
errors
==
NULL
)
errors
=
"strict"
;
self
->
errors
=
PyBytes_FromString
(
errors
);
...
...
@@ -965,7 +976,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if
(
newline
)
{
self
->
readnl
=
PyUnicode_FromString
(
newline
);
if
(
self
->
readnl
==
NULL
)
return
-
1
;
goto
error
;
}
self
->
writetranslate
=
(
newline
==
NULL
||
newline
[
0
]
!=
'\0'
);
if
(
!
self
->
readuniversal
&&
self
->
readnl
)
{
...
...
@@ -989,8 +1000,8 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if
(
r
==
-
1
)
goto
error
;
if
(
r
==
1
)
{
self
->
decoder
=
PyCodec_IncrementalDecoder
(
encoding
,
errors
);
self
->
decoder
=
_PyCodecInfo_GetIncrementalDecoder
(
codec_info
,
errors
);
if
(
self
->
decoder
==
NULL
)
goto
error
;
...
...
@@ -1014,17 +1025,12 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
if
(
r
==
-
1
)
goto
error
;
if
(
r
==
1
)
{
PyObject
*
ci
;
self
->
encoder
=
PyCodec_IncrementalEncoder
(
encoding
,
errors
);
self
->
encoder
=
_PyCodecInfo_GetIncrementalEncoder
(
codec_info
,
errors
);
if
(
self
->
encoder
==
NULL
)
goto
error
;
/* Get the normalized name of the codec */
ci
=
_PyCodec_Lookup
(
encoding
);
if
(
ci
==
NULL
)
goto
error
;
res
=
_PyObject_GetAttrId
(
ci
,
&
PyId_name
);
Py_DECREF
(
ci
);
res
=
_PyObject_GetAttrId
(
codec_info
,
&
PyId_name
);
if
(
res
==
NULL
)
{
if
(
PyErr_ExceptionMatches
(
PyExc_AttributeError
))
PyErr_Clear
();
...
...
@@ -1044,6 +1050,9 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Py_XDECREF
(
res
);
}
/* Finished sorting out the codec details */
Py_DECREF
(
codec_info
);
self
->
buffer
=
buffer
;
Py_INCREF
(
buffer
);
...
...
@@ -1106,6 +1115,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
return
0
;
error:
Py_XDECREF
(
codec_info
);
return
-
1
;
}
...
...
Python/codecs.c
View file @
ad3e972a
...
...
@@ -243,20 +243,15 @@ PyObject *codec_getitem(const char *encoding, int index)
return
v
;
}
/* Helper function to create an incremental codec. */
/* Helper functions to create an incremental codec. */
static
PyObject
*
codec_
getincrementalcodec
(
const
char
*
encoding
,
const
char
*
errors
,
const
char
*
attrname
)
PyObject
*
codec_
makeincrementalcodec
(
PyObject
*
codec_info
,
const
char
*
errors
,
const
char
*
attrname
)
{
PyObject
*
codecs
,
*
ret
,
*
inccodec
;
PyObject
*
ret
,
*
inccodec
;
codecs
=
_PyCodec_Lookup
(
encoding
);
if
(
codecs
==
NULL
)
return
NULL
;
inccodec
=
PyObject_GetAttrString
(
codecs
,
attrname
);
Py_DECREF
(
codecs
);
inccodec
=
PyObject_GetAttrString
(
codec_info
,
attrname
);
if
(
inccodec
==
NULL
)
return
NULL
;
if
(
errors
)
...
...
@@ -267,6 +262,21 @@ PyObject *codec_getincrementalcodec(const char *encoding,
return
ret
;
}
static
PyObject
*
codec_getincrementalcodec
(
const
char
*
encoding
,
const
char
*
errors
,
const
char
*
attrname
)
{
PyObject
*
codec_info
,
*
ret
;
codec_info
=
_PyCodec_Lookup
(
encoding
);
if
(
codec_info
==
NULL
)
return
NULL
;
ret
=
codec_makeincrementalcodec
(
codec_info
,
errors
,
attrname
);
Py_DECREF
(
codec_info
);
return
ret
;
}
/* Helper function to create a stream codec. */
static
...
...
@@ -290,6 +300,24 @@ PyObject *codec_getstreamcodec(const char *encoding,
return
streamcodec
;
}
/* Helpers to work with the result of _PyCodec_Lookup
*/
PyObject
*
_PyCodecInfo_GetIncrementalDecoder
(
PyObject
*
codec_info
,
const
char
*
errors
)
{
return
codec_makeincrementalcodec
(
codec_info
,
errors
,
"incrementaldecoder"
);
}
PyObject
*
_PyCodecInfo_GetIncrementalEncoder
(
PyObject
*
codec_info
,
const
char
*
errors
)
{
return
codec_makeincrementalcodec
(
codec_info
,
errors
,
"incrementalencoder"
);
}
/* Convenience APIs to query the Codec registry.
All APIs return a codec object with incremented refcount.
...
...
@@ -447,15 +475,12 @@ PyObject *PyCodec_Decode(PyObject *object,
}
/* Text encoding/decoding API */
static
PyObject
*
codec_getitem_checked
(
const
char
*
encoding
,
const
char
*
operation_name
,
int
index
)
PyObject
*
_PyCodec_LookupTextEncoding
(
const
char
*
encoding
,
const
char
*
alternate_command
)
{
_Py_IDENTIFIER
(
_is_text_encoding
);
PyObject
*
codec
;
PyObject
*
attr
;
PyObject
*
v
;
int
is_text_codec
;
codec
=
_PyCodec_Lookup
(
encoding
);
...
...
@@ -482,27 +507,44 @@ PyObject *codec_getitem_checked(const char *encoding,
Py_DECREF
(
codec
);
PyErr_Format
(
PyExc_LookupError
,
"'%.400s' is not a text encoding; "
"use
codecs.%s()
to handle arbitrary codecs"
,
encoding
,
operation_name
);
"use
%s
to handle arbitrary codecs"
,
encoding
,
alternate_command
);
return
NULL
;
}
}
}
/* This appears to be a valid text encoding */
return
codec
;
}
static
PyObject
*
codec_getitem_checked
(
const
char
*
encoding
,
const
char
*
alternate_command
,
int
index
)
{
PyObject
*
codec
;
PyObject
*
v
;
codec
=
_PyCodec_LookupTextEncoding
(
encoding
,
alternate_command
);
if
(
codec
==
NULL
)
return
NULL
;
v
=
PyTuple_GET_ITEM
(
codec
,
index
);
Py_DECREF
(
codec
);
Py_INCREF
(
v
);
Py_DECREF
(
codec
);
return
v
;
}
static
PyObject
*
_PyCodec_TextEncoder
(
const
char
*
encoding
)
{
return
codec_getitem_checked
(
encoding
,
"
encode
"
,
0
);
return
codec_getitem_checked
(
encoding
,
"
codecs.encode()
"
,
0
);
}
static
PyObject
*
_PyCodec_TextDecoder
(
const
char
*
encoding
)
{
return
codec_getitem_checked
(
encoding
,
"
decode
"
,
1
);
return
codec_getitem_checked
(
encoding
,
"
codecs.decode()
"
,
1
);
}
PyObject
*
_PyCodec_EncodeText
(
PyObject
*
object
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment