Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
24193deb
Commit
24193deb
authored
Jan 29, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Plain Diff
Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.
parents
4dafd407
d679377b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
118 additions
and
54 deletions
+118
-54
Lib/test/test_codeccallbacks.py
Lib/test/test_codeccallbacks.py
+2
-2
Lib/test/test_codecs.py
Lib/test/test_codecs.py
+84
-0
Misc/NEWS
Misc/NEWS
+2
-0
Objects/unicodeobject.c
Objects/unicodeobject.c
+30
-52
No files found.
Lib/test/test_codeccallbacks.py
View file @
24193deb
...
...
@@ -271,12 +271,12 @@ class CodecCallbackTest(unittest.TestCase):
self
.
assertEqual
(
b"
\
\
u3042
\
u
3
xxx"
.
decode
(
"unicode-escape"
,
"test.handler1"
),
"
\
u3042
[<92><117><51>
<120>]
xx"
"
\
u3042
[<92><117><51>
]x
xx"
)
self
.
assertEqual
(
b"
\
\
u3042
\
u
3
xx"
.
decode
(
"unicode-escape"
,
"test.handler1"
),
"
\
u3042
[<92><117><51>
<120><120>]
"
"
\
u3042
[<92><117><51>
]xx
"
)
self
.
assertEqual
(
...
...
Lib/test/test_codecs.py
View file @
24193deb
...
...
@@ -21,6 +21,11 @@ except ImportError:
else
:
SIZEOF_WCHAR_T
=
ctypes
.
sizeof
(
ctypes
.
c_wchar
)
def
coding_checker
(
self
,
coder
):
def
check
(
input
,
expect
):
self
.
assertEqual
(
coder
(
input
),
(
expect
,
len
(
input
)))
return
check
class
Queue
(
object
):
"""
queue: write bytes at one end, read bytes from the other end
...
...
@@ -2009,6 +2014,85 @@ class TypesTest(unittest.TestCase):
self
.
assertRaises
(
UnicodeDecodeError
,
codecs
.
raw_unicode_escape_decode
,
br"\U00110000"
)
self
.
assertEqual
(
codecs
.
raw_unicode_escape_decode
(
r"\U00110000"
,
"replace"
),
(
"
\
ufffd
"
,
10
))
class
UnicodeEscapeTest
(
unittest
.
TestCase
):
def
test_empty
(
self
):
self
.
assertEqual
(
codecs
.
unicode_escape_encode
(
""
),
(
b""
,
0
))
self
.
assertEqual
(
codecs
.
unicode_escape_decode
(
b""
),
(
""
,
0
))
def
test_raw_encode
(
self
):
encode
=
codecs
.
unicode_escape_encode
for
b
in
range
(
32
,
127
):
if
b
!=
b'
\
\
'
[
0
]:
self
.
assertEqual
(
encode
(
chr
(
b
)),
(
bytes
([
b
]),
1
))
def
test_raw_decode
(
self
):
decode
=
codecs
.
unicode_escape_decode
for
b
in
range
(
256
):
if
b
!=
b'
\
\
'
[
0
]:
self
.
assertEqual
(
decode
(
bytes
([
b
])
+
b'0'
),
(
chr
(
b
)
+
'0'
,
2
))
def
test_escape_encode
(
self
):
encode
=
codecs
.
unicode_escape_encode
check
=
coding_checker
(
self
,
encode
)
check
(
'
\
t
'
,
br'\t'
)
check
(
'
\
n
'
,
br'\n'
)
check
(
'
\
r
'
,
br'\r'
)
check
(
'
\
\
'
,
br'\\'
)
for
b
in
range
(
32
):
if
chr
(
b
)
not
in
'
\
t
\
n
\
r
'
:
check
(
chr
(
b
),
(
'
\
\
x%02x'
%
b
).
encode
())
for
b
in
range
(
127
,
256
):
check
(
chr
(
b
),
(
'
\
\
x%02x'
%
b
).
encode
())
check
(
'
\
u20ac
'
,
br'\u20ac'
)
check
(
'
\
U0001d120
'
,
br'\U0001d120'
)
def
test_escape_decode
(
self
):
decode
=
codecs
.
unicode_escape_decode
check
=
coding_checker
(
self
,
decode
)
check
(
b"[
\
\
\
n
]"
,
"[]"
)
check
(
br'[\"]'
,
'["]'
)
check
(
br"[\']"
,
"[']"
)
check
(
br"[\\]"
,
r"[\
]
")
check(br"
[
\
a
]
", "
[
\
x07
]
")
check(br"
[
\
b
]
", "
[
\
x08
]
")
check(br"
[
\
t
]
", "
[
\
x09
]
")
check(br"
[
\
n
]
", "
[
\
x0a
]
")
check(br"
[
\
v
]
", "
[
\
x0b
]
")
check(br"
[
\
f
]
", "
[
\
x0c
]
")
check(br"
[
\
r
]
", "
[
\
x0d
]
")
check(br"
[
\
7
]
", "
[
\
x07
]
")
check(br"
[
\
8
]
", r"
[
\
8
]
")
check(br"
[
\
78
]
", "
[
\
x078
]
")
check(br"
[
\
41
]
", "
[
!
]
")
check(br"
[
\
418
]
", "
[
!
8
]
")
check(br"
[
\
101
]
", "
[
A
]
")
check(br"
[
\
1010
]
", "
[
A0
]
")
check(br"
[
\
x41
]
", "
[
A
]
")
check(br"
[
\
x410
]
", "
[
A0
]
")
check(br"
\
u20ac
", "
\
u20ac
")
check(br"
\
U0001d120
", "
\
U0001d120
")
for b in range(256):
if b not in b'
\
n
"
\
'
\
\
abtnvfr01234567xuUN'
:
check
(
b'
\
\
'
+
bytes
([
b
]),
'
\
\
'
+
chr
(
b
))
def
test_decode_errors
(
self
):
decode
=
codecs
.
unicode_escape_decode
for
c
,
d
in
(
b'x'
,
2
),
(
b'u'
,
4
),
(
b'U'
,
4
):
for
i
in
range
(
d
):
self
.
assertRaises
(
UnicodeDecodeError
,
decode
,
b"
\
\
"
+
c
+
b"0"
*
i
)
self
.
assertRaises
(
UnicodeDecodeError
,
decode
,
b"[
\
\
"
+
c
+
b"0"
*
i
+
b"]"
)
data
=
b"[
\
\
"
+
c
+
b"0"
*
i
+
b"]
\
\
"
+
c
+
b"0"
*
i
self
.
assertEqual
(
decode
(
data
,
"ignore"
),
(
"[]"
,
len
(
data
)))
self
.
assertEqual
(
decode
(
data
,
"replace"
),
(
"[
\
ufffd
]
\
ufffd
"
,
len
(
data
)))
self
.
assertRaises
(
UnicodeDecodeError
,
decode
,
br"\U00110000"
)
self
.
assertEqual
(
decode
(
br"\U00110000"
,
"ignore"
),
(
""
,
10
))
self
.
assertEqual
(
decode
(
br"\U00110000"
,
"replace"
),
(
"
\
ufffd
"
,
10
))
class
SurrogateEscapeTest
(
unittest
.
TestCase
):
def
test_utf8
(
self
):
...
...
Misc/NEWS
View file @
24193deb
...
...
@@ -162,6 +162,8 @@ Core and Builtins
Library
-------
- Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.
- Issue #1602133: on Mac OS X a shared library build (``--enable-shared``)
now fills the ``os.environ`` variable correctly.
...
...
Objects/unicodeobject.c
View file @
24193deb
...
...
@@ -5508,7 +5508,6 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
const
char
*
starts
=
s
;
Py_ssize_t
startinpos
;
Py_ssize_t
endinpos
;
int
j
;
PyObject
*
v
;
const
char
*
end
;
char
*
message
;
...
...
@@ -5630,29 +5629,19 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
message
=
"truncated
\\
UXXXXXXXX escape"
;
hexescape:
chr
=
0
;
if
(
s
+
digits
>
end
)
{
endinpos
=
size
;
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
"end of string in escape sequence"
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
i
))
goto
onError
;
goto
nextByte
;
}
for
(
j
=
0
;
j
<
digits
;
++
j
)
{
c
=
(
unsigned
char
)
s
[
j
];
if
(
!
Py_ISXDIGIT
(
c
))
{
endinpos
=
(
s
+
j
+
1
)
-
starts
;
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
message
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
i
))
goto
onError
;
len
=
PyUnicode_GET_LENGTH
(
v
);
goto
nextByte
;
if
(
end
-
s
<
digits
)
{
/* count only hex digits */
for
(;
s
<
end
;
++
s
)
{
c
=
(
unsigned
char
)
*
s
;
if
(
!
Py_ISXDIGIT
(
c
))
goto
error
;
}
goto
error
;
}
for
(;
digits
--
;
++
s
)
{
c
=
(
unsigned
char
)
*
s
;
if
(
!
Py_ISXDIGIT
(
c
))
goto
error
;
chr
=
(
chr
<<
4
)
&
~
0xF
;
if
(
c
>=
'0'
&&
c
<=
'9'
)
chr
+=
c
-
'0'
;
...
...
@@ -5661,24 +5650,16 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
else
chr
+=
10
+
c
-
'A'
;
}
s
+=
j
;
if
(
chr
==
0xffffffff
&&
PyErr_Occurred
())
/* _decoding_error will have already written into the
target buffer. */
break
;
store:
/* when we get here, chr is a 32-bit unicode character */
if
(
chr
<=
MAX_UNICODE
)
{
WRITECHAR
(
chr
);
}
else
{
endinpos
=
s
-
starts
;
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
"illegal Unicode character"
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
i
))
goto
onError
;
}
message
=
"illegal Unicode character"
;
if
(
chr
>
MAX_UNICODE
)
goto
error
;
WRITECHAR
(
chr
);
break
;
/* \N{name} */
...
...
@@ -5706,26 +5687,13 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
goto
store
;
}
}
endinpos
=
s
-
starts
;
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
message
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
i
))
goto
onError
;
break
;
goto
error
;
default:
if
(
s
>
end
)
{
message
=
"
\\
at end of string"
;
s
--
;
endinpos
=
s
-
starts
;
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
message
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
i
))
goto
onError
;
goto
error
;
}
else
{
WRITECHAR
(
'\\'
);
...
...
@@ -5733,8 +5701,18 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
}
break
;
}
nextByte:
;
continue
;
error:
endinpos
=
s
-
starts
;
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
message
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
i
))
goto
onError
;
len
=
PyUnicode_GET_LENGTH
(
v
);
continue
;
}
#undef WRITECHAR
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment