Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
d679377b
Commit
d679377b
authored
Jan 29, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.
parent
8e0ae2a4
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
117 additions
and
53 deletions
+117
-53
Lib/test/test_codeccallbacks.py
Lib/test/test_codeccallbacks.py
+2
-2
Lib/test/test_codecs.py
Lib/test/test_codecs.py
+85
-0
Misc/NEWS
Misc/NEWS
+2
-0
Objects/unicodeobject.c
Objects/unicodeobject.c
+28
-51
No files found.
Lib/test/test_codeccallbacks.py
View file @
d679377b
...
...
@@ -262,12 +262,12 @@ class CodecCallbackTest(unittest.TestCase):
self
.
assertEqual
(
b"
\
\
u3042
\
u
3
xxx"
.
decode
(
"unicode-escape"
,
"test.handler1"
),
"
\
u3042
[<92><117><51>
<120>]
xx"
"
\
u3042
[<92><117><51>
]x
xx"
)
self
.
assertEqual
(
b"
\
\
u3042
\
u
3
xx"
.
decode
(
"unicode-escape"
,
"test.handler1"
),
"
\
u3042
[<92><117><51>
<120><120>]
"
"
\
u3042
[<92><117><51>
]xx
"
)
self
.
assertEqual
(
...
...
Lib/test/test_codecs.py
View file @
d679377b
...
...
@@ -4,6 +4,11 @@ import codecs
import
locale
import
sys
,
_testcapi
,
io
def coding_checker(self, coder):
    """Build a checking helper bound to a test case and a codec function.

    *self* is the object whose ``assertEqual`` method is used to report
    mismatches; *coder* is a callable that takes the input and returns a
    ``(result, length)`` pair.

    Returns a function ``check(input, expect)`` which asserts that
    ``coder(input)`` equals ``(expect, len(input))``, i.e. that the coder
    produced *expect* and reported the whole input as its length.
    """
    # The parameter name ``input`` is kept as in the original so that any
    # keyword-style callers of the returned helper keep working.
    def check(input, expect):
        self.assertEqual(coder(input), (expect, len(input)))
    return check
class
Queue
(
object
):
"""
queue: write bytes at one end, read bytes from the other end
...
...
@@ -1846,6 +1851,85 @@ class TypesTest(unittest.TestCase):
self
.
assertEqual
(
codecs
.
raw_unicode_escape_decode
(
r"\u1234"
),
(
"
\
u1234
"
,
6
))
self
.
assertEqual
(
codecs
.
raw_unicode_escape_decode
(
br"\u1234"
),
(
"
\
u1234
"
,
6
))
class UnicodeEscapeTest(unittest.TestCase):
    """Tests for codecs.unicode_escape_encode and codecs.unicode_escape_decode."""

    def test_empty(self):
        # Empty input encodes/decodes to empty output with a length of 0.
        self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
        self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))

    def test_raw_encode(self):
        # Every code point in range(32, 127) except the backslash must be
        # encoded as the single identical byte.
        encode = codecs.unicode_escape_encode
        for b in range(32, 127):
            if b != b'\\'[0]:
                self.assertEqual(encode(chr(b)), (bytes([b]), 1))

    def test_raw_decode(self):
        # Every byte except the backslash, followed by b'0', must decode to
        # the character with the same ordinal followed by '0'.
        decode = codecs.unicode_escape_decode
        for b in range(256):
            if b != b'\\'[0]:
                self.assertEqual(decode(bytes([b]) + b'0'), (chr(b) + '0', 2))

    def test_escape_encode(self):
        encode = codecs.unicode_escape_encode
        check = coding_checker(self, encode)
        # Characters with a dedicated short escape.
        check('\t', br'\t')
        check('\n', br'\n')
        check('\r', br'\r')
        check('\\', br'\\')
        # Remaining code points below 32 use the \xNN form.
        for b in range(32):
            if chr(b) not in '\t\n\r':
                check(chr(b), ('\\x%02x' % b).encode())
        # Code points 127..255 use the \xNN form as well.
        for b in range(127, 256):
            check(chr(b), ('\\x%02x' % b).encode())
        # Larger code points use \uXXXX and \UXXXXXXXX.
        check('\u20ac', br'\u20ac')
        check('\U0001d120', br'\U0001d120')

    def test_escape_decode(self):
        decode = codecs.unicode_escape_decode
        check = coding_checker(self, decode)
        # Backslash followed by a newline decodes to nothing.
        check(b"[\\\n]", "[]")
        # Quote and backslash escapes.
        check(br'[\"]', '["]')
        check(br"[\']", "[']")
        check(br"[\\]", r"[\]")
        # Single-letter control escapes.
        check(br"[\a]", "[\x07]")
        check(br"[\b]", "[\x08]")
        check(br"[\t]", "[\x09]")
        check(br"[\n]", "[\x0a]")
        check(br"[\v]", "[\x0b]")
        check(br"[\f]", "[\x0c]")
        check(br"[\r]", "[\x0d]")
        # Octal escapes: up to three octal digits are consumed; '8' is not
        # an octal digit, so \8 is left as a literal backslash plus '8'.
        check(br"[\7]", "[\x07]")
        check(br"[\8]", r"[\8]")
        check(br"[\78]", "[\x078]")
        check(br"[\41]", "[!]")
        check(br"[\418]", "[!8]")
        check(br"[\101]", "[A]")
        check(br"[\1010]", "[A0]")
        # Hex escapes consume exactly two digits.
        check(br"[\x41]", "[A]")
        check(br"[\x410]", "[A0]")
        # \u and \U escapes.
        check(br"\u20ac", "\u20ac")
        check(br"\U0001d120", "\U0001d120")
        # Any byte that does not start a recognised escape is passed
        # through together with the preceding backslash.
        for b in range(256):
            if b not in b'\n"\'\\abtnvfr01234567xuUN':
                check(b'\\' + bytes([b]), '\\' + chr(b))

    def test_decode_errors(self):
        decode = codecs.unicode_escape_decode
        # c is the escape letter, d the number of too-short digit counts
        # (0 .. d-1) that are tried for that escape.
        for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
            for i in range(d):
                # Truncated at end of input.
                self.assertRaises(UnicodeDecodeError, decode,
                                  b"\\" + c + b"0"*i)
                # Truncated by a following non-hex character.
                self.assertRaises(UnicodeDecodeError, decode,
                                  b"[\\" + c + b"0"*i + b"]")
                # Both kinds of truncation in one input, run through the
                # "ignore" and "replace" error handlers.
                data = b"[\\" + c + b"0"*i + b"]\\" + c + b"0"*i
                self.assertEqual(decode(data, "ignore"), ("[]", len(data)))
                self.assertEqual(decode(data, "replace"),
                                 ("[\ufffd]\ufffd", len(data)))
        # A complete \U escape whose value is above U+10FFFF is an error.
        self.assertRaises(UnicodeDecodeError, decode, br"\U00110000")
        self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
        self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
class
SurrogateEscapeTest
(
unittest
.
TestCase
):
def
test_utf8
(
self
):
...
...
@@ -2011,6 +2095,7 @@ def test_main():
CharmapTest
,
WithStmtTest
,
TypesTest
,
UnicodeEscapeTest
,
SurrogateEscapeTest
,
BomTest
,
TransformCodecTest
,
...
...
Misc/NEWS
View file @
d679377b
...
...
@@ -214,6 +214,8 @@ Core and Builtins
Library
-------
- Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.
- Issue #9290: In IDLE the sys.std* streams now implement io.TextIOBase
interface and support all mandatory methods and properties.
...
...
Objects/unicodeobject.c
View file @
d679377b
...
...
@@ -3760,7 +3760,6 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
Py_ssize_t
startinpos
;
Py_ssize_t
endinpos
;
Py_ssize_t
outpos
;
int
i
;
PyUnicodeObject
*
v
;
Py_UNICODE
*
p
;
const
char
*
end
;
...
...
@@ -3846,29 +3845,19 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
message
=
"truncated
\\
UXXXXXXXX escape"
;
hexescape:
chr
=
0
;
outpos
=
p
-
PyUnicode_AS_UNICODE
(
v
);
if
(
s
+
digits
>
end
)
{
endinpos
=
size
;
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
"end of string in escape sequence"
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
outpos
,
&
p
))
goto
onError
;
goto
nextByte
;
if
(
end
-
s
<
digits
)
{
/* count only hex digits */
for
(;
s
<
end
;
++
s
)
{
c
=
(
unsigned
char
)
*
s
;
if
(
!
Py_ISXDIGIT
(
c
))
goto
error
;
}
for
(
i
=
0
;
i
<
digits
;
++
i
)
{
c
=
(
unsigned
char
)
s
[
i
];
if
(
!
Py_ISXDIGIT
(
c
))
{
endinpos
=
(
s
+
i
+
1
)
-
starts
;
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
message
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
outpos
,
&
p
))
goto
onError
;
goto
nextByte
;
goto
error
;
}
for
(;
digits
--
;
++
s
)
{
c
=
(
unsigned
char
)
*
s
;
if
(
!
Py_ISXDIGIT
(
c
))
goto
error
;
chr
=
(
chr
<<
4
)
&
~
0xF
;
if
(
c
>=
'0'
&&
c
<=
'9'
)
chr
+=
c
-
'0'
;
...
...
@@ -3877,7 +3866,6 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
else
chr
+=
10
+
c
-
'A'
;
}
s
+=
i
;
if
(
chr
==
0xffffffff
&&
PyErr_Occurred
())
/* _decoding_error will have already written into the
target buffer. */
...
...
@@ -3898,14 +3886,8 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
*
p
++
=
0xDC00
+
(
Py_UNICODE
)
(
chr
&
0x03FF
);
#endif
}
else
{
endinpos
=
s
-
starts
;
outpos
=
p
-
PyUnicode_AS_UNICODE
(
v
);
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
"illegal Unicode character"
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
outpos
,
&
p
))
goto
onError
;
message
=
"illegal Unicode character"
;
goto
error
;
}
break
;
...
...
@@ -3932,20 +3914,23 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
goto
store
;
}
}
endinpos
=
s
-
starts
;
outpos
=
p
-
PyUnicode_AS_UNICODE
(
v
);
if
(
unicode_decode_call_errorhandler
(
errors
,
&
errorHandler
,
"unicodeescape"
,
message
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
outpos
,
&
p
))
goto
onError
;
break
;
goto
error
;
default:
if
(
s
>
end
)
{
message
=
"
\\
at end of string"
;
s
--
;
goto
error
;
}
else
{
*
p
++
=
'\\'
;
*
p
++
=
(
unsigned
char
)
s
[
-
1
];
}
break
;
}
continue
;
error:
endinpos
=
s
-
starts
;
outpos
=
p
-
PyUnicode_AS_UNICODE
(
v
);
if
(
unicode_decode_call_errorhandler
(
...
...
@@ -3954,15 +3939,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
v
,
&
outpos
,
&
p
))
goto
onError
;
}
else
{
*
p
++
=
'\\'
;
*
p
++
=
(
unsigned
char
)
s
[
-
1
];
}
break
;
}
nextByte:
;
continue
;
}
if
(
_PyUnicode_Resize
(
&
v
,
p
-
PyUnicode_AS_UNICODE
(
v
))
<
0
)
goto
onError
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment