Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
687ff0ec
Commit
687ff0ec
authored
Nov 26, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Plain Diff
Issue #11489: JSON decoder now accepts lone surrogates.
parents
1df88677
c93329b3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
73 additions
and
41 deletions
+73
-41
Lib/json/decoder.py
Lib/json/decoder.py
+17
-18
Lib/test/test_json/test_scanstring.py
Lib/test/test_json/test_scanstring.py
+47
-4
Misc/NEWS
Misc/NEWS
+2
-0
Modules/_json.c
Modules/_json.c
+7
-19
No files found.
Lib/json/decoder.py
View file @
687ff0ec
...
...
@@ -58,6 +58,16 @@ BACKSLASH = {
'b'
:
'
\
b
'
,
'f'
:
'
\
f
'
,
'n'
:
'
\
n
'
,
'r'
:
'
\
r
'
,
't'
:
'
\
t
'
,
}
def
_decode_uXXXX
(
s
,
pos
):
esc
=
s
[
pos
+
1
:
pos
+
5
]
if
len
(
esc
)
==
4
and
esc
[
1
]
not
in
'xX'
:
try
:
return
int
(
esc
,
16
)
except
ValueError
:
pass
msg
=
"Invalid
\
\
uXXXX escape"
raise
ValueError
(
errmsg
(
msg
,
s
,
pos
))
def
py_scanstring
(
s
,
end
,
strict
=
True
,
_b
=
BACKSLASH
,
_m
=
STRINGCHUNK
.
match
):
"""Scan the string s for a JSON string. End is the index of the
...
...
@@ -107,25 +117,14 @@ def py_scanstring(s, end, strict=True,
raise
ValueError
(
errmsg
(
msg
,
s
,
end
))
end
+=
1
else
:
esc
=
s
[
end
+
1
:
end
+
5
]
next_end
=
end
+
5
if
len
(
esc
)
!=
4
:
msg
=
"Invalid
\
\
uXXXX escape"
raise
ValueError
(
errmsg
(
msg
,
s
,
end
))
uni
=
int
(
esc
,
16
)
if
0xd800
<=
uni
<=
0xdbff
:
msg
=
"Invalid
\
\
uXXXX
\
\
uXXXX surrogate pair"
if
not
s
[
end
+
5
:
end
+
7
]
==
'
\
\
u'
:
raise
ValueError
(
errmsg
(
msg
,
s
,
end
))
esc2
=
s
[
end
+
7
:
end
+
11
]
if
len
(
esc2
)
!=
4
:
raise
ValueError
(
errmsg
(
msg
,
s
,
end
))
uni2
=
int
(
esc2
,
16
)
uni
=
0x10000
+
(((
uni
-
0xd800
)
<<
10
)
|
(
uni2
-
0xdc00
))
next_end
+=
6
uni
=
_decode_uXXXX
(
s
,
end
)
end
+=
5
if
0xd800
<=
uni
<=
0xdbff
and
s
[
end
:
end
+
2
]
==
'
\
\
u'
:
uni2
=
_decode_uXXXX
(
s
,
end
+
1
)
if
0xdc00
<=
uni2
<=
0xdfff
:
uni
=
0x10000
+
(((
uni
-
0xd800
)
<<
10
)
|
(
uni2
-
0xdc00
))
end
+=
6
char
=
chr
(
uni
)
end
=
next_end
_append
(
char
)
return
''
.
join
(
chunks
),
end
...
...
Lib/test/test_json/test_scanstring.py
View file @
687ff0ec
...
...
@@ -5,10 +5,6 @@ from test.test_json import PyTest, CTest
class
TestScanstring
:
def
test_scanstring
(
self
):
scanstring
=
self
.
json
.
decoder
.
scanstring
self
.
assertEqual
(
scanstring
(
'"z
\
\
ud834
\
\
udd20x"'
,
1
,
True
),
(
'z
\
U0001d120
x'
,
16
))
self
.
assertEqual
(
scanstring
(
'"z
\
U0001d120
x"'
,
1
,
True
),
(
'z
\
U0001d120
x'
,
5
))
...
...
@@ -89,6 +85,53 @@ class TestScanstring:
scanstring
(
'["Bad value", truth]'
,
2
,
True
),
(
'Bad value'
,
12
))
def test_surrogates(self):
    # Each pair is (JSON text, expected decoded string).  Surrogates that
    # are unpaired, or that appear literally in the input, are expected
    # to come through unchanged; a high-surrogate escape directly
    # followed by a low-surrogate escape is expected to be combined into
    # a single code point.  Scanning starts at index 1 (just past the
    # opening quote) and must consume the whole input.
    scan = self.json.decoder.scanstring
    cases = (
        ('"z\\ud834\\u0079x"', 'z\ud834yx'),
        ('"z\\ud834\\udd20x"', 'z\U0001d120x'),
        ('"z\\ud834\\ud834\\udd20x"', 'z\ud834\U0001d120x'),
        ('"z\\ud834x"', 'z\ud834x'),
        ('"z\\ud834\udd20x12345"', 'z\ud834\udd20x12345'),
        ('"z\\udd20x"', 'z\udd20x'),
        ('"z\ud834\udd20x"', 'z\ud834\udd20x'),
        ('"z\ud834\\udd20x"', 'z\ud834\udd20x'),
        ('"z\ud834x"', 'z\ud834x'),
    )
    for document, decoded in cases:
        self.assertEqual(scan(document, 1, True),
                         (decoded, len(document)))
def test_bad_escapes(self):
    # Every input below is a JSON string whose backslash escape is
    # truncated or malformed; scanning it from index 1 must raise
    # ValueError.
    scan = self.json.decoder.scanstring

    # The first (or only) escape is broken.
    broken_first = [
        '"\\"',
        '"\\x"',
        '"\\u"',
        '"\\u0"',
        '"\\u01"',
        '"\\u012"',
        '"\\uz012"',
        '"\\u0z12"',
        '"\\u01z2"',
        '"\\u012z"',
        '"\\u0x12"',
        '"\\u0X12"',
    ]
    # A valid high-surrogate escape is followed by a broken escape.
    broken_second = [
        '"\\ud834\\"',
        '"\\ud834\\u"',
        '"\\ud834\\ud"',
        '"\\ud834\\udd"',
        '"\\ud834\\udd2"',
        '"\\ud834\\uzdd2"',
        '"\\ud834\\udzd2"',
        '"\\ud834\\uddz2"',
        '"\\ud834\\udd2z"',
        '"\\ud834\\u0x20"',
        '"\\ud834\\u0X20"',
    ]
    for text in broken_first + broken_second:
        with self.assertRaises(ValueError, msg=text):
            scan(text, 1, True)
def test_overflow(self):
    # A start index one past sys.maxsize is expected to be rejected with
    # OverflowError.
    out_of_range = sys.maxsize + 1
    with self.assertRaises(OverflowError):
        self.json.decoder.scanstring(b"xxx", out_of_range)
...
...
Misc/NEWS
View file @
687ff0ec
...
...
@@ -16,6 +16,8 @@ Core and Builtins
Library
-------
- Issue #11489: JSON decoder now accepts lone surrogates.
- Issue #19545: Avoid chained exceptions while passing stray % to
time.strptime(). Initial patch by Claudiu Popa.
...
...
Modules/_json.c
View file @
687ff0ec
...
...
@@ -409,17 +409,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
}
}
/* Surrogate pair */
if
(
Py_UNICODE_IS_HIGH_SURROGATE
(
c
))
{
if
(
Py_UNICODE_IS_HIGH_SURROGATE
(
c
)
&&
end
+
6
<
len
&&
PyUnicode_READ
(
kind
,
buf
,
next
++
)
==
'\\'
&&
PyUnicode_READ
(
kind
,
buf
,
next
++
)
==
'u'
)
{
Py_UCS4
c2
=
0
;
if
(
end
+
6
>=
len
)
{
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
goto
bail
;
}
if
(
PyUnicode_READ
(
kind
,
buf
,
next
++
)
!=
'\\'
||
PyUnicode_READ
(
kind
,
buf
,
next
++
)
!=
'u'
)
{
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
goto
bail
;
}
end
+=
6
;
/* Decode 4 hex digits */
for
(;
next
<
end
;
next
++
)
{
...
...
@@ -440,15 +433,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
goto
bail
;
}
}
if
(
!
Py_UNICODE_IS_LOW_SURROGATE
(
c2
))
{
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
goto
bail
;
}
c
=
Py_UNICODE_JOIN_SURROGATES
(
c
,
c2
);
}
else
if
(
Py_UNICODE_IS_LOW_SURROGATE
(
c
))
{
raise_errmsg
(
"Unpaired low surrogate"
,
pystr
,
end
-
5
);
goto
bail
;
if
(
Py_UNICODE_IS_LOW_SURROGATE
(
c2
))
c
=
Py_UNICODE_JOIN_SURROGATES
(
c
,
c2
);
else
end
-=
6
;
}
}
APPEND_OLD_CHUNK
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment