Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
76df43de
Commit
76df43de
authored
Oct 30, 2012
by
Victor Stinner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #16330: Use surrogate-related macros
Patch written by Serhiy Storchaka.
parent
a5e7cd06
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
25 additions
and
28 deletions
+25
-28
Include/unicodeobject.h
Include/unicodeobject.h
+3
-3
Modules/_json.c
Modules/_json.c
+10
-11
Modules/cjkcodecs/cjkcodecs.h
Modules/cjkcodecs/cjkcodecs.h
+5
-6
Objects/unicodeobject.c
Objects/unicodeobject.c
+3
-4
Python/codecs.c
Python/codecs.c
+2
-2
Python/fileutils.c
Python/fileutils.c
+2
-2
No files found.
Include/unicodeobject.h
View file @
76df43de
...
...
@@ -180,9 +180,9 @@ typedef unsigned char Py_UCS1;
} while (0)
/* macros to work with surrogates */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <=
ch && ch
<= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <=
ch && ch
<= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <=
ch && ch
<= 0xDFFF)
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <=
(ch) && (ch)
<= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <=
(ch) && (ch)
<= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <=
(ch) && (ch)
<= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value. */
#define Py_UNICODE_JOIN_SURROGATES(high, low) \
(((((Py_UCS4)(high) & 0x03FF) << 10) | \
...
...
Modules/_json.c
View file @
76df43de
...
...
@@ -174,14 +174,13 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
default:
if
(
c
>=
0x10000
)
{
/* UTF-16 surrogate pair */
Py_UCS4
v
=
c
-
0x10000
;
c
=
0xd800
|
((
v
>>
10
)
&
0x3ff
);
Py_UCS4
v
=
Py_UNICODE_HIGH_SURROGATE
(
c
);
output
[
chars
++
]
=
'u'
;
output
[
chars
++
]
=
Py_hexdigits
[(
c
>>
12
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
c
>>
8
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
c
>>
4
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
c
)
&
0xf
];
c
=
0xdc00
|
(
v
&
0x3ff
);
output
[
chars
++
]
=
Py_hexdigits
[(
v
>>
12
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
v
>>
8
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
v
>>
4
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
v
)
&
0xf
];
c
=
Py_UNICODE_LOW_SURROGATE
(
c
);
output
[
chars
++
]
=
'\\'
;
}
output
[
chars
++
]
=
'u'
;
...
...
@@ -431,7 +430,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
}
}
/* Surrogate pair */
if
(
(
c
&
0xfc00
)
==
0xd800
)
{
if
(
Py_UNICODE_IS_HIGH_SURROGATE
(
c
)
)
{
Py_UCS4
c2
=
0
;
if
(
end
+
6
>=
len
)
{
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
...
...
@@ -462,13 +461,13 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
goto
bail
;
}
}
if
(
(
c2
&
0xfc00
)
!=
0xdc00
)
{
if
(
!
Py_UNICODE_IS_LOW_SURROGATE
(
c2
)
)
{
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
goto
bail
;
}
c
=
0x10000
+
(((
c
-
0xd800
)
<<
10
)
|
(
c2
-
0xdc00
)
);
c
=
Py_UNICODE_JOIN_SURROGATES
(
c
,
c2
);
}
else
if
(
(
c
&
0xfc00
)
==
0xdc00
)
{
else
if
(
Py_UNICODE_IS_LOW_SURROGATE
(
c
)
)
{
raise_errmsg
(
"Unpaired low surrogate"
,
pystr
,
end
-
5
);
goto
bail
;
}
...
...
Modules/cjkcodecs/cjkcodecs.h
View file @
76df43de
...
...
@@ -148,8 +148,8 @@ static const struct dbcs_map *mapping_list;
#if Py_UNICODE_SIZE == 2
# define WRITEUCS4(c) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] =
0xd800 + (((c) - 0x10000) >> 10);
\
(*outbuf)[1] =
0xdc00 + (((c) - 0x10000) & 0x3ff);
\
(*outbuf)[0] =
Py_UNICODE_HIGH_SURROGATE(c);
\
(*outbuf)[1] =
Py_UNICODE_LOW_SURROGATE(c);
\
NEXT_OUT(2)
#else
# define WRITEUCS4(c) \
...
...
@@ -188,11 +188,10 @@ static const struct dbcs_map *mapping_list;
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
if (
c >> 10 == 0xd800 >> 10) {
/* high surrogate */
\
if (
Py_UNICODE_IS_HIGH_SURROGATE(c)) {
\
REQUIRE_INBUF(2) \
if (IN2 >> 10 == 0xdc00 >> 10) {
/* low surrogate */
\
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
((ucs4_t)(IN2) - 0xdc00); \
if (Py_UNICODE_IS_LOW_SURROGATE(IN2)) { \
c = Py_UNICODE_JOIN_SURROGATES(c, IN2) \
} \
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
...
...
Objects/unicodeobject.c
View file @
76df43de
...
...
@@ -4412,7 +4412,7 @@ encode_char:
/* code first surrogate */
base64bits
+=
16
;
base64buffer
=
(
base64buffer
<<
16
)
|
0xd800
|
((
ch
-
0x10000
)
>>
10
);
base64buffer
=
(
base64buffer
<<
16
)
|
Py_UNICODE_HIGH_SURROGATE
(
ch
);
while
(
base64bits
>=
6
)
{
*
out
++
=
TO_BASE64
(
base64buffer
>>
(
base64bits
-
6
));
base64bits
-=
6
;
...
...
@@ -7052,9 +7052,8 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
charsize
=
1
;
}
else
{
ch
-=
0x10000
;
chars
[
0
]
=
0xd800
+
(
ch
>>
10
);
chars
[
1
]
=
0xdc00
+
(
ch
&
0x3ff
);
chars
[
0
]
=
Py_UNICODE_HIGH_SURROGATE
(
ch
);
chars
[
1
]
=
Py_UNICODE_LOW_SURROGATE
(
ch
);
charsize
=
2
;
}
...
...
Python/codecs.c
View file @
76df43de
...
...
@@ -761,7 +761,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
for
(
i
=
start
;
i
<
end
;
i
++
)
{
/* object is guaranteed to be "ready" */
Py_UCS4
ch
=
PyUnicode_READ_CHAR
(
object
,
i
);
if
(
ch
<
0xd800
||
ch
>
0xdfff
)
{
if
(
!
Py_UNICODE_IS_SURROGATE
(
ch
)
)
{
/* Not a surrogate, fail with original exception */
PyErr_SetObject
(
PyExceptionInstance_Class
(
exc
),
exc
);
Py_DECREF
(
res
);
...
...
@@ -797,7 +797,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
(
p
[
2
]
&
0xc0
)
==
0x80
))
{
/* it's a three-byte code */
ch
=
((
p
[
0
]
&
0x0f
)
<<
12
)
+
((
p
[
1
]
&
0x3f
)
<<
6
)
+
(
p
[
2
]
&
0x3f
);
if
(
ch
<
0xd800
||
ch
>
0xdfff
)
if
(
!
Py_UNICODE_IS_SURROGATE
(
ch
)
)
/* it's not a surrogate - fail */
ch
=
0
;
}
...
...
Python/fileutils.c
View file @
76df43de
...
...
@@ -85,7 +85,7 @@ _Py_char2wchar(const char* arg, size_t *size)
/* Only use the result if it contains no
surrogate characters. */
for
(
tmp
=
res
;
*
tmp
!=
0
&&
(
*
tmp
<
0xd800
||
*
tmp
>
0xdfff
);
tmp
++
)
!
Py_UNICODE_IS_SURROGATE
(
*
tmp
);
tmp
++
)
;
if
(
*
tmp
==
0
)
{
if
(
size
!=
NULL
)
...
...
@@ -131,7 +131,7 @@ _Py_char2wchar(const char* arg, size_t *size)
memset
(
&
mbs
,
0
,
sizeof
mbs
);
continue
;
}
if
(
*
out
>=
0xd800
&&
*
out
<=
0xdfff
)
{
if
(
Py_UNICODE_IS_SURROGATE
(
*
out
)
)
{
/* Surrogate character. Escape the original
byte sequence with surrogateescape. */
argsize
-=
converted
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment