Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
dcb2a6b3
Commit
dcb2a6b3
authored
Oct 30, 2012
by
Victor Stinner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #16330: Use surrogate-related macros
Patch written by Serhiy Storchaka.
parent
3b798c70
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
25 additions
and
28 deletions
+25
-28
Include/unicodeobject.h
Include/unicodeobject.h
+3
-3
Modules/_json.c
Modules/_json.c
+10
-11
Modules/cjkcodecs/cjkcodecs.h
Modules/cjkcodecs/cjkcodecs.h
+5
-6
Objects/unicodeobject.c
Objects/unicodeobject.c
+3
-4
Python/codecs.c
Python/codecs.c
+2
-2
Python/fileutils.c
Python/fileutils.c
+2
-2
No files found.
Include/unicodeobject.h
View file @
dcb2a6b3
...
@@ -180,9 +180,9 @@ typedef unsigned char Py_UCS1;
...
@@ -180,9 +180,9 @@ typedef unsigned char Py_UCS1;
} while (0)
} while (0)
/* macros to work with surrogates */
/* macros to work with surrogates */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <=
ch && ch
<= 0xDFFF)
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <=
(ch) && (ch)
<= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <=
ch && ch
<= 0xDBFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <=
(ch) && (ch)
<= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <=
ch && ch
<= 0xDFFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <=
(ch) && (ch)
<= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value. */
/* Join two surrogate characters and return a single Py_UCS4 value. */
#define Py_UNICODE_JOIN_SURROGATES(high, low) \
#define Py_UNICODE_JOIN_SURROGATES(high, low) \
(((((Py_UCS4)(high) & 0x03FF) << 10) | \
(((((Py_UCS4)(high) & 0x03FF) << 10) | \
...
...
Modules/_json.c
View file @
dcb2a6b3
...
@@ -174,14 +174,13 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
...
@@ -174,14 +174,13 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
default:
default:
if
(
c
>=
0x10000
)
{
if
(
c
>=
0x10000
)
{
/* UTF-16 surrogate pair */
/* UTF-16 surrogate pair */
Py_UCS4
v
=
c
-
0x10000
;
Py_UCS4
v
=
Py_UNICODE_HIGH_SURROGATE
(
c
);
c
=
0xd800
|
((
v
>>
10
)
&
0x3ff
);
output
[
chars
++
]
=
'u'
;
output
[
chars
++
]
=
'u'
;
output
[
chars
++
]
=
Py_hexdigits
[(
c
>>
12
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
v
>>
12
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
c
>>
8
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
v
>>
8
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
c
>>
4
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
v
>>
4
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
c
)
&
0xf
];
output
[
chars
++
]
=
Py_hexdigits
[(
v
)
&
0xf
];
c
=
0xdc00
|
(
v
&
0x3ff
);
c
=
Py_UNICODE_LOW_SURROGATE
(
c
);
output
[
chars
++
]
=
'\\'
;
output
[
chars
++
]
=
'\\'
;
}
}
output
[
chars
++
]
=
'u'
;
output
[
chars
++
]
=
'u'
;
...
@@ -431,7 +430,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
...
@@ -431,7 +430,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
}
}
}
}
/* Surrogate pair */
/* Surrogate pair */
if
(
(
c
&
0xfc00
)
==
0xd800
)
{
if
(
Py_UNICODE_IS_HIGH_SURROGATE
(
c
)
)
{
Py_UCS4
c2
=
0
;
Py_UCS4
c2
=
0
;
if
(
end
+
6
>=
len
)
{
if
(
end
+
6
>=
len
)
{
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
...
@@ -462,13 +461,13 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
...
@@ -462,13 +461,13 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
goto
bail
;
goto
bail
;
}
}
}
}
if
(
(
c2
&
0xfc00
)
!=
0xdc00
)
{
if
(
!
Py_UNICODE_IS_LOW_SURROGATE
(
c2
)
)
{
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
raise_errmsg
(
"Unpaired high surrogate"
,
pystr
,
end
-
5
);
goto
bail
;
goto
bail
;
}
}
c
=
0x10000
+
(((
c
-
0xd800
)
<<
10
)
|
(
c2
-
0xdc00
)
);
c
=
Py_UNICODE_JOIN_SURROGATES
(
c
,
c2
);
}
}
else
if
(
(
c
&
0xfc00
)
==
0xdc00
)
{
else
if
(
Py_UNICODE_IS_LOW_SURROGATE
(
c
)
)
{
raise_errmsg
(
"Unpaired low surrogate"
,
pystr
,
end
-
5
);
raise_errmsg
(
"Unpaired low surrogate"
,
pystr
,
end
-
5
);
goto
bail
;
goto
bail
;
}
}
...
...
Modules/cjkcodecs/cjkcodecs.h
View file @
dcb2a6b3
...
@@ -148,8 +148,8 @@ static const struct dbcs_map *mapping_list;
...
@@ -148,8 +148,8 @@ static const struct dbcs_map *mapping_list;
#if Py_UNICODE_SIZE == 2
#if Py_UNICODE_SIZE == 2
# define WRITEUCS4(c) \
# define WRITEUCS4(c) \
REQUIRE_OUTBUF(2) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] =
0xd800 + (((c) - 0x10000) >> 10);
\
(*outbuf)[0] =
Py_UNICODE_HIGH_SURROGATE(c);
\
(*outbuf)[1] =
0xdc00 + (((c) - 0x10000) & 0x3ff);
\
(*outbuf)[1] =
Py_UNICODE_LOW_SURROGATE(c);
\
NEXT_OUT(2)
NEXT_OUT(2)
#else
#else
# define WRITEUCS4(c) \
# define WRITEUCS4(c) \
...
@@ -188,11 +188,10 @@ static const struct dbcs_map *mapping_list;
...
@@ -188,11 +188,10 @@ static const struct dbcs_map *mapping_list;
#if Py_UNICODE_SIZE == 2
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
#define DECODE_SURROGATE(c) \
if (
c >> 10 == 0xd800 >> 10) {
/* high surrogate */
\
if (
Py_UNICODE_IS_HIGH_SURROGATE(c)) {
\
REQUIRE_INBUF(2) \
REQUIRE_INBUF(2) \
if (IN2 >> 10 == 0xdc00 >> 10) {
/* low surrogate */
\
if (Py_UNICODE_IS_LOW_SURROGATE(IN2)) { \
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
c = Py_UNICODE_JOIN_SURROGATES(c, IN2) \
((ucs4_t)(IN2) - 0xdc00); \
} \
} \
}
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
...
...
Objects/unicodeobject.c
View file @
dcb2a6b3
...
@@ -4412,7 +4412,7 @@ encode_char:
...
@@ -4412,7 +4412,7 @@ encode_char:
/* code first surrogate */
/* code first surrogate */
base64bits
+=
16
;
base64bits
+=
16
;
base64buffer
=
(
base64buffer
<<
16
)
|
0xd800
|
((
ch
-
0x10000
)
>>
10
);
base64buffer
=
(
base64buffer
<<
16
)
|
Py_UNICODE_HIGH_SURROGATE
(
ch
);
while
(
base64bits
>=
6
)
{
while
(
base64bits
>=
6
)
{
*
out
++
=
TO_BASE64
(
base64buffer
>>
(
base64bits
-
6
));
*
out
++
=
TO_BASE64
(
base64buffer
>>
(
base64bits
-
6
));
base64bits
-=
6
;
base64bits
-=
6
;
...
@@ -7052,9 +7052,8 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
...
@@ -7052,9 +7052,8 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
charsize
=
1
;
charsize
=
1
;
}
}
else
{
else
{
ch
-=
0x10000
;
chars
[
0
]
=
Py_UNICODE_HIGH_SURROGATE
(
ch
);
chars
[
0
]
=
0xd800
+
(
ch
>>
10
);
chars
[
1
]
=
Py_UNICODE_LOW_SURROGATE
(
ch
);
chars
[
1
]
=
0xdc00
+
(
ch
&
0x3ff
);
charsize
=
2
;
charsize
=
2
;
}
}
...
...
Python/codecs.c
View file @
dcb2a6b3
...
@@ -761,7 +761,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
...
@@ -761,7 +761,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
for
(
i
=
start
;
i
<
end
;
i
++
)
{
for
(
i
=
start
;
i
<
end
;
i
++
)
{
/* object is guaranteed to be "ready" */
/* object is guaranteed to be "ready" */
Py_UCS4
ch
=
PyUnicode_READ_CHAR
(
object
,
i
);
Py_UCS4
ch
=
PyUnicode_READ_CHAR
(
object
,
i
);
if
(
ch
<
0xd800
||
ch
>
0xdfff
)
{
if
(
!
Py_UNICODE_IS_SURROGATE
(
ch
)
)
{
/* Not a surrogate, fail with original exception */
/* Not a surrogate, fail with original exception */
PyErr_SetObject
(
PyExceptionInstance_Class
(
exc
),
exc
);
PyErr_SetObject
(
PyExceptionInstance_Class
(
exc
),
exc
);
Py_DECREF
(
res
);
Py_DECREF
(
res
);
...
@@ -797,7 +797,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
...
@@ -797,7 +797,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
(
p
[
2
]
&
0xc0
)
==
0x80
))
{
(
p
[
2
]
&
0xc0
)
==
0x80
))
{
/* it's a three-byte code */
/* it's a three-byte code */
ch
=
((
p
[
0
]
&
0x0f
)
<<
12
)
+
((
p
[
1
]
&
0x3f
)
<<
6
)
+
(
p
[
2
]
&
0x3f
);
ch
=
((
p
[
0
]
&
0x0f
)
<<
12
)
+
((
p
[
1
]
&
0x3f
)
<<
6
)
+
(
p
[
2
]
&
0x3f
);
if
(
ch
<
0xd800
||
ch
>
0xdfff
)
if
(
!
Py_UNICODE_IS_SURROGATE
(
ch
)
)
/* it's not a surrogate - fail */
/* it's not a surrogate - fail */
ch
=
0
;
ch
=
0
;
}
}
...
...
Python/fileutils.c
View file @
dcb2a6b3
...
@@ -85,7 +85,7 @@ _Py_char2wchar(const char* arg, size_t *size)
...
@@ -85,7 +85,7 @@ _Py_char2wchar(const char* arg, size_t *size)
/* Only use the result if it contains no
/* Only use the result if it contains no
surrogate characters. */
surrogate characters. */
for
(
tmp
=
res
;
*
tmp
!=
0
&&
for
(
tmp
=
res
;
*
tmp
!=
0
&&
(
*
tmp
<
0xd800
||
*
tmp
>
0xdfff
);
tmp
++
)
!
Py_UNICODE_IS_SURROGATE
(
*
tmp
);
tmp
++
)
;
;
if
(
*
tmp
==
0
)
{
if
(
*
tmp
==
0
)
{
if
(
size
!=
NULL
)
if
(
size
!=
NULL
)
...
@@ -131,7 +131,7 @@ _Py_char2wchar(const char* arg, size_t *size)
...
@@ -131,7 +131,7 @@ _Py_char2wchar(const char* arg, size_t *size)
memset
(
&
mbs
,
0
,
sizeof
mbs
);
memset
(
&
mbs
,
0
,
sizeof
mbs
);
continue
;
continue
;
}
}
if
(
*
out
>=
0xd800
&&
*
out
<=
0xdfff
)
{
if
(
Py_UNICODE_IS_SURROGATE
(
*
out
)
)
{
/* Surrogate character. Escape the original
/* Surrogate character. Escape the original
byte sequence with surrogateescape. */
byte sequence with surrogateescape. */
argsize
-=
converted
;
argsize
-=
converted
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment