Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
8faf8216
Commit
8faf8216
authored
Dec 08, 2011
by
Victor Stinner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
PyUnicode_FromWideChar() and PyUnicode_FromUnicode() raise a ValueError if a
character in not in range [U+0000; U+10ffff].
parent
bc9f0c68
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
33 deletions
+34
-33
Objects/unicodeobject.c
Objects/unicodeobject.c
+34
-33
No files found.
Objects/unicodeobject.c
View file @
8faf8216
...
...
@@ -66,6 +66,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
extern
"C"
{
#endif
/* Maximum code point of Unicode 6.0: 0x10ffff (1,114,111) */
#define MAX_UNICODE 0x10ffff
#ifdef Py_DEBUG
# define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0)
#else
...
...
@@ -393,9 +396,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
}
else
{
assert
(
maxchar
>=
0x10000
);
/* FIXME: Issue #13441: on Solaris, localeconv() and strxfrm()
return characters outside the range U+0000-U+10FFFF. */
/* assert(maxchar <= 0x10FFFF); */
assert
(
maxchar
<=
MAX_UNICODE
);
}
}
return
1
;
...
...
@@ -1295,36 +1296,37 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
Py_UCS4
*
maxchar
,
Py_ssize_t
*
num_surrogates
)
{
const
wchar_t
*
iter
;
Py_UCS4
ch
;
assert
(
num_surrogates
!=
NULL
&&
maxchar
!=
NULL
);
*
num_surrogates
=
0
;
*
maxchar
=
0
;
for
(
iter
=
begin
;
iter
<
end
;
)
{
if
(
*
iter
>
*
maxchar
)
{
*
maxchar
=
*
iter
;
#if SIZEOF_WCHAR_T != 2
if
(
*
maxchar
>=
0x10000
)
return
0
;
#endif
}
#if SIZEOF_WCHAR_T == 2
if
(
Py_UNICODE_IS_HIGH_SURROGATE
(
iter
[
0
])
&&
(
iter
+
1
)
<
end
&&
Py_UNICODE_IS_LOW_SURROGATE
(
iter
[
1
]))
{
Py_UCS4
surrogate_val
;
surrogate_val
=
Py_UNICODE_JOIN_SURROGATES
(
iter
[
0
],
iter
[
1
]);
ch
=
Py_UNICODE_JOIN_SURROGATES
(
iter
[
0
],
iter
[
1
]);
++
(
*
num_surrogates
);
if
(
surrogate_val
>
*
maxchar
)
*
maxchar
=
surrogate_val
;
iter
+=
2
;
}
else
iter
++
;
#else
iter
++
;
#endif
{
ch
=
*
iter
;
iter
++
;
}
if
(
ch
>
*
maxchar
)
{
*
maxchar
=
ch
;
if
(
*
maxchar
>
MAX_UNICODE
)
{
PyErr_Format
(
PyExc_ValueError
,
"character U+%x is not in range [U+0000; U+10ffff]"
,
ch
);
return
-
1
;
}
}
}
return
0
;
}
...
...
@@ -1669,8 +1671,7 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
&
maxchar
,
&
num_surrogates
)
==
-
1
)
return
NULL
;
unicode
=
PyUnicode_New
(
size
-
num_surrogates
,
maxchar
);
unicode
=
PyUnicode_New
(
size
-
num_surrogates
,
maxchar
);
if
(
!
unicode
)
return
NULL
;
...
...
@@ -1808,7 +1809,7 @@ kind_maxchar_limit(unsigned int kind)
return
0x10000
;
default:
assert
(
0
&&
"invalid kind"
);
return
0x10ffff
;
return
MAX_UNICODE
;
}
}
...
...
@@ -2796,7 +2797,7 @@ PyObject *
PyUnicode_FromOrdinal
(
int
ordinal
)
{
PyObject
*
v
;
if
(
ordinal
<
0
||
ordinal
>
0x10ffff
)
{
if
(
ordinal
<
0
||
ordinal
>
MAX_UNICODE
)
{
PyErr_SetString
(
PyExc_ValueError
,
"chr() arg not in range(0x110000)"
);
return
NULL
;
...
...
@@ -3472,7 +3473,7 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
four_bytes
=
PyUnicode_4BYTE_DATA
(
unicode
);
for
(;
four_bytes
<
ucs4_end
;
++
four_bytes
,
++
w
)
{
if
(
*
four_bytes
>
0xFFFF
)
{
assert
(
*
four_bytes
<=
0x10FFFF
);
assert
(
*
four_bytes
<=
MAX_UNICODE
);
/* encode surrogate pair in this case */
*
w
++
=
Py_UNICODE_HIGH_SURROGATE
(
*
four_bytes
);
*
w
=
Py_UNICODE_LOW_SURROGATE
(
*
four_bytes
);
...
...
@@ -4118,7 +4119,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
continue
;
encode_char:
if
(
ch
>=
0x10000
)
{
assert
(
ch
<=
0x10FFFF
);
assert
(
ch
<=
MAX_UNICODE
);
/* code first surrogate */
base64bits
+=
16
;
...
...
@@ -4577,7 +4578,7 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
}
ch
=
((
s
[
0
]
&
0x7
)
<<
18
)
+
((
s
[
1
]
&
0x3f
)
<<
12
)
+
((
s
[
2
]
&
0x3f
)
<<
6
)
+
(
s
[
3
]
&
0x3f
);
assert
((
ch
>
0xFFFF
)
&&
(
ch
<=
0x10ffff
));
assert
((
ch
>
0xFFFF
)
&&
(
ch
<=
MAX_UNICODE
));
WRITE_MAYBE_FAIL
(
i
++
,
ch
);
break
;
...
...
@@ -4714,7 +4715,7 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
}
ch
=
((
s
[
0
]
&
0x7
)
<<
18
)
+
((
s
[
1
]
&
0x3f
)
<<
12
)
+
((
s
[
2
]
&
0x3f
)
<<
6
)
+
(
s
[
3
]
&
0x3f
);
assert
((
ch
>
0xFFFF
)
&&
(
ch
<=
0x10ffff
));
assert
((
ch
>
0xFFFF
)
&&
(
ch
<=
MAX_UNICODE
));
#if SIZEOF_WCHAR_T == 4
*
p
++
=
(
wchar_t
)
ch
;
...
...
@@ -4884,7 +4885,7 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
*
p
++
=
(
char
)(
0x80
|
((
ch
>>
6
)
&
0x3f
));
*
p
++
=
(
char
)(
0x80
|
(
ch
&
0x3f
));
}
else
/* ch >= 0x10000 */
{
assert
(
ch
<=
0x10FFFF
);
assert
(
ch
<=
MAX_UNICODE
);
/* Encode UCS4 Unicode ordinals */
*
p
++
=
(
char
)(
0xf0
|
(
ch
>>
18
));
*
p
++
=
(
char
)(
0x80
|
((
ch
>>
12
)
&
0x3f
));
...
...
@@ -5792,7 +5793,7 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
break
;
store:
/* when we get here, chr is a 32-bit unicode character */
if
(
chr
<=
0x10ffff
)
{
if
(
chr
<=
MAX_UNICODE
)
{
WRITECHAR
(
chr
);
}
else
{
endinpos
=
s
-
starts
;
...
...
@@ -5957,7 +5958,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
/* Map 21-bit characters to '\U00xxxxxx' */
else
if
(
ch
>=
0x10000
)
{
assert
(
ch
<=
0x10FFFF
);
assert
(
ch
<=
MAX_UNICODE
);
*
p
++
=
'\\'
;
*
p
++
=
'U'
;
*
p
++
=
Py_hexdigits
[(
ch
>>
28
)
&
0x0000000F
];
...
...
@@ -6108,7 +6109,7 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
else
x
+=
10
+
c
-
'A'
;
}
if
(
x
<=
0x10ffff
)
{
if
(
x
<=
MAX_UNICODE
)
{
if
(
unicode_putchar
(
&
v
,
&
outpos
,
x
)
<
0
)
goto
onError
;
}
else
{
...
...
@@ -6175,7 +6176,7 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
Py_UCS4
ch
=
PyUnicode_READ
(
kind
,
data
,
pos
);
/* Map 32-bit characters to '\Uxxxxxxxx' */
if
(
ch
>=
0x10000
)
{
assert
(
ch
<=
0x10FFFF
);
assert
(
ch
<=
MAX_UNICODE
);
*
p
++
=
'\\'
;
*
p
++
=
'U'
;
*
p
++
=
Py_hexdigits
[(
ch
>>
28
)
&
0xf
];
...
...
@@ -6536,7 +6537,7 @@ unicode_encode_ucs1(PyObject *unicode,
else
if
(
ch
<
1000000
)
repsize
+=
2
+
6
+
1
;
else
{
assert
(
ch
<=
0x10FFFF
);
assert
(
ch
<=
MAX_UNICODE
);
repsize
+=
2
+
7
+
1
;
}
}
...
...
@@ -9275,7 +9276,7 @@ fixup(PyObject *self,
else
if
(
maxchar_new
<=
65535
)
maxchar_new
=
65535
;
else
maxchar_new
=
1114111
;
/* 0x10ffff */
maxchar_new
=
MAX_UNICODE
;
if
(
!
maxchar_new
&&
PyUnicode_CheckExact
(
self
))
{
/* fixfct should return TRUE if it modified the buffer. If
...
...
@@ -13059,7 +13060,7 @@ formatchar(PyObject *v)
if
(
x
==
-
1
&&
PyErr_Occurred
())
goto
onError
;
if
(
x
<
0
||
x
>
0x10ffff
)
{
if
(
x
<
0
||
x
>
MAX_UNICODE
)
{
PyErr_SetString
(
PyExc_OverflowError
,
"%c arg not in range(0x110000)"
);
return
(
Py_UCS4
)
-
1
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment