Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
ce0b664a
Commit
ce0b664a
authored
Apr 10, 2002
by
Marc-André Lemburg
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added test case for UTF-8 encoding bug #541828.
parent
a9745611
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
2 deletions
+18
-2
Lib/test/test_unicode.py
Lib/test/test_unicode.py
+16
-0
Objects/unicodeobject.c
Objects/unicodeobject.c
+2
-2
No files found.
Lib/test/test_unicode.py
View file @
ce0b664a
...
...
@@ -508,6 +508,22 @@ verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
verify(u'
\
udc00
'.encode('
utf
-
8
') == '
\
xed
\
xb0
\
x80
')
verify((u'
\
ud800
\
udc02
'*1000).encode('
utf
-
8
') ==
'
\
xf0
\
x90
\
x80
\
x82
'*1000)
verify(u'
\
u6b63
\
u78ba
\
u306b
\
u8a00
\
u3046
\
u3068
\
u7ffb
\
u8a33
\
u306f
'
u'
\
u3055
\
u308c
\
u3066
\
u3044
\
u307e
\
u305b
\
u3093
\
u3002
\
u4e00
'
u'
\
u90e8
\
u306f
\
u30c9
\
u30a4
\
u30c4
\
u8a9e
\
u3067
\
u3059
\
u304c
'
u'
\
u3001
\
u3042
\
u3068
\
u306f
\
u3067
\
u305f
\
u3089
\
u3081
\
u3067
'
u'
\
u3059
\
u3002
\
u5b9f
\
u969b
\
u306b
\
u306f
\
u300cWenn
ist
das
'
u'
Nunstuck
git
und
'.encode('
utf
-
8
') ==
'
\
xe6
\
xad
\
xa3
\
xe7
\
xa2
\
xba
\
xe3
\
x81
\
xab
\
xe8
\
xa8
\
x80
\
xe3
\
x81
'
'
\
x86
\
xe3
\
x81
\
xa8
\
xe7
\
xbf
\
xbb
\
xe8
\
xa8
\
xb3
\
xe3
\
x81
\
xaf
\
xe3
'
'
\
x81
\
x95
\
xe3
\
x82
\
x8c
\
xe3
\
x81
\
xa6
\
xe3
\
x81
\
x84
\
xe3
\
x81
\
xbe
'
'
\
xe3
\
x81
\
x9b
\
xe3
\
x82
\
x93
\
xe3
\
x80
\
x82
\
xe4
\
xb8
\
x80
\
xe9
\
x83
'
'
\
xa8
\
xe3
\
x81
\
xaf
\
xe3
\
x83
\
x89
\
xe3
\
x82
\
xa4
\
xe3
\
x83
\
x84
\
xe8
'
'
\
xaa
\
x9e
\
xe3
\
x81
\
xa7
\
xe3
\
x81
\
x99
\
xe3
\
x81
\
x8c
\
xe3
\
x80
\
x81
'
'
\
xe3
\
x81
\
x82
\
xe3
\
x81
\
xa8
\
xe3
\
x81
\
xaf
\
xe3
\
x81
\
xa7
\
xe3
\
x81
'
'
\
x9f
\
xe3
\
x82
\
x89
\
xe3
\
x82
\
x81
\
xe3
\
x81
\
xa7
\
xe3
\
x81
\
x99
\
xe3
'
'
\
x80
\
x82
\
xe5
\
xae
\
x9f
\
xe9
\
x9a
\
x9b
\
xe3
\
x81
\
xab
\
xe3
\
x81
\
xaf
'
'
\
xe3
\
x80
\
x8cWenn
ist
das
Nunstuck
git
und
')
# UTF-8 specific decoding tests
verify(unicode('
\
xf0
\
xa3
\
x91
\
x96
', '
utf
-
8
') == u'
\
U00023456
' )
...
...
Objects/unicodeobject.c
View file @
ce0b664a
...
...
@@ -1224,8 +1224,8 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
Py_UCS4
ch2
=
s
[
i
];
/* Check for low surrogate */
if
(
0xDC00
<=
ch2
&&
ch2
<=
0xDFFF
)
{
ch
=
((
ch
-
0xD800
)
<<
10
|
(
ch2
-
0xDC00
))
+
0x
10000
;
*
p
++
=
(
char
)(
(
ch
>>
18
)
|
0xf0
);
ch
=
((
ch
-
0xD800
)
<<
10
|
(
ch2
-
0xDC00
))
+
0x000
10000
;
*
p
++
=
(
char
)(
0xf0
|
(
ch
>>
18
)
);
*
p
++
=
(
char
)(
0x80
|
((
ch
>>
12
)
&
0x3f
));
*
p
++
=
(
char
)(
0x80
|
((
ch
>>
6
)
&
0x3f
));
*
p
++
=
(
char
)(
0x80
|
(
ch
&
0x3f
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment