Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
df1d00a1
Commit
df1d00a1
authored
Mar 15, 2011
by
R David Murray
Browse files
Options
Browse Files
Download
Plain Diff
Merge #11554 test_email_codecs activation from 3.2.
parents
2ef76981
56a9d7e3
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing 7 changed files with 54 additions and 22 deletions
+54
-22
Lib/email/charset.py
Lib/email/charset.py
+7
-2
Lib/email/encoders.py
Lib/email/encoders.py
+6
-3
Lib/email/test/test_email.py
Lib/email/test/test_email.py
+2
-2
Lib/email/test/test_email_codecs.py
Lib/email/test/test_email_codecs.py
+31
-15
Lib/test/test_email.py
Lib/test/test_email.py
+2
-0
Misc/ACKS
Misc/ACKS
+1
-0
Misc/NEWS
Misc/NEWS
+5
-0
No files found.
Lib/email/charset.py
View file @
df1d00a1
...
...
@@ -263,7 +263,7 @@ class Charset:
Returns "quoted-printable" if self.body_encoding is QP.
Returns "base64" if self.body_encoding is BASE64.
Returns "7bit" otherwise.
Returns conversion function otherwise.
"""
assert
self
.
body_encoding
!=
SHORTEST
if
self
.
body_encoding
==
QP
:
...
...
@@ -381,7 +381,10 @@ class Charset:
"""Body-encode a string by converting it first to bytes.
The type of encoding (base64 or quoted-printable) will be based on
self.body_encoding.
self.body_encoding. If body_encoding is None, we assume the
output charset is a 7bit encoding, so re-encoding the decoded
string using the ascii codec produces the correct string version
of the content.
"""
# 7bit/8bit encodings return the string unchanged (module conversions)
if
self
.
body_encoding
is
BASE64
:
...
...
@@ -391,4 +394,6 @@ class Charset:
elif
self
.
body_encoding
is
QP
:
return
email
.
quoprimime
.
body_encode
(
string
)
else
:
if
isinstance
(
string
,
str
):
string
=
string
.
encode
(
self
.
output_charset
).
decode
(
'ascii'
)
return
string
Lib/email/encoders.py
View file @
df1d00a1
...
...
@@ -54,10 +54,13 @@ def encode_7or8bit(msg):
# There's no payload. For backwards compatibility we use 7bit
msg
[
'Content-Transfer-Encoding'
]
=
'7bit'
return
# We play a trick to make this go fast. If encoding to ASCII succeeds, we
# know the data must be 7bit, otherwise treat it as 8bit.
# We play a trick to make this go fast. If encoding/decode to ASCII
# succeeds, we know the data must be 7bit, otherwise treat it as 8bit.
try
:
orig
.
encode
(
'ascii'
)
if
isinstance
(
orig
,
str
):
orig
.
encode
(
'ascii'
)
else
:
orig
.
decode
(
'ascii'
)
except
UnicodeError
:
# iso-2022-* is non-ASCII but still 7-bit
charset
=
msg
.
get_charset
()
...
...
Lib/email/test/test_email.py
View file @
df1d00a1
...
...
@@ -3372,9 +3372,9 @@ class TestCharset(unittest.TestCase):
# built-in encodings where the header encoding is QP but the body
# encoding is not.
from
email
import
charset
as
CharsetModule
CharsetModule
.
add_charset
(
'fake'
,
CharsetModule
.
QP
,
None
)
CharsetModule
.
add_charset
(
'fake'
,
CharsetModule
.
QP
,
None
,
'utf-8'
)
c
=
Charset
(
'fake'
)
eq
(
'hello w
\
xf6
rld'
,
c
.
body_encode
(
'hello w
\
xf6
rld'
))
eq
(
'hello w
orld'
,
c
.
body_encode
(
'hello wo
rld'
))
def
test_unicode_charset_name
(
self
):
charset
=
Charset
(
'us-ascii'
)
...
...
Lib/email/test/test_email_codecs.py
View file @
df1d00a1
...
...
@@ -13,7 +13,7 @@ from email.message import Message
# We're compatible with Python 2.3, but it doesn't have the built-in Asian
# codecs, so we have to skip all these tests.
try
:
str
(
'foo'
,
'euc-jp'
)
str
(
b
'foo'
,
'euc-jp'
)
except
LookupError
:
raise
unittest
.
SkipTest
...
...
@@ -22,11 +22,14 @@ except LookupError:
class
TestEmailAsianCodecs
(
TestEmailBase
):
def
test_japanese_codecs
(
self
):
eq
=
self
.
ndiffAssertEqual
j
=
Charset
(
"euc-jp"
)
g
=
Charset
(
"iso-8859-1"
)
jcode
=
"euc-jp"
gcode
=
"iso-8859-1"
j
=
Charset
(
jcode
)
g
=
Charset
(
gcode
)
h
=
Header
(
"Hello World!"
)
jhello
=
'
\
xa5
\
xcf
\
xa5
\
xed
\
xa1
\
xbc
\
xa5
\
xef
\
xa1
\
xbc
\
xa5
\
xeb
\
xa5
\
xc9
\
xa1
\
xaa
'
ghello
=
'Gr
\
xfc
\
xdf
Gott!'
jhello
=
str
(
b'
\
xa5
\
xcf
\
xa5
\
xed
\
xa1
\
xbc
\
xa5
\
xef
\
xa1
\
xbc
'
b'
\
xa5
\
xeb
\
xa5
\
xc9
\
xa1
\
xaa
'
,
jcode
)
ghello
=
str
(
b'Gr
\
xfc
\
xdf
Gott!'
,
gcode
)
h
.
append
(
jhello
,
j
)
h
.
append
(
ghello
,
g
)
# BAW: This used to -- and maybe should -- fold the two iso-8859-1
...
...
@@ -36,13 +39,17 @@ class TestEmailAsianCodecs(TestEmailBase):
# encoded word.
eq
(
h
.
encode
(),
"""
\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
=?iso-8859-1?q?Gr=FC=DF
?= =?iso-8859-1?q?
_Gott!?="""
)
=?iso-8859-1?q?Gr=FC=DF_Gott!?="""
)
eq
(
decode_header
(
h
.
encode
()),
[(
'Hello World!'
,
None
),
(
'
\
x1b
$B%O%m!<%o!<%k%I!*
\
x1b
(B'
,
'iso-2022-jp'
),
(
'Gr
\
xfc
\
xdf
Gott!'
,
'iso-8859-1'
)])
int
=
'test-ja
\
xa4
\
xd8
\
xc5
\
xea
\
xb9
\
xc6
\
xa4
\
xb5
\
xa4
\
xec
\
xa4
\
xbf
\
xa5
\
xe1
\
xa1
\
xbc
\
xa5
\
xeb
\
xa4
\
xcf
\
xbb
\
xca
\
xb2
\
xf1
\
xbc
\
xd4
\
xa4
\
xce
\
xbe
\
xb5
\
xc7
\
xa7
\
xa4
\
xf2
\
xc2
\
xd4
\
xa4
\
xc3
\
xa4
\
xc6
\
xa4
\
xa4
\
xa4
\
xde
\
xa4
\
xb9
'
h
=
Header
(
int
,
j
,
header_name
=
"Subject"
)
[(
b'Hello World!'
,
None
),
(
b'
\
x1b
$B%O%m!<%o!<%k%I!*
\
x1b
(B'
,
'iso-2022-jp'
),
(
b'Gr
\
xfc
\
xdf
Gott!'
,
gcode
)])
subject_bytes
=
(
b'test-ja
\
xa4
\
xd8
\
xc5
\
xea
\
xb9
\
xc6
\
xa4
\
xb5
'
b'
\
xa4
\
xec
\
xa4
\
xbf
\
xa5
\
xe1
\
xa1
\
xbc
\
xa5
\
xeb
\
xa4
\
xcf
\
xbb
\
xca
\
xb2
'
b'
\
xf1
\
xbc
\
xd4
\
xa4
\
xce
\
xbe
\
xb5
\
xc7
\
xa7
\
xa4
\
xf2
\
xc2
\
xd4
\
xa4
\
xc3
'
b'
\
xa4
\
xc6
\
xa4
\
xa4
\
xa4
\
xde
\
xa4
\
xb9
'
)
subject
=
str
(
subject_bytes
,
jcode
)
h
=
Header
(
subject
,
j
,
header_name
=
"Subject"
)
# test a very long header
enc
=
h
.
encode
()
# TK: splitting point may differ by codec design and/or Header encoding
...
...
@@ -50,15 +57,24 @@ Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
=?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?="""
)
# TK: full decode comparison
eq
(
h
.
__unicode__
().
encode
(
'euc-jp'
),
int
)
eq
(
str
(
h
).
encode
(
jcode
),
subject_bytes
)
def
test_payload_encoding_utf8
(
self
):
jhello
=
str
(
b'
\
xa5
\
xcf
\
xa5
\
xed
\
xa1
\
xbc
\
xa5
\
xef
\
xa1
\
xbc
'
b'
\
xa5
\
xeb
\
xa5
\
xc9
\
xa1
\
xaa
'
,
'euc-jp'
)
msg
=
Message
()
msg
.
set_payload
(
jhello
,
'utf-8'
)
ustr
=
msg
.
get_payload
(
decode
=
True
).
decode
(
msg
.
get_content_charset
())
self
.
assertEqual
(
jhello
,
ustr
)
def
test_payload_encoding
(
self
):
jhello
=
'
\
xa5
\
xcf
\
xa5
\
xed
\
xa1
\
xbc
\
xa5
\
xef
\
xa1
\
xbc
\
xa5
\
xeb
\
xa5
\
xc9
\
xa1
\
xaa
'
jcode
=
'euc-jp'
jhello
=
str
(
b'
\
xa5
\
xcf
\
xa5
\
xed
\
xa1
\
xbc
\
xa5
\
xef
\
xa1
\
xbc
'
b'
\
xa5
\
xeb
\
xa5
\
xc9
\
xa1
\
xaa
'
,
jcode
)
msg
=
Message
()
msg
.
set_payload
(
jhello
,
jcode
)
ustr
=
str
(
msg
.
get_payload
(),
msg
.
get_content_charset
())
self
.
assertEqual
(
jhello
,
ustr
.
encode
(
jcode
)
)
ustr
=
msg
.
get_payload
(
decode
=
True
).
decode
(
msg
.
get_content_charset
())
self
.
assertEqual
(
jhello
,
ustr
)
...
...
Lib/test/test_email.py
View file @
df1d00a1
...
...
@@ -3,10 +3,12 @@
# The specific tests now live in Lib/email/test
from
email.test.test_email
import
suite
from
email.test.test_email_codecs
import
suite
as
codecs_suite
from
test
import
support
def
test_main
():
support
.
run_unittest
(
suite
())
support
.
run_unittest
(
codecs_suite
())
if
__name__
==
'__main__'
:
test_main
()
Misc/ACKS
View file @
df1d00a1
...
...
@@ -375,6 +375,7 @@ Kevan Heydon
Jason Hildebrand
Richie Hindle
Konrad Hinsen
Michael Henry
David Hobley
Tim Hochberg
Joerg-Cyril Hoehle
...
...
Misc/NEWS
View file @
df1d00a1
...
...
@@ -68,6 +68,9 @@ Core and Builtins
Library
-------
- Issue #11554: Fixed support for Japanese codecs; previously the body output
  encoding was not done if euc-jp or shift-jis was specified as the charset.
- Issue #11509: Significantly increase test coverage of fileinput.
  Patch by Denver Coneybeare at PyCon 2011 Sprints.
...
...
@@ -206,6 +209,8 @@ Tools/Demos
Tests
-----
- Issue #11554: Reactivated test_email_codecs.
- Issue #11505: improves test coverage of string.py
- Issue #11490: test_subprocess:test_leaking_fds_on_error no longer gives a
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment