Commit e62ec934 authored by Barry Warsaw

Lots of new and updated tests to check for proper ASCII header folding.

Note that some of the Japanese tests have changed, but I
don't really know if they are correct or not. :(

Someone with Japanese and RFC 2047 expertise, please take a look!
parent 7fc5f906
...@@ -6,6 +6,7 @@ import os ...@@ -6,6 +6,7 @@ import os
import time import time
import unittest import unittest
import base64 import base64
import difflib
from cStringIO import StringIO from cStringIO import StringIO
from types import StringType, ListType from types import StringType, ListType
import warnings import warnings
...@@ -50,6 +51,14 @@ def openfile(filename): ...@@ -50,6 +51,14 @@ def openfile(filename):
# Base test class # Base test class
class TestEmailBase(unittest.TestCase): class TestEmailBase(unittest.TestCase):
def ndiffAssertEqual(self, first, second):
    """Like failUnlessEqual except use ndiff to produce readable output.

    Both arguments must provide ``splitlines()``.  When they compare
    unequal, ``self.failureException`` is raised with a message holding
    the line-by-line ``difflib.ndiff`` of `first` against `second`.
    Nothing is raised (and None is returned) when they are equal.
    """
    if first != second:
        delta = difflib.ndiff(first.splitlines(), second.splitlines())
        # Same text the old ``print >> fp, NL, NL.join(diff)`` produced:
        # a leading NL, the joined diff lines, and print's trailing
        # newline.
        raise self.failureException(NL + NL.join(delta) + '\n')
def _msgobj(self, filename): def _msgobj(self, filename):
fp = openfile(findfile(filename)) fp = openfile(findfile(filename))
try: try:
...@@ -393,8 +402,116 @@ class TestEncoders(unittest.TestCase): ...@@ -393,8 +402,116 @@ class TestEncoders(unittest.TestCase):
# Test long header wrapping # Test long header wrapping
class TestLongHeaders(unittest.TestCase): class TestLongHeaders(TestEmailBase):
def test_split_long_continuation(self):
# Parse a message whose Subject header carries two tab-led continuation
# lines, the first being one very long run of digits with no spaces,
# then flatten it with Generator and compare the output.  The expected
# text below is the same literal that was parsed, i.e. the header is
# expected to come back out unchanged.
eq = self.ndiffAssertEqual
msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text
test
""")
sfp = StringIO()
g = Generator(sfp)
g.flatten(msg)
eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text
test
""")
def test_another_long_almost_unsplittable_header(self):
# Encode the same three-line header text (whose middle line is one long
# run of digits with no spaces) twice: once with continuation_ws='\t'
# and once with Header's default continuation whitespace.
eq = self.ndiffAssertEqual
hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
# Tab continuation: the expected output equals the input text.
h = Header(hstr, continuation_ws='\t')
eq(h.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
# Default continuation whitespace: the expected continuation lines no
# longer start with a tab.
h = Header(hstr)
eq(h.encode(), """\
bug demonstration
12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
more text""")
def test_long_nonstring(self):
# Build one Header out of three chunks, each in its own charset
# (iso-8859-1, iso-8859-2, utf-8), and check its folded, encoded form
# twice: as the Subject of a flattened Message and straight from
# Header.encode().
eq = self.ndiffAssertEqual
g = Charset("iso-8859-1")
cz = Charset("iso-8859-2")
utf8 = Charset("utf-8")
g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
# The unicode literal is encoded to a UTF-8 byte string before use.
utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
h = Header(g_head, g)
h.append(cz_head, cz)
h.append(utf8_head, utf8)
msg = Message()
msg['Subject'] = h
sfp = StringIO()
# NOTE: `g` is rebound here from the iso-8859-1 Charset to the Generator;
# the Charset is not used again after this point.
g = Generator(sfp)
g.flatten(msg)
# Expected output: the iso-8859 chunks appear as "q" encoded words and the
# utf-8 chunk as "b" encoded words.
eq(sfp.getvalue(), '''\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
=?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
=?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
=?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
=?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
=?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
=?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
=?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
=?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
=?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
=?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
=?utf-8?b?cyBOdW5zdHVjayBnaXQgdW5k?=
=?utf-8?b?IFNsb3Rlcm1leWVyPyBKYSEgQmVpaGVyaHVuZCBkYXMgT2Rl?=
=?utf-8?b?ciBkaWUgRmxpcHBlcndhbGR0?=
=?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=
''')
# Same encoded words, this time without the "Subject: " prefix and without
# a trailing newline.
eq(h.encode(), '''\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
=?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
=?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
=?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
=?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
=?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
=?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
=?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
=?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
=?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
=?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
=?utf-8?b?cyBOdW5zdHVjayBnaXQgdW5k?=
=?utf-8?b?IFNsb3Rlcm1leWVyPyBKYSEgQmVpaGVyaHVuZCBkYXMgT2Rl?=
=?utf-8?b?ciBkaWUgRmxpcHBlcndhbGR0?=
=?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=''')
def test_long_header_encode(self):
# Encode a long, semicolon-separated parameter list for a header named
# X-Foobar-Spoink-Defrobnit, using Header's default continuation
# whitespace.  The expected value is split into two lines after the
# second "; ".
eq = self.ndiffAssertEqual
h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
header_name='X-Foobar-Spoink-Defrobnit')
eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
def test_long_header_encode_with_tab_continuation(self):
# Same header value and header name as test_long_header_encode, but with
# continuation_ws='\t': the second expected line starts with a tab.
eq = self.ndiffAssertEqual
h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
header_name='X-Foobar-Spoink-Defrobnit',
continuation_ws='\t')
eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
def test_header_splitter(self): def test_header_splitter(self):
eq = self.ndiffAssertEqual
msg = MIMEText('') msg = MIMEText('')
# It'd be great if we could use add_header() here, but that doesn't # It'd be great if we could use add_header() here, but that doesn't
# guarantee an order of the parameters. # guarantee an order of the parameters.
...@@ -404,7 +521,7 @@ class TestLongHeaders(unittest.TestCase): ...@@ -404,7 +521,7 @@ class TestLongHeaders(unittest.TestCase):
sfp = StringIO() sfp = StringIO()
g = Generator(sfp) g = Generator(sfp)
g.flatten(msg) g.flatten(msg)
self.assertEqual(sfp.getvalue(), '''\ eq(sfp.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii" Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0 MIME-Version: 1.0
Content-Transfer-Encoding: 7bit Content-Transfer-Encoding: 7bit
...@@ -414,17 +531,15 @@ X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; ...@@ -414,17 +531,15 @@ X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
''') ''')
def test_no_semis_header_splitter(self): def test_no_semis_header_splitter(self):
eq = self.ndiffAssertEqual
msg = Message() msg = Message()
msg['From'] = 'test@dom.ain' msg['From'] = 'test@dom.ain'
refparts = [] msg['References'] = SPACE.join(['<%d@dom.ain>' % i for i in range(10)])
for i in range(10):
refparts.append('<%d@dom.ain>' % i)
msg['References'] = SPACE.join(refparts)
msg.set_payload('Test') msg.set_payload('Test')
sfp = StringIO() sfp = StringIO()
g = Generator(sfp) g = Generator(sfp)
g.flatten(msg) g.flatten(msg)
self.assertEqual(sfp.getvalue(), """\ eq(sfp.getvalue(), """\
From: test@dom.ain From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
\t<5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> \t<5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
...@@ -432,29 +547,22 @@ References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> ...@@ -432,29 +547,22 @@ References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
Test""") Test""")
def test_no_split_long_header(self): def test_no_split_long_header(self):
msg = Message() eq = self.ndiffAssertEqual
msg['From'] = 'test@dom.ain' hstr = 'References: ' + 'x' * 80
refparts = [] h = Header(hstr, continuation_ws='\t')
msg['References'] = 'x' * 80 eq(h.encode(), """\
msg.set_payload('Test') References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
sfp = StringIO()
g = Generator(sfp)
g.flatten(msg)
self.assertEqual(sfp.getvalue(), """\
From: test@dom.ain
References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Test""")
def test_splitting_multiple_long_lines(self): def test_splitting_multiple_long_lines(self):
msg = Message() eq = self.ndiffAssertEqual
msg['Received'] = """\ hstr = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) \tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) \tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
""" """
self.assertEqual(msg.as_string(), """\ h = Header(hstr, continuation_ws='\t')
Received: from babylon.socal-raves.org (localhost [127.0.0.1]); eq(h.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; \tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
\tfor <mailman-admin@babylon.socal-raves.org>; \tfor <mailman-admin@babylon.socal-raves.org>;
\tSat, 2 Feb 2002 17:00:06 -0800 (PST) \tSat, 2 Feb 2002 17:00:06 -0800 (PST)
...@@ -465,10 +573,7 @@ Received: from babylon.socal-raves.org (localhost [127.0.0.1]); ...@@ -465,10 +573,7 @@ Received: from babylon.socal-raves.org (localhost [127.0.0.1]);
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); \tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; \tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
\tfor <mailman-admin@babylon.socal-raves.org>; \tfor <mailman-admin@babylon.socal-raves.org>;
\tSat, 2 Feb 2002 17:00:06 -0800 (PST) \tSat, 2 Feb 2002 17:00:06 -0800 (PST)""")
""")
...@@ -993,7 +1098,7 @@ Your message cannot be delivered to the following recipients: ...@@ -993,7 +1098,7 @@ Your message cannot be delivered to the following recipients:
# regenerate the plain text. The original text and the transformed text # regenerate the plain text. The original text and the transformed text
# should be identical. Note: that we ignore the Unix-From since that may # should be identical. Note: that we ignore the Unix-From since that may
# contain a changed date. # contain a changed date.
class TestIdempotent(unittest.TestCase): class TestIdempotent(TestEmailBase):
def _msgobj(self, filename): def _msgobj(self, filename):
fp = openfile(filename) fp = openfile(filename)
try: try:
...@@ -1004,7 +1109,7 @@ class TestIdempotent(unittest.TestCase): ...@@ -1004,7 +1109,7 @@ class TestIdempotent(unittest.TestCase):
return msg, data return msg, data
def _idempotent(self, msg, text): def _idempotent(self, msg, text):
eq = self.assertEquals eq = self.ndiffAssertEqual
s = StringIO() s = StringIO()
g = Generator(s, maxheaderlen=0) g = Generator(s, maxheaderlen=0)
g.flatten(msg) g.flatten(msg)
...@@ -1038,6 +1143,10 @@ class TestIdempotent(unittest.TestCase): ...@@ -1038,6 +1143,10 @@ class TestIdempotent(unittest.TestCase):
msg, text = self._msgobj('msg_02.txt') msg, text = self._msgobj('msg_02.txt')
self._idempotent(msg, text) self._idempotent(msg, text)
## def test_MIME_digest_with_part_headers(self):
## msg, text = self._msgobj('msg_28.txt')
## self._idempotent(msg, text)
def test_mixed_with_image(self): def test_mixed_with_image(self):
msg, text = self._msgobj('msg_06.txt') msg, text = self._msgobj('msg_06.txt')
self._idempotent(msg, text) self._idempotent(msg, text)
...@@ -1370,6 +1479,20 @@ Here's the message body ...@@ -1370,6 +1479,20 @@ Here's the message body
part2 = msg.get_payload(1) part2 = msg.get_payload(1)
eq(part2.get_type(), 'application/riscos') eq(part2.get_type(), 'application/riscos')
## def test_multipart_digest_with_extra_mime_headers(self):
## eq = self.assertEqual
## fp = openfile('msg_28.txt')
## p = Parser()
## msg = p.parse(fp)
## self.failUnless(msg.is_multipart())
## eq(len(msg.get_payload()), 2)
## part1 = msg.get_payload(0)
## eq(part1.get_type(), 'text/plain')
## eq(part1.get_payload(), 'message 1')
## part2 = msg.get_payload(1)
## eq(part2.get_type(), 'text/plain')
## eq(part2.get_payload(), 'message 2')
class TestBase64(unittest.TestCase): class TestBase64(unittest.TestCase):
...@@ -1571,14 +1694,21 @@ class TestCharset(unittest.TestCase): ...@@ -1571,14 +1694,21 @@ class TestCharset(unittest.TestCase):
# Test multilingual MIME headers. # Test multilingual MIME headers.
class TestHeader(unittest.TestCase): class TestHeader(TestEmailBase):
def test_simple(self): def test_simple(self):
eq = self.assertEqual eq = self.ndiffAssertEqual
h = Header('Hello World!') h = Header('Hello World!')
eq(h.encode(), 'Hello World!') eq(h.encode(), 'Hello World!')
h.append('Goodbye World!') h.append(' Goodbye World!')
eq(h.encode(), 'Hello World! Goodbye World!') eq(h.encode(), 'Hello World! Goodbye World!')
def test_simple_surprise(self):
    # Counterpart to test_simple: the appended chunk has no leading
    # space here, and the expected encoding runs the two chunks together
    # with nothing in between.
    check = self.ndiffAssertEqual
    header = Header('Hello World!')
    check(header.encode(), 'Hello World!')
    header.append('Goodbye World!')
    check(header.encode(), 'Hello World!Goodbye World!')
def test_header_needs_no_decoding(self): def test_header_needs_no_decoding(self):
h = 'no decoding needed' h = 'no decoding needed'
self.assertEqual(decode_header(h), [(h, None)]) self.assertEqual(decode_header(h), [(h, None)])
...@@ -1621,12 +1751,12 @@ class TestHeader(unittest.TestCase): ...@@ -1621,12 +1751,12 @@ class TestHeader(unittest.TestCase):
(utf8_head, "utf-8")]) (utf8_head, "utf-8")])
def test_explicit_maxlinelen(self): def test_explicit_maxlinelen(self):
eq = self.assertEqual eq = self.ndiffAssertEqual
hstr = 'A very long line that must get split to something other than at the 76th character boundary to test the non-default behavior' hstr = 'A very long line that must get split to something other than at the 76th character boundary to test the non-default behavior'
h = Header(hstr) h = Header(hstr)
eq(h.encode(), '''\ eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th cha A very long line that must get split to something other than at the 76th
racter boundary to test the non-default behavior''') character boundary to test the non-default behavior''')
h = Header(hstr, header_name='Subject') h = Header(hstr, header_name='Subject')
eq(h.encode(), '''\ eq(h.encode(), '''\
A very long line that must get split to something other than at the A very long line that must get split to something other than at the
......
...@@ -3,11 +3,11 @@ ...@@ -3,11 +3,11 @@
import unittest import unittest
import test_support import test_support
from test_email import TestEmailBase
from email.Charset import Charset from email.Charset import Charset
from email.Header import Header, decode_header from email.Header import Header, decode_header
# See if we have the Japanese codecs package installed # See if we have the Japanese codecs package installed
try: try:
unicode('foo', 'japanese.iso-2022-jp') unicode('foo', 'japanese.iso-2022-jp')
...@@ -16,9 +16,9 @@ except LookupError: ...@@ -16,9 +16,9 @@ except LookupError:
class TestEmailAsianCodecs(unittest.TestCase): class TestEmailAsianCodecs(TestEmailBase):
def test_japanese_codecs(self): def test_japanese_codecs(self):
eq = self.assertEqual eq = self.ndiffAssertEqual
j = Charset("euc-jp") j = Charset("euc-jp")
g = Charset("iso-8859-1") g = Charset("iso-8859-1")
h = Header("Hello World!") h = Header("Hello World!")
...@@ -35,8 +35,23 @@ class TestEmailAsianCodecs(unittest.TestCase): ...@@ -35,8 +35,23 @@ class TestEmailAsianCodecs(unittest.TestCase):
h = Header(long, j, header_name="Subject") h = Header(long, j, header_name="Subject")
# test a very long header # test a very long header
enc = h.encode() enc = h.encode()
eq(enc, '=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=\n =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=\n =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=') # BAW: The following used to pass. Sadly, the test afterwards is what
eq(decode_header(enc), [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')]) # happens now. I've no idea which is right. Please, any Japanese and
# RFC 2047 experts, please verify!
## eq(enc, '''\
##=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=
## =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=
## =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=''')
eq(enc, """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=
=?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NUcnJHJCVCRDJEYkJCReJDkbKEI=?=""")
# BAW: same deal here. :(
## self.assertEqual(
## decode_header(enc),
## [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
self.assertEqual(
decode_header(enc),
[("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5G'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment