Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
b938c8c2
Commit
b938c8c2
authored
Mar 24, 2011
by
R David Murray
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#11606: improved body_encode algorithm, no longer produces overlong lines
Algorithm and initial patch by Michael Henry.
parent
29943aa1
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
103 additions
and
61 deletions
+103
-61
Lib/email/quoprimime.py
Lib/email/quoprimime.py
+85
-60
Lib/email/test/test_email.py
Lib/email/test/test_email.py
+18
-1
No files found.
Lib/email/quoprimime.py
View file @
b938c8c2
...
...
@@ -40,6 +40,7 @@ __all__ = [
]
import
re
import
io
from
string
import
ascii_letters
,
digits
,
hexdigits
...
...
@@ -147,6 +148,59 @@ def header_encode(header_bytes, charset='iso-8859-1'):
return
'=?%s?q?%s?='
%
(
charset
,
EMPTYSTRING
.
join
(
encoded
))
class
_body_accumulator
(
io
.
StringIO
):
def
__init__
(
self
,
maxlinelen
,
eol
,
*
args
,
**
kw
):
super
().
__init__
(
*
args
,
**
kw
)
self
.
eol
=
eol
self
.
maxlinelen
=
self
.
room
=
maxlinelen
def
write_str
(
self
,
s
):
"""Add string s to the accumulated body."""
self
.
write
(
s
)
self
.
room
-=
len
(
s
)
def
newline
(
self
):
"""Write eol, then start new line."""
self
.
write_str
(
self
.
eol
)
self
.
room
=
self
.
maxlinelen
def
write_soft_break
(
self
):
"""Write a soft break, then start a new line."""
self
.
write_str
(
'='
)
self
.
newline
()
def
write_wrapped
(
self
,
s
,
extra_room
=
0
):
"""Add a soft line break if needed, then write s."""
if
self
.
room
<
len
(
s
)
+
extra_room
:
self
.
write_soft_break
()
self
.
write_str
(
s
)
def
write_char
(
self
,
c
,
is_last_char
):
if
not
is_last_char
:
# Another character follows on this line, so we must leave
# extra room, either for it or a soft break, and whitespace
# need not be quoted.
self
.
write_wrapped
(
c
,
extra_room
=
1
)
elif
c
not
in
'
\
t
'
:
# For this and remaining cases, no more characters follow,
# so there is no need to reserve extra room (since a hard
# break will immediately follow).
self
.
write_wrapped
(
c
)
elif
self
.
room
>=
3
:
# It's a whitespace character at end-of-line, and we have room
# for the three-character quoted encoding.
self
.
write
(
quote
(
c
))
elif
self
.
room
==
2
:
# There's room for the whitespace character and a soft break.
self
.
write
(
c
)
self
.
write_soft_break
()
else
:
# There's room only for a soft break. The quoted whitespace
# will be the only content on the subsequent line.
self
.
write_soft_break
()
self
.
write
(
quote
(
c
))
def
body_encode
(
body
,
maxlinelen
=
76
,
eol
=
NL
):
"""Encode with quoted-printable, wrapping at maxlinelen characters.
...
...
@@ -155,72 +209,43 @@ def body_encode(body, maxlinelen=76, eol=NL):
this to "
\
\
r
\
\
n" if you will be using the result of this function directly
in an email.
Each line will be wrapped at, at most, maxlinelen characters (defaults to
76 characters). Long lines will have the `soft linefeed' quoted-printable
character "=" appended to them, so the decoded text will be identical to
the original text.
Each line will be wrapped at, at most, maxlinelen characters before the
eol string (maxlinelen defaults to 76 characters, the maximum value
permitted by RFC 2045). Long lines will have the 'soft line break'
quoted-printable character "=" appended to them, so the decoded text will
be identical to the original text.
The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
followed by a soft line break. Smaller values will generate a
ValueError.
"""
if
maxlinelen
<
4
:
raise
ValueError
(
"maxlinelen must be at least 4"
)
if
not
body
:
return
body
# BAW: We're accumulating the body text by string concatenation. That
# can't be very efficient, but I don't have time now to rewrite it. It
# just feels like this algorithm could be more efficient.
encoded_body
=
''
lineno
=
-
1
# Preserve line endings here so we can check later to see an eol needs to
# be added to the output later.
lines
=
body
.
splitlines
(
1
)
for
line
in
lines
:
# But strip off line-endings for processing this line.
if
line
.
endswith
(
CRLF
):
line
=
line
[:
-
2
]
elif
line
[
-
1
]
in
CRLF
:
line
=
line
[:
-
1
]
lineno
+=
1
encoded_line
=
''
prev
=
None
linelen
=
len
(
line
)
# Now we need to examine every character to see if it needs to be
# quopri encoded. BAW: again, string concatenation is inefficient.
for
j
in
range
(
linelen
):
c
=
line
[
j
]
prev
=
c
# The last line may or may not end in eol, but all other lines do.
last_has_eol
=
(
body
[
-
1
]
in
'
\
r
\
n
'
)
# This accumulator will make it easier to build the encoded body.
encoded_body
=
_body_accumulator
(
maxlinelen
,
eol
)
lines
=
body
.
splitlines
()
last_line_no
=
len
(
lines
)
-
1
for
line_no
,
line
in
enumerate
(
lines
):
last_char_index
=
len
(
line
)
-
1
for
i
,
c
in
enumerate
(
line
):
if
body_check
(
ord
(
c
)):
c
=
quote
(
c
)
elif
j
+
1
==
linelen
:
# Check for whitespace at end of line; special case
if
c
not
in
'
\
t
'
:
encoded_line
+=
c
prev
=
c
continue
# Check to see to see if the line has reached its maximum length
if
len
(
encoded_line
)
+
len
(
c
)
>=
maxlinelen
:
encoded_body
+=
encoded_line
+
'='
+
eol
encoded_line
=
''
encoded_line
+=
c
# Now at end of line..
if
prev
and
prev
in
'
\
t
'
:
# Special case for whitespace at end of file
if
lineno
+
1
==
len
(
lines
):
prev
=
quote
(
prev
)
if
len
(
encoded_line
)
+
len
(
prev
)
>
maxlinelen
:
encoded_body
+=
encoded_line
+
'='
+
eol
+
prev
else
:
encoded_body
+=
encoded_line
+
prev
# Just normal whitespace at end of line
else
:
encoded_body
+=
encoded_line
+
prev
+
'='
+
eol
encoded_line
=
''
# Now look at the line we just finished and it has a line ending, we
# need to add eol to the end of the line.
if
lines
[
lineno
].
endswith
(
CRLF
)
or
lines
[
lineno
][
-
1
]
in
CRLF
:
encoded_body
+=
encoded_line
+
eol
else
:
encoded_body
+=
encoded_line
encoded_line
=
''
return
encoded_body
encoded_body
.
write_char
(
c
,
i
==
last_char_index
)
# Add an eol if input line had eol. All input lines have eol except
# possibly the last one.
if
line_no
<
last_line_no
or
last_has_eol
:
encoded_body
.
newline
()
return
encoded_body
.
getvalue
()
...
...
Lib/email/test/test_email.py
View file @
b938c8c2
...
...
@@ -3041,6 +3041,14 @@ class TestQuopri(unittest.TestCase):
def
test_encode_one_line_one_space
(
self
):
self
.
_test_encode
(
'
\
n
'
,
'=20
\
n
'
)
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list. For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
def
test_encode_two_lines_one_space
(
self
):
self
.
_test_encode
(
'
\
n
\
n
'
,
'=20
\
n
=20
\
n
'
)
def
test_encode_one_word_trailing_spaces
(
self
):
self
.
_test_encode
(
'hello '
,
'hello =20'
)
...
...
@@ -3056,8 +3064,14 @@ class TestQuopri(unittest.TestCase):
def
test_encode_trailing_space_before_maxlinelen
(
self
):
self
.
_test_encode
(
'abcd
\
n
1234'
,
'abcd =
\
n
\
n
1234'
,
maxlinelen
=
6
)
def
test_encode_trailing_space_at_maxlinelen
(
self
):
self
.
_test_encode
(
'abcd
\
n
1234'
,
'abcd=
\
n
=20
\
n
1234'
,
maxlinelen
=
5
)
def
test_encode_trailing_space_beyond_maxlinelen
(
self
):
self
.
_test_encode
(
'abcd
\
n
1234'
,
'abc=
\
n
d =
\
n
\
n
1234'
,
maxlinelen
=
4
)
self
.
_test_encode
(
'abcd
\
n
1234'
,
'abc=
\
n
d=20
\
n
1234'
,
maxlinelen
=
4
)
def
test_encode_whitespace_lines
(
self
):
self
.
_test_encode
(
'
\
n
'
*
5
,
'=20
\
n
'
*
5
)
def
test_encode_quoted_equals
(
self
):
self
.
_test_encode
(
'a = b'
,
'a =3D b'
)
...
...
@@ -3078,6 +3092,9 @@ class TestQuopri(unittest.TestCase):
def
test_encode_shortest_maxlinelen
(
self
):
self
.
_test_encode
(
'='
*
5
,
'=3D=
\
n
'
*
4
+
'=3D'
,
maxlinelen
=
4
)
def
test_encode_maxlinelen_too_small
(
self
):
self
.
assertRaises
(
ValueError
,
self
.
_test_encode
,
''
,
''
,
maxlinelen
=
3
)
def
test_encode
(
self
):
eq
=
self
.
assertEqual
eq
(
quoprimime
.
body_encode
(
''
),
''
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment