Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
aeacde7b
Commit
aeacde7b
authored
Oct 25, 2016
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #20491: The textwrap.TextWrapper class now honors non-breaking spaces.
Based on patch by Kaarle Ritvanen.
parent
20e5a346
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
14 deletions
+47
-14
Lib/test/test_textwrap.py
Lib/test/test_textwrap.py
+31
-0
Lib/textwrap.py
Lib/textwrap.py
+13
-14
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/test/test_textwrap.py
View file @
aeacde7b
...
@@ -444,6 +444,37 @@ What a mess!
...
@@ -444,6 +444,37 @@ What a mess!
text
=
"aa
\
xe4
\
xe4
-
\
xe4
\
xe4
"
text
=
"aa
\
xe4
\
xe4
-
\
xe4
\
xe4
"
self
.
check_wrap
(
text
,
7
,
[
"aa
\
xe4
\
xe4
-"
,
"
\
xe4
\
xe4
"
])
self
.
check_wrap
(
text
,
7
,
[
"aa
\
xe4
\
xe4
-"
,
"
\
xe4
\
xe4
"
])
def
test_non_breaking_space
(
self
):
text
=
'This is a sentence with non-breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with non-'
,
'breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
True
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with'
,
'non-breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
False
)
def
test_narrow_non_breaking_space
(
self
):
text
=
(
'This is a sentence with non-breaking'
'
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with non-'
,
'breaking
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
True
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with'
,
'non-breaking
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
False
)
class
MaxLinesTestCase
(
BaseTestCase
):
class
MaxLinesTestCase
(
BaseTestCase
):
text
=
"Hello there, how are you this fine day? I'm glad to hear it!"
text
=
"Hello there, how are you this fine day? I'm glad to hear it!"
...
...
Lib/textwrap.py
View file @
aeacde7b
...
@@ -10,13 +10,8 @@ import re
...
@@ -10,13 +10,8 @@ import re
__all__
=
[
'TextWrapper'
,
'wrap'
,
'fill'
,
'dedent'
,
'indent'
,
'shorten'
]
__all__
=
[
'TextWrapper'
,
'wrap'
,
'fill'
,
'dedent'
,
'indent'
,
'shorten'
]
# Hardcode the recognized whitespace characters to the US-ASCII
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters. The main reason for doing this is that in
# whitespace characters. The main reason for doing this is that
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
# that character winds up in string.whitespace. Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace
=
'
\
t
\
n
\
x0b
\
x0c
\
r
'
_whitespace
=
'
\
t
\
n
\
x0b
\
x0c
\
r
'
class
TextWrapper
:
class
TextWrapper
:
...
@@ -81,29 +76,34 @@ class TextWrapper:
...
@@ -81,29 +76,34 @@ class TextWrapper:
# (after stripping out empty strings).
# (after stripping out empty strings).
word_punct
=
r'[\
w!
"\'&.,?]'
word_punct
=
r'[\
w!
"\'&.,?]'
letter
=
r'[^\
d
\W]'
letter
=
r'[^\
d
\W]'
whitespace
=
r'[%s]'
%
re
.
escape
(
_whitespace
)
nowhitespace
=
'[^'
+
whitespace
[
1
:]
wordsep_re
=
re
.
compile
(
r'''
wordsep_re
=
re
.
compile
(
r'''
( # any whitespace
( # any whitespace
\
s+
%(ws)
s+
| # em-dash between words
| # em-dash between words
(?<=%(wp)s) -{2,} (?=\
w)
(?<=%(wp)s) -{2,} (?=\
w)
| # word, possibly hyphenated
| # word, possibly hyphenated
\
S
+? (?:
%(nws)s
+? (?:
# hyphenated word
# hyphenated word
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
(?= %(lt)s -? %(lt)s)
(?= %(lt)s -? %(lt)s)
| # end of word
| # end of word
(?=
\
s|
\Z)
(?=
%(ws)
s|\
Z)
| # em-dash
| # em-dash
(?<=%(wp)s) (?=-{2,}\
w)
(?<=%(wp)s) (?=-{2,}\
w)
)
)
)'''
%
{
'wp'
:
word_punct
,
'lt'
:
letter
},
re
.
VERBOSE
)
)'''
%
{
'wp'
:
word_punct
,
'lt'
:
letter
,
del
word_punct
,
letter
'ws'
:
whitespace
,
'nws'
:
nowhitespace
},
re
.
VERBOSE
)
del
word_punct
,
letter
,
nowhitespace
# This less funky little regex just split on recognized spaces. E.g.
# This less funky little regex just split on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
wordsep_simple_re
=
re
.
compile
(
r'(\
s+)
')
wordsep_simple_re
=
re
.
compile
(
r'(%s+)'
%
whitespace
)
del
whitespace
# XXX this is not locale- or charset-aware -- string.lowercase
# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
# is US-ASCII only (and therefore English-only)
...
@@ -112,7 +112,6 @@ class TextWrapper:
...
@@ -112,7 +112,6 @@ class TextWrapper:
r'
[
\
"
\
'
]?' # optional end-of-quote
r'
[
\
"
\
'
]?' # optional end-of-quote
r'
\
Z
'
) # end of chunk
r'
\
Z
'
) # end of chunk
def __init__(self,
def __init__(self,
width=70,
width=70,
initial_indent="",
initial_indent="",
...
...
Misc/NEWS
View file @
aeacde7b
...
@@ -113,6 +113,9 @@ Core and Builtins
...
@@ -113,6 +113,9 @@ Core and Builtins
Library
Library
-------
-------
- Issue #20491: The textwrap.TextWrapper class now honors non-breaking spaces.
Based on patch by Kaarle Ritvanen.
- Issue #28353: os.fwalk() no longer fails on broken links.
- Issue #28353: os.fwalk() no longer fails on broken links.
- Issue #25464: Fixed HList.header_exists() in tkinter.tix module by addin
- Issue #25464: Fixed HList.header_exists() in tkinter.tix module by addin
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment