Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
aeacde7b
Commit
aeacde7b
authored
Oct 25, 2016
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #20491: The textwrap.TextWrapper class now honors non-breaking spaces.
Based on patch by Kaarle Ritvanen.
parent
20e5a346
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
14 deletions
+47
-14
Lib/test/test_textwrap.py
Lib/test/test_textwrap.py
+31
-0
Lib/textwrap.py
Lib/textwrap.py
+13
-14
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/test/test_textwrap.py
View file @
aeacde7b
...
...
@@ -444,6 +444,37 @@ What a mess!
text
=
"aa
\
xe4
\
xe4
-
\
xe4
\
xe4
"
self
.
check_wrap
(
text
,
7
,
[
"aa
\
xe4
\
xe4
-"
,
"
\
xe4
\
xe4
"
])
def
test_non_breaking_space
(
self
):
text
=
'This is a sentence with non-breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with non-'
,
'breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
True
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with'
,
'non-breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
False
)
def
test_narrow_non_breaking_space
(
self
):
text
=
(
'This is a sentence with non-breaking'
'
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with non-'
,
'breaking
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
True
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with'
,
'non-breaking
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
False
)
class
MaxLinesTestCase
(
BaseTestCase
):
text
=
"Hello there, how are you this fine day? I'm glad to hear it!"
...
...
Lib/textwrap.py
View file @
aeacde7b
...
...
@@ -10,13 +10,8 @@ import re
__all__
=
[
'TextWrapper'
,
'wrap'
,
'fill'
,
'dedent'
,
'indent'
,
'shorten'
]
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters. The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace. Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
# whitespace characters. The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
_whitespace
=
'
\
t
\
n
\
x0b
\
x0c
\
r
'
class
TextWrapper
:
...
...
@@ -81,29 +76,34 @@ class TextWrapper:
# (after stripping out empty strings).
word_punct
=
r'[\
w!
"\'&.,?]'
letter
=
r'[^\
d
\W]'
whitespace
=
r'[%s]'
%
re
.
escape
(
_whitespace
)
nowhitespace
=
'[^'
+
whitespace
[
1
:]
wordsep_re
=
re
.
compile
(
r'''
( # any whitespace
\
s+
%(ws)
s+
| # em-dash between words
(?<=%(wp)s) -{2,} (?=\
w)
| # word, possibly hyphenated
\
S
+? (?:
%(nws)s
+? (?:
# hyphenated word
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
(?= %(lt)s -? %(lt)s)
| # end of word
(?=
\
s|
\Z)
(?=
%(ws)
s|\
Z)
| # em-dash
(?<=%(wp)s) (?=-{2,}\
w)
)
)'''
%
{
'wp'
:
word_punct
,
'lt'
:
letter
},
re
.
VERBOSE
)
del
word_punct
,
letter
)'''
%
{
'wp'
:
word_punct
,
'lt'
:
letter
,
'ws'
:
whitespace
,
'nws'
:
nowhitespace
},
re
.
VERBOSE
)
del
word_punct
,
letter
,
nowhitespace
# This less funky little regex just splits on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
wordsep_simple_re
=
re
.
compile
(
r'(\
s+)
')
wordsep_simple_re
=
re
.
compile
(
r'(%s+)'
%
whitespace
)
del
whitespace
# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
...
...
@@ -112,7 +112,6 @@ class TextWrapper:
r'
[
\
"
\
'
]?' # optional end-of-quote
r'
\
Z
'
) # end of chunk
def __init__(self,
width=70,
initial_indent="",
...
...
Misc/NEWS
View file @
aeacde7b
...
...
@@ -113,6 +113,9 @@ Core and Builtins
Library
-------
- Issue #20491: The textwrap.TextWrapper class now honors non-breaking spaces.
Based on patch by Kaarle Ritvanen.
- Issue #28353: os.fwalk() no longer fails on broken links.
- Issue #25464: Fixed HList.header_exists() in tkinter.tix module by adding
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment