Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
c43382f3
Commit
c43382f3
authored
Oct 25, 2016
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Plain Diff
Issue #20491: The textwrap.TextWrapper class now honors non-breaking spaces.
Based on patch by Kaarle Ritvanen.
parents
c24ac4f5
aeacde7b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
14 deletions
+47
-14
Lib/test/test_textwrap.py
Lib/test/test_textwrap.py
+31
-0
Lib/textwrap.py
Lib/textwrap.py
+13
-14
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/test/test_textwrap.py
View file @
c43382f3
...
...
@@ -444,6 +444,37 @@ What a mess!
text
=
"aa
\
xe4
\
xe4
-
\
xe4
\
xe4
"
self
.
check_wrap
(
text
,
7
,
[
"aa
\
xe4
\
xe4
-"
,
"
\
xe4
\
xe4
"
])
def
test_non_breaking_space
(
self
):
text
=
'This is a sentence with non-breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with non-'
,
'breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
True
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with'
,
'non-breaking
\
N{NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
False
)
def
test_narrow_non_breaking_space
(
self
):
text
=
(
'This is a sentence with non-breaking'
'
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with non-'
,
'breaking
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
True
)
self
.
check_wrap
(
text
,
20
,
[
'This is a sentence'
,
'with'
,
'non-breaking
\
N{NARROW NO-BREAK SPACE}sp
a
ce.'
],
break_on_hyphens
=
False
)
class
MaxLinesTestCase
(
BaseTestCase
):
text
=
"Hello there, how are you this fine day? I'm glad to hear it!"
...
...
Lib/textwrap.py
View file @
c43382f3
...
...
@@ -10,13 +10,8 @@ import re
__all__
=
[
'TextWrapper'
,
'wrap'
,
'fill'
,
'dedent'
,
'indent'
,
'shorten'
]
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters. The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace. Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
# whitespace characters. The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
_whitespace
=
'
\
t
\
n
\
x0b
\
x0c
\
r
'
class
TextWrapper
:
...
...
@@ -81,29 +76,34 @@ class TextWrapper:
# (after stripping out empty strings).
word_punct
=
r'[\
w!
"\'&.,?]'
letter
=
r'[^\
d
\W]'
whitespace
=
r'[%s]'
%
re
.
escape
(
_whitespace
)
nowhitespace
=
'[^'
+
whitespace
[
1
:]
wordsep_re
=
re
.
compile
(
r'''
( # any whitespace
\
s+
%(ws)
s+
| # em-dash between words
(?<=%(wp)s) -{2,} (?=\
w)
| # word, possibly hyphenated
\
S
+? (?:
%(nws)s
+? (?:
# hyphenated word
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
(?= %(lt)s -? %(lt)s)
| # end of word
(?=
\
s|
\Z)
(?=
%(ws)
s|\
Z)
| # em-dash
(?<=%(wp)s) (?=-{2,}\
w)
)
)'''
%
{
'wp'
:
word_punct
,
'lt'
:
letter
},
re
.
VERBOSE
)
del
word_punct
,
letter
)'''
%
{
'wp'
:
word_punct
,
'lt'
:
letter
,
'ws'
:
whitespace
,
'nws'
:
nowhitespace
},
re
.
VERBOSE
)
del
word_punct
,
letter
,
nowhitespace
# This less funky little regex just splits on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
wordsep_simple_re
=
re
.
compile
(
r'(\
s+)
')
wordsep_simple_re
=
re
.
compile
(
r'(%s+)'
%
whitespace
)
del
whitespace
# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
...
...
@@ -112,7 +112,6 @@ class TextWrapper:
r'
[
\
"
\
'
]?' # optional end-of-quote
r'
\
Z
'
) # end of chunk
def __init__(self,
width=70,
initial_indent="",
...
...
Misc/NEWS
View file @
c43382f3
...
...
@@ -29,6 +29,9 @@ Core and Builtins
Library
-------
- Issue #20491: The textwrap.TextWrapper class now honors non-breaking spaces.
Based on patch by Kaarle Ritvanen.
- Issue #28353: os.fwalk() no longer fails on broken links.
- Issue #28430: Fix iterator of C implemented asyncio.Future doesn't accept
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment