Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
56624a99
Commit
56624a99
authored
Jun 02, 2019
by
Evan
Committed by
Vinay Sajip
Jun 01, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071)
parent
2b843ac0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
61 additions
and
23 deletions
+61
-23
Doc/library/shlex.rst
Doc/library/shlex.rst
+23
-12
Lib/shlex.py
Lib/shlex.py
+2
-1
Lib/test/test_shlex.py
Lib/test/test_shlex.py
+36
-10
No files found.
Doc/library/shlex.rst
View file @
56624a99
...
...
@@ -225,7 +225,8 @@ variables which either control lexical analysis or can be used for debugging:
appear in filename specifications and command line parameters, will also be
included in this attribute, and any characters which appear in
``punctuation_chars`` will be removed from ``wordchars`` if they are present
there.
there. If :attr:`whitespace_split` is set to ``True``, this attribute will
have no effect.
.. attribute:: shlex.whitespace
...
...
@@ -258,11 +259,13 @@ variables which either control lexical analysis or can be used for debugging:
If ``True``, tokens will only be split on whitespace. This is useful, for
example, for parsing command lines with :class:`~shlex.shlex`, getting
tokens in a similar way to shell arguments. If this attribute is ``True``,
:attr:`punctuation_chars` will have no effect, and splitting will happen
only on whitespaces. When using :attr:`punctuation_chars`, which is
intended to provide parsing closer to that implemented by shells, it is
advisable to leave ``whitespace_split`` as ``False`` (the default value).
tokens in a similar way to shell arguments. When used in combination with
:attr:`punctuation_chars`, tokens will be split on whitespace in addition to
those characters.
.. versionchanged:: 3.8
The :attr:`punctuation_chars` attribute was made compatible with the
:attr:`whitespace_split` attribute.
.. attribute:: shlex.infile
...
...
@@ -398,12 +401,15 @@ otherwise. To illustrate, you can see the difference in the following snippet:
>>> import shlex
>>> text = "a && b; c && d || e; f >'abc'; (def \"ghi\")"
>>> list(shlex.shlex(text))
['a', '&', '&', 'b', ';', 'c', '&', '&', 'd', '|', '|', 'e', ';', 'f', '>',
"'abc'", ';', '(', 'def', '"ghi"', ')']
>>> list(shlex.shlex(text, punctuation_chars=True))
['a', '&&', 'b', ';', 'c', '&&', 'd', '||', 'e', ';', 'f', '>', "'abc'",
';', '(', 'def', '"ghi"', ')']
>>> s = shlex.shlex(text, posix=True)
>>> s.whitespace_split = True
>>> list(s)
['a', '&&', 'b;', 'c', '&&', 'd', '||', 'e;', 'f', '>abc;', '(def', 'ghi)']
>>> s = shlex.shlex(text, posix=True, punctuation_chars=True)
>>> s.whitespace_split = True
>>> list(s)
['a', '&&', 'b', ';', 'c', '&&', 'd', '||', 'e', ';', 'f', '>', 'abc', ';',
'(', 'def', 'ghi', ')']
Of course, tokens will be returned which are not valid for shells, and you'll
need to implement your own error checks on the returned tokens.
...
...
@@ -428,6 +434,11 @@ which characters constitute punctuation. For example::
>>> list(s)
['~/a', '&&', 'b-c', '--color=auto', '||', 'd', '*.py?']
However, to match the shell as closely as possible, it is recommended to
always use ``posix`` and :attr:`~shlex.whitespace_split` when using
:attr:`~shlex.punctuation_chars`, which will negate
:attr:`~shlex.wordchars` entirely.
For best effect, ``punctuation_chars`` should be set in conjunction with
``posix=True``. (Note that ``posix=False`` is the default for
:class:`~shlex.shlex`.)
Lib/shlex.py
View file @
56624a99
...
...
@@ -246,7 +246,8 @@ class shlex:
escapedstate
=
'a'
self
.
state
=
nextchar
elif
(
nextchar
in
self
.
wordchars
or
nextchar
in
self
.
quotes
or
self
.
whitespace_split
):
or
(
self
.
whitespace_split
and
nextchar
not
in
self
.
punctuation_chars
)):
self
.
token
+=
nextchar
else
:
if
self
.
punctuation_chars
:
...
...
Lib/test/test_shlex.py
View file @
56624a99
import
io
import
itertools
import
shlex
import
string
import
unittest
...
...
@@ -183,10 +184,12 @@ class ShlexTest(unittest.TestCase):
src
=
[
'echo hi %s echo bye'
%
delimiter
,
'echo hi%secho bye'
%
delimiter
]
ref
=
[
'echo'
,
'hi'
,
delimiter
,
'echo'
,
'bye'
]
for
ss
in
src
:
for
ss
,
ws
in
itertools
.
product
(
src
,
(
False
,
True
))
:
s
=
shlex
.
shlex
(
ss
,
punctuation_chars
=
True
)
s
.
whitespace_split
=
ws
result
=
list
(
s
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s'"
%
ss
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s' [ws=%s]"
%
(
ss
,
ws
))
def
testSyntaxSplitSemicolon
(
self
):
"""Test handling of syntax splitting of ;"""
...
...
@@ -197,10 +200,12 @@ class ShlexTest(unittest.TestCase):
'echo hi%s echo bye'
%
delimiter
,
'echo hi%secho bye'
%
delimiter
]
ref
=
[
'echo'
,
'hi'
,
delimiter
,
'echo'
,
'bye'
]
for
ss
in
src
:
for
ss
,
ws
in
itertools
.
product
(
src
,
(
False
,
True
))
:
s
=
shlex
.
shlex
(
ss
,
punctuation_chars
=
True
)
s
.
whitespace_split
=
ws
result
=
list
(
s
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s'"
%
ss
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s' [ws=%s]"
%
(
ss
,
ws
))
def
testSyntaxSplitRedirect
(
self
):
"""Test handling of syntax splitting of >"""
...
...
@@ -211,10 +216,11 @@ class ShlexTest(unittest.TestCase):
'echo hi%s out'
%
delimiter
,
'echo hi%sout'
%
delimiter
]
ref
=
[
'echo'
,
'hi'
,
delimiter
,
'out'
]
for
ss
in
src
:
for
ss
,
ws
in
itertools
.
product
(
src
,
(
False
,
True
))
:
s
=
shlex
.
shlex
(
ss
,
punctuation_chars
=
True
)
result
=
list
(
s
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s'"
%
ss
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s' [ws=%s]"
%
(
ss
,
ws
))
def
testSyntaxSplitParen
(
self
):
"""Test handling of syntax splitting of ()"""
...
...
@@ -222,18 +228,25 @@ class ShlexTest(unittest.TestCase):
src
=
[
'( echo hi )'
,
'(echo hi)'
]
ref
=
[
'('
,
'echo'
,
'hi'
,
')'
]
for
ss
in
src
:
for
ss
,
ws
in
itertools
.
product
(
src
,
(
False
,
True
))
:
s
=
shlex
.
shlex
(
ss
,
punctuation_chars
=
True
)
s
.
whitespace_split
=
ws
result
=
list
(
s
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s'"
%
ss
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s' [ws=%s]"
%
(
ss
,
ws
))
def
testSyntaxSplitCustom
(
self
):
"""Test handling of syntax splitting with custom chars"""
ss
=
"~/a&&b-c --color=auto||d *.py?"
ref
=
[
'~/a'
,
'&'
,
'&'
,
'b-c'
,
'--color=auto'
,
'||'
,
'd'
,
'*.py?'
]
ss
=
"~/a && b-c --color=auto || d *.py?"
s
=
shlex
.
shlex
(
ss
,
punctuation_chars
=
"|"
)
result
=
list
(
s
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s'"
%
ss
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s' [ws=False]"
%
ss
)
ref
=
[
'~/a&&b-c'
,
'--color=auto'
,
'||'
,
'd'
,
'*.py?'
]
s
=
shlex
.
shlex
(
ss
,
punctuation_chars
=
"|"
)
s
.
whitespace_split
=
True
result
=
list
(
s
)
self
.
assertEqual
(
ref
,
result
,
"While splitting '%s' [ws=True]"
%
ss
)
def
testTokenTypes
(
self
):
"""Test that tokens are split with types as expected."""
...
...
@@ -293,6 +306,19 @@ class ShlexTest(unittest.TestCase):
s
=
shlex
.
shlex
(
"'')abc"
,
punctuation_chars
=
True
)
self
.
assertEqual
(
list
(
s
),
expected
)
def
testUnicodeHandling
(
self
):
"""Test punctuation_chars and whitespace_split handle unicode."""
ss
=
"
\
u2119
\
u01b4
\
u2602
\
u210c
\
u00f8
\
u1f24
"
# Should be parsed as one complete token (whitespace_split=True).
ref
=
[
'
\
u2119
\
u01b4
\
u2602
\
u210c
\
u00f8
\
u1f24
'
]
s
=
shlex
.
shlex
(
ss
,
punctuation_chars
=
True
)
s
.
whitespace_split
=
True
self
.
assertEqual
(
list
(
s
),
ref
)
# Without whitespace_split, uses wordchars and splits on all.
ref
=
[
'
\
u2119
'
,
'
\
u01b4
'
,
'
\
u2602
'
,
'
\
u210c
'
,
'
\
u00f8
'
,
'
\
u1f24
'
]
s
=
shlex
.
shlex
(
ss
,
punctuation_chars
=
True
)
self
.
assertEqual
(
list
(
s
),
ref
)
def
testQuote
(
self
):
safeunquoted
=
string
.
ascii_letters
+
string
.
digits
+
'@%_-+=:,./'
unicode_sample
=
'
\
xe9
\
xe0
\
xdf
'
# e + acute accent, a + grave, sharp s
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment