Commit 8d5c0b8c authored by Meador Inge

Issue #15054: Fix incorrect tokenization of 'b' string literals.

Patch by Serhiy Storchaka.
parent 2d9db1df
......@@ -289,6 +289,82 @@ String literals
OP '+' (1, 29) (1, 30)
STRING 'R"ABC"' (1, 31) (1, 37)
>>> dump_tokens("u'abc' + U'abc'")
ENCODING 'utf-8' (0, 0) (0, 0)
STRING "u'abc'" (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING "U'abc'" (1, 9) (1, 15)
>>> dump_tokens('u"abc" + U"abc"')
ENCODING 'utf-8' (0, 0) (0, 0)
STRING 'u"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'U"abc"' (1, 9) (1, 15)
>>> dump_tokens("ur'abc' + uR'abc' + Ur'abc' + UR'abc'")
ENCODING 'utf-8' (0, 0) (0, 0)
STRING "ur'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "uR'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Ur'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "UR'abc'" (1, 30) (1, 37)
>>> dump_tokens('ur"abc" + uR"abc" + Ur"abc" + UR"abc"')
ENCODING 'utf-8' (0, 0) (0, 0)
STRING 'ur"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'uR"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Ur"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'UR"abc"' (1, 30) (1, 37)
>>> dump_tokens("b'abc' + B'abc'")
ENCODING 'utf-8' (0, 0) (0, 0)
STRING "b'abc'" (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING "B'abc'" (1, 9) (1, 15)
>>> dump_tokens('b"abc" + B"abc"')
ENCODING 'utf-8' (0, 0) (0, 0)
STRING 'b"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'B"abc"' (1, 9) (1, 15)
>>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
ENCODING 'utf-8' (0, 0) (0, 0)
STRING "br'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "bR'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Br'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "BR'abc'" (1, 30) (1, 37)
>>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
ENCODING 'utf-8' (0, 0) (0, 0)
STRING 'br"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'bR"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Br"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'BR"abc"' (1, 30) (1, 37)
>>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'")
ENCODING 'utf-8' (0, 0) (0, 0)
STRING "rb'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "rB'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Rb'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "RB'abc'" (1, 30) (1, 37)
>>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"')
ENCODING 'utf-8' (0, 0) (0, 0)
STRING 'rb"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'rB"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Rb"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'RB"abc"' (1, 30) (1, 37)
Operators
>>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
......
......@@ -127,7 +127,7 @@ Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
-StringPrefix = r'(?:[uU][rR]?|[bB][rR]|[rR][bB]|[rR]|[uU])?'
+StringPrefix = r'(?:[uUbB][rR]?|[rR][bB]?)?'
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
......
......@@ -27,6 +27,10 @@ Core and Builtins
Library
-------
- Issue #15054: A bug in tokenize.tokenize that caused string literals
with 'b' prefixes to be incorrectly tokenized has been fixed.
Patch by Serhiy Storchaka.
- Issue #15006: Allow equality comparison between naive and aware
time or datetime objects.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment