Commit 66690e7c authored by da-woods, committed by Stefan Behnel

Fix string constant folding with language_level=2 (GH-4083)

* Handle constant folding for LanguageLevel 2 on Python 3. Ensure that when StrNode is a BytesLiteral, we don't coerce it to unicode.
* Add test for string multiplication bug. Needed to change the TreePath slightly to allow bytes-to-str comparison.

Fixes https://github.com/cython/cython/issues/3951
parent ecd9a4ba
...@@ -4251,6 +4251,7 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): ...@@ -4251,6 +4251,7 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
string_node.unicode_value = encoded_string( string_node.unicode_value = encoded_string(
string_node.unicode_value * multiplier, string_node.unicode_value * multiplier,
string_node.unicode_value.encoding) string_node.unicode_value.encoding)
build_string = encoded_string if string_node.value.is_unicode else bytes_literal
elif isinstance(string_node, ExprNodes.UnicodeNode): elif isinstance(string_node, ExprNodes.UnicodeNode):
if string_node.bytes_value is not None: if string_node.bytes_value is not None:
string_node.bytes_value = bytes_literal( string_node.bytes_value = bytes_literal(
...@@ -4258,9 +4259,14 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): ...@@ -4258,9 +4259,14 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
string_node.bytes_value.encoding) string_node.bytes_value.encoding)
else: else:
assert False, "unknown string node type: %s" % type(string_node) assert False, "unknown string node type: %s" % type(string_node)
string_node.constant_result = string_node.value = build_string( string_node.value = build_string(
string_node.value * multiplier, string_node.value * multiplier,
string_node.value.encoding) string_node.value.encoding)
# follow constant-folding and use unicode_value in preference
if isinstance(string_node, ExprNodes.StringNode) and string_node.unicode_value is not None:
string_node.constant_result = string_node.unicode_value
else:
string_node.constant_result = string_node.value
return string_node return string_node
def _calculate_constant_seq(self, node, sequence_node, factor): def _calculate_constant_seq(self, node, sequence_node, factor):
......
...@@ -10,6 +10,12 @@ from __future__ import absolute_import ...@@ -10,6 +10,12 @@ from __future__ import absolute_import
import re import re
import operator import operator
import sys
if sys.version_info[0] >= 3:
_unicode = str
else:
_unicode = unicode
path_tokenizer = re.compile( path_tokenizer = re.compile(
r"(" r"("
...@@ -167,6 +173,11 @@ def handle_attribute(next, token): ...@@ -167,6 +173,11 @@ def handle_attribute(next, token):
continue continue
if attr_value == value: if attr_value == value:
yield attr_value yield attr_value
elif (isinstance(attr_value, bytes) and isinstance(value, _unicode) and
attr_value == value.encode()):
# allow a bytes-to-string comparison too
yield attr_value
return select return select
......
...@@ -31,3 +31,15 @@ grail_long = 700 * "tomato" ...@@ -31,3 +31,15 @@ grail_long = 700 * "tomato"
uspam = u"eggs" * 4 uspam = u"eggs" * 4
ugrail = 7 * u"tomato" ugrail = 7 * u"tomato"
ugrail_long = 700 * u"tomato" ugrail_long = 700 * u"tomato"
cimport cython
# Regression test for GH-3951 (string constant folding with language_level=2).
# The two tree assertions below require that, after compilation, a StringNode
# exists whose `value` compares equal to '-----' and one whose `unicode_value`
# compares equal to '-----', i.e. both attributes carry the multiplied string.
@cython.test_assert_path_exists("//StringNode[@value = '-----']")
@cython.test_assert_path_exists("//StringNode[@unicode_value = '-----']")
def gh3951():
"""
Bug occurs with language_level=2 and affects StringNode.value
>>> gh3951()
'-----'
"""
# Literal string multiplied by a literal int: the expression form that the
# tree assertions above expect to find as a single StringNode.
return "-"*5
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment