Prevent character escape sequences from being resolved in raw f-strings...

Prevent character escape sequences from being resolved in raw f-strings (fr"..."). Also fix some error reporting issues along the way. Update test_fstring.py test file from Py3.7.

Prevent character escape sequences from being resolved in raw f-strings...
Prevent character escape sequences from being resolved in raw f-strings (fr"..."). Also fix some error reporting issues along the way. Update test_fstring.py test file from Py3.7.
5703194d · Stefan Behnel · e77528ab · 5703194d · 5703194d · 5703194d
Commit 5703194d authored Sep 11, 2017 by Stefan Behnel
Hide whitespace changes
Inline Side-by-side

Showing with 62 additions and 33 deletions

CHANGES.rst CHANGES.rst +3 -0

Cython/Compiler/Parsing.py Cython/Compiler/Parsing.py +25 -21

tests/run/test_fstring.pyx tests/run/test_fstring.pyx +34 -12

No files found.
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -80,6 +80,9 @@ Bugs fixed
 * Compile time evaluations of (partially) constant f-strings could show incorrect
  results.

+* Escape sequences in raw f-strings (``fr'...'``) were resolved instead of passing
+  them through as expected.
+
 * Some ref-counting issues in buffer error handling have been resolved.

 Other changes

--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -11,8 +11,8 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
               bytes_literal=object, StringEncoding=object,
               FileSourceDescriptor=object, lookup_unicodechar=object, unicode_category=object,
               Future=object, Options=object, error=object, warning=object,
-               Builtin=object, ModuleNode=object, Utils=object,
-               re=object, sys=object, _parse_escape_sequences=object, _unicode=object, _bytes=object,
+               Builtin=object, ModuleNode=object, Utils=object, _unicode=object, _bytes=object,
+               re=object, sys=object, _parse_escape_sequences=object, _parse_escape_sequences_raw=object,
               partial=object, reduce=object, _IS_PY3=cython.bint, _IS_2BYTE_UNICODE=cython.bint)

 from io import StringIO
@@ -1013,22 +1013,25 @@ def _append_escape_sequence(kind, builder, escape_sequence, s):
        builder.append(escape_sequence)


-_parse_escape_sequences = re.compile(
+_parse_escape_sequences_raw, _parse_escape_sequences = [re.compile((
    # escape sequences:
-    br'(\\(?:'
-    br'[\\abfnrtv"\'{]|'
-    br'[0-7]{2,3}|'
-    br'N\{[^}]*\}|'
-    br'x[0-9a-fA-F]{2}|'
-    br'u[0-9a-fA-F]{4}|'
-    br'U[0-9a-fA-F]{8}|'
-    br'[NuU]|'  # detect invalid escape sequences that do not match above
+    br'(\\(?:' +
+    (br'\\?' if is_raw else (
+        br'[\\abfnrtv"\'{]|'
+        br'[0-7]{2,3}|'
+        br'N\{[^}]*\}|'
+        br'x[0-9a-fA-F]{2}|'
+        br'u[0-9a-fA-F]{4}|'
+        br'U[0-9a-fA-F]{8}|'
+        br'[NxuU]|'  # detect invalid escape sequences that do not match above
+    )) +
    br')?|'
    # non-escape sequences:
    br'\{\{?|'
    br'\}\}?|'
-    br'[^\\{}]+)'.decode('us-ascii')
-).match
+    br'[^\\{}]+)'
+    ).decode('us-ascii')).match
+    for is_raw in (True, False)]


 def p_f_string(s, unicode_value, pos, is_raw):
@@ -1038,13 +1041,15 @@ def p_f_string(s, unicode_value, pos, is_raw):
    next_start = 0
    size = len(unicode_value)
    builder = StringEncoding.UnicodeLiteralBuilder()
+    error_pos = list(pos)  # [src, line, column]
+    _parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences

    while next_start < size:
        end = next_start
-        match = _parse_escape_sequences(unicode_value, next_start)
+        error_pos[2] = pos[2] + end  # FIXME: handle newlines in string
+        match = _parse_seq(unicode_value, next_start)
        if match is None:
-            error_pos = (pos[0], pos[1] + end, pos[2])  # FIXME: handle newlines in string
-            error(error_pos, "Invalid escape sequence")
+            error(tuple(error_pos), "Invalid escape sequence")

        next_start = match.end()
        part = match.group()
@@ -1068,8 +1073,7 @@ def p_f_string(s, unicode_value, pos, is_raw):
            if part == '}}':
                builder.append('}')
            else:
-                error_pos = (pos[0], pos[1] + end, pos[2])  # FIXME: handle newlines in string
-                s.error("f-string: single '}' is not allowed", pos=error_pos)
+                s.error("f-string: single '}' is not allowed", pos=tuple(error_pos))
        else:
            builder.append(part)

@@ -1134,12 +1138,12 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
    expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2)  # TODO: find exact code position (concat, multi-line, ...)

    if not expr_str.strip():
-        error(pos, "empty expression not allowed in f-string")
+        error(expr_pos, "empty expression not allowed in f-string")

    if terminal_char == '!':
        i += 1
        if i + 2 > size:
-            error(pos, "invalid conversion char at end of string")
+            error(expr_pos, "invalid conversion char at end of string")
        else:
            conversion_char = unicode_value[i]
            i += 1
@@ -1152,7 +1156,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
        start_format_spec = i + 1
        while True:
            if i >= size:
-                s.error("missing '}' in format specifier")
+                s.error("missing '}' in format specifier", pos=expr_pos)
            c = unicode_value[i]
            if not in_triple_quotes and not in_string:
                if c == '{':

--- a/tests/run/test_fstring.pyx
+++ b/tests/run/test_fstring.pyx
@@ -35,9 +35,10 @@ class TestCase(CythonTest):
            if exception_type is SyntaxError:
                try:
                    self.fragment(str)
-                    assert held_errors(), "Invalid Cython code failed to raise SyntaxError: %s" % str
                except CompileError:
                    assert True
+                else:
+                    assert held_errors(), "Invalid Cython code failed to raise SyntaxError: %r" % str
                finally:
                    release_errors(ignore=True)
            else:
@@ -46,7 +47,7 @@ class TestCase(CythonTest):
                except exception_type:
                    assert True
                else:
-                    assert False, "Invalid Cython code failed to raise %s: %s" % (exception_type, str)
+                    assert False, "Invalid Cython code failed to raise %s: %r" % (exception_type, str)
                finally:
                    if error_stack:
                        release_errors(ignore=True)
@@ -141,18 +142,9 @@ f'{a * x()}'"""
        self.assertTrue(g.__doc__ is None)

    def __test_literal_eval(self):
-        # With no expressions, an f-string is okay.
-        self.assertEqual(ast.literal_eval("f'x'"), 'x')
-        self.assertEqual(ast.literal_eval("f'x' 'y'"), 'xy')
-
-        # But this should raise an error.
        with self.assertRaisesRegex(ValueError, 'malformed node or string'):
            ast.literal_eval("f'x'")

-        # As should this, which uses a different ast node
-        with self.assertRaisesRegex(ValueError, 'malformed node or string'):
-            ast.literal_eval("f'{3}'")
-
    def __test_ast_compile_time_concat(self):
        x = ['']

@@ -354,6 +346,10 @@ f'{a * x()}'"""
                             "f'{10:{ }}'",
                             "f' { } '",

+                             # The Python parser also ignores the following
+                             # whitespace characters in addition to a space.
+                             "f'''{\t\f\r\n}'''",
+
                             # Catch the empty expression before the
                             #  invalid conversion.
                             "f'{!x}'",
@@ -374,6 +370,12 @@ f'{a * x()}'"""
                             "f'{:x'",
                             ])

+        # Different error message is raised for other whitespace characters.
+        self.assertAllRaise(SyntaxError, 'invalid character in identifier',
+                            ["f'''{\xa0}'''",
+                             #"\xa0",
+                             ])
+
    def test_parens_in_expressions(self):
        self.assertEqual(f'{3,}', '(3,)')

@@ -435,6 +437,20 @@ f'{a * x()}'"""
        self.assertEqual(f'2\x203', '2 3')
        self.assertEqual(f'\x203', ' 3')

+        #with self.assertWarns(DeprecationWarning):  # invalid escape sequence
+        #    value = cy_eval(r"f'\{6*7}'")
+        #self.assertEqual(value, '\\42')
+        self.assertEqual(f'\\{6*7}', '\\42')
+        self.assertEqual(fr'\{6*7}', '\\42')
+
+        AMPERSAND = 'spam'
+        # Get the right unicode character (&), or pick up local variable
+        # depending on the number of backslashes.
+        self.assertEqual(f'\N{AMPERSAND}', '&')
+        self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
+        self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
+        self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
+
    def test_misformed_unicode_character_name(self):
        # These tests are needed because unicode names are parsed
        # differently inside f-strings.
@@ -808,7 +824,8 @@ f'{a * x()}'"""

    def test_errors(self):
        # see issue 26287
-        self.assertAllRaise((TypeError, ValueError), 'non-empty',  # TypeError in Py3.4+
+        exc = ValueError if sys.version_info < (3, 4) else TypeError
+        self.assertAllRaise(exc, 'unsupported',
                            [r"f'{(lambda: 0):x}'",
                             r"f'{(0,):x}'",
                             ])
@@ -832,6 +849,11 @@ f'{a * x()}'"""
        self.assertEqual(f'{d["foo"]}', 'bar')
        self.assertEqual(f"{d['foo']}", 'bar')

+    def __test_backslash_char(self):
+        # Check eval of a backslash followed by a control char.
+        # See bpo-30682: this used to raise an assert in pydebug mode.
+        self.assertEqual(cy_eval('f"\\\n"'), '')
+        self.assertEqual(cy_eval('f"\\\r"'), '')

 if __name__ == '__main__':
    unittest.main()