Commit 56466486 authored by Eric V. Smith's avatar Eric V. Smith

Issue 28128: Print out better error/warning messages for invalid string escapes. Backport to 3.6.

parent 7f0514ad
...@@ -74,6 +74,11 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex( ...@@ -74,6 +74,11 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t, const char *, Py_ssize_t,
const char *); const char *);
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars.
   It does not emit a warning itself: the last argument is an output
   parameter that is set to NULL when decoding starts and then to the
   address of the first invalid escape character seen in the input (the
   character after the backslash); it stays NULL if there is none. */
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t,
const char *,
const char **);
/* Macro, trading safety for speed */ /* Macro, trading safety for speed */
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
......
...@@ -1486,6 +1486,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( ...@@ -1486,6 +1486,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
const char *errors /* error handling */ const char *errors /* error handling */
); );
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
chars.  It reports the first one through first_invalid_escape instead of
emitting a warning itself. */
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
const char *string, /* Unicode-Escape encoded string */
Py_ssize_t length, /* size of string */
const char *errors, /* error handling */
const char **first_invalid_escape /* on return, points to first
invalid escaped char in
string, or is NULL if no invalid
escape was seen. */
);
PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
PyObject *unicode /* Unicode object */ PyObject *unicode /* Unicode object */
); );
......
...@@ -31,6 +31,7 @@ import os ...@@ -31,6 +31,7 @@ import os
import sys import sys
import shutil import shutil
import tempfile import tempfile
import warnings
import unittest import unittest
...@@ -104,6 +105,19 @@ class TestLiterals(unittest.TestCase): ...@@ -104,6 +105,19 @@ class TestLiterals(unittest.TestCase):
self.assertRaises(SyntaxError, eval, r""" '\U000000' """) self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
self.assertRaises(SyntaxError, eval, r""" '\U0000000' """) self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
# Check that invalid escape sequences in str literals emit a
# DeprecationWarning, keep the backslash in the resulting value, and are
# reported against the correct file name and line number.
def test_eval_str_invalid_escape(self):
for b in range(1, 128):
# Skip the characters that are accepted after a backslash in a str
# literal (line continuation, quotes, octal digits, named escapes).
if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
continue
with self.assertWarns(DeprecationWarning):
# The unrecognised escape is kept verbatim: backslash + character.
self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))
# Record warnings instead of asserting on them so that the reported
# location can be inspected.
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always', category=DeprecationWarning)
# The invalid escape "\z" sits on the second line of the literal.
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
def test_eval_str_raw(self): def test_eval_str_raw(self):
self.assertEqual(eval(""" r'x' """), 'x') self.assertEqual(eval(""" r'x' """), 'x')
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
...@@ -130,6 +144,19 @@ class TestLiterals(unittest.TestCase): ...@@ -130,6 +144,19 @@ class TestLiterals(unittest.TestCase):
self.assertRaises(SyntaxError, eval, r""" b'\x' """) self.assertRaises(SyntaxError, eval, r""" b'\x' """)
self.assertRaises(SyntaxError, eval, r""" b'\x0' """) self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
# Check that invalid escape sequences in bytes literals emit a
# DeprecationWarning, keep the backslash in the resulting value, and are
# reported against the correct file name and line number.
def test_eval_bytes_invalid_escape(self):
for b in range(1, 128):
# Skip the characters that are accepted after a backslash in a bytes
# literal; unlike the str variant of this test, N, U and u are not
# in the skip list here.
if b in b"""\n\r"'01234567\\abfnrtvx""":
continue
with self.assertWarns(DeprecationWarning):
# The unrecognised escape is kept verbatim: backslash + byte.
self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))
# Record warnings instead of asserting on them so that the reported
# location can be inspected.
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always', category=DeprecationWarning)
# The invalid escape "\z" sits on the second line of the literal.
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
def test_eval_bytes_raw(self): def test_eval_bytes_raw(self):
self.assertEqual(eval(""" br'x' """), b'x') self.assertEqual(eval(""" br'x' """), b'x')
self.assertEqual(eval(""" rb'x' """), b'x') self.assertEqual(eval(""" rb'x' """), b'x')
......
...@@ -2413,13 +2413,6 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -2413,13 +2413,6 @@ class UnicodeTest(string_tests.CommonTest,
support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, iter, str)
support.check_free_after_iterating(self, reversed, str) support.check_free_after_iterating(self, reversed, str)
# Check that a backslash followed by an ASCII letter (or 8/9) that does not
# start a recognised escape emits a DeprecationWarning in a str literal.
def test_invalid_sequences(self):
for letter in string.ascii_letters + "89": # 0-7 are octal escapes
# These letters start valid escape sequences, so no warning is expected.
if letter in "abfnrtuvxNU":
continue
with self.assertWarns(DeprecationWarning):
eval(r"'\%s'" % letter)
class CAPITest(unittest.TestCase): class CAPITest(unittest.TestCase):
......
...@@ -10,6 +10,10 @@ What's New in Python 3.6.0 beta 3 ...@@ -10,6 +10,10 @@ What's New in Python 3.6.0 beta 3
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #28128: Deprecation warning for invalid str and bytes escape
sequences now prints better information about where the problem
occurs. Patch by Serhiy Storchaka and Eric Smith.
- Issue #28509: dict.update() no longer allocate unnecessary large memory. - Issue #28509: dict.update() no longer allocate unnecessary large memory.
- Issue #28426: Fixed potential crash in PyUnicode_AsDecodedObject() in debug - Issue #28426: Fixed potential crash in PyUnicode_AsDecodedObject() in debug
......
...@@ -1105,11 +1105,12 @@ _PyBytes_DecodeEscapeRecode(const char **s, const char *end, ...@@ -1105,11 +1105,12 @@ _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
return p; return p;
} }
PyObject *PyBytes_DecodeEscape(const char *s, PyObject *_PyBytes_DecodeEscape(const char *s,
Py_ssize_t len, Py_ssize_t len,
const char *errors, const char *errors,
Py_ssize_t unicode, Py_ssize_t unicode,
const char *recode_encoding) const char *recode_encoding,
const char **first_invalid_escape)
{ {
int c; int c;
char *p; char *p;
...@@ -1123,6 +1124,8 @@ PyObject *PyBytes_DecodeEscape(const char *s, ...@@ -1123,6 +1124,8 @@ PyObject *PyBytes_DecodeEscape(const char *s,
return NULL; return NULL;
writer.overallocate = 1; writer.overallocate = 1;
*first_invalid_escape = NULL;
end = s + len; end = s + len;
while (s < end) { while (s < end) {
if (*s != '\\') { if (*s != '\\') {
...@@ -1207,9 +1210,12 @@ PyObject *PyBytes_DecodeEscape(const char *s, ...@@ -1207,9 +1210,12 @@ PyObject *PyBytes_DecodeEscape(const char *s,
break; break;
default: default:
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0) if (*first_invalid_escape == NULL) {
goto failed; *first_invalid_escape = s-1; /* Back up one char, since we've
already incremented s. */
}
*p++ = '\\'; *p++ = '\\';
s--;
goto non_esc; /* an arbitrary number of unescaped goto non_esc; /* an arbitrary number of unescaped
UTF-8 bytes may follow. */ UTF-8 bytes may follow. */
} }
...@@ -1222,6 +1228,29 @@ PyObject *PyBytes_DecodeEscape(const char *s, ...@@ -1222,6 +1228,29 @@ PyObject *PyBytes_DecodeEscape(const char *s,
return NULL; return NULL;
} }
/* Decode the backslash escapes in the len bytes starting at s.

   Thin wrapper around _PyBytes_DecodeEscape(): all arguments are passed
   through unchanged and the decoded object is returned as-is.  In addition,
   if the helper reports an invalid escape sequence, a DeprecationWarning
   naming the first offending character is issued.

   Returns the decoded object, or NULL with an exception set if decoding
   fails or if issuing the warning fails. */
PyObject *PyBytes_DecodeEscape(const char *s,
                                Py_ssize_t len,
                                const char *errors,
                                Py_ssize_t unicode,
                                const char *recode_encoding)
{
    const char *first_invalid_escape;
    PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
                                             recode_encoding,
                                             &first_invalid_escape);
    if (result == NULL)
        return NULL;
    if (first_invalid_escape != NULL) {
        /* %c consumes an int.  Go through unsigned char so that a byte
           >= 0x80 is not sign-extended to a negative value on platforms
           where plain char is signed. */
        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
                             "invalid escape sequence '\\%c'",
                             (unsigned char)*first_invalid_escape) < 0) {
            /* The warning could not be issued: drop the result so that
               the caller only sees the error. */
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */
/* object api */ /* object api */
......
...@@ -5896,9 +5896,10 @@ PyUnicode_AsUTF16String(PyObject *unicode) ...@@ -5896,9 +5896,10 @@ PyUnicode_AsUTF16String(PyObject *unicode)
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
PyObject * PyObject *
PyUnicode_DecodeUnicodeEscape(const char *s, _PyUnicode_DecodeUnicodeEscape(const char *s,
Py_ssize_t size, Py_ssize_t size,
const char *errors) const char *errors,
const char **first_invalid_escape)
{ {
const char *starts = s; const char *starts = s;
_PyUnicodeWriter writer; _PyUnicodeWriter writer;
...@@ -5906,6 +5907,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -5906,6 +5907,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
// so we can remember if we've seen an invalid escape char or not
*first_invalid_escape = NULL;
if (size == 0) { if (size == 0) {
_Py_RETURN_UNICODE_EMPTY(); _Py_RETURN_UNICODE_EMPTY();
} }
...@@ -6080,9 +6084,10 @@ PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -6080,9 +6084,10 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
goto error; goto error;
default: default:
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, if (*first_invalid_escape == NULL) {
"invalid escape sequence '\\%c'", c) < 0) *first_invalid_escape = s-1; /* Back up one char, since we've
goto onError; already incremented s. */
}
WRITE_ASCII_CHAR('\\'); WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c); WRITE_CHAR(c);
continue; continue;
...@@ -6117,6 +6122,27 @@ PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -6117,6 +6122,27 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
return NULL; return NULL;
} }
/* Decode size bytes of Unicode-Escape encoded data starting at s.

   Thin wrapper around _PyUnicode_DecodeUnicodeEscape(): the arguments are
   passed through unchanged and the decoded object is returned as-is.  In
   addition, if the helper reports an invalid escape sequence, a
   DeprecationWarning naming the first offending character is issued.

   Returns the decoded object, or NULL with an exception set if decoding
   fails or if issuing the warning fails. */
PyObject *
PyUnicode_DecodeUnicodeEscape(const char *s,
                              Py_ssize_t size,
                              const char *errors)
{
    const char *first_invalid_escape;
    PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
                                                      &first_invalid_escape);
    if (result == NULL)
        return NULL;
    if (first_invalid_escape != NULL) {
        /* %c consumes an int.  Go through unsigned char so that a byte
           >= 0x80 is not sign-extended to a negative value on platforms
           where plain char is signed. */
        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
                             "invalid escape sequence '\\%c'",
                             (unsigned char)*first_invalid_escape) < 0) {
            /* The warning could not be issued: drop the result so that
               the caller only sees the error. */
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}
/* Return a Unicode-Escape string version of the Unicode object. /* Return a Unicode-Escape string version of the Unicode object.
If quotes is true, the string is enclosed in u"" or u'' quotes as If quotes is true, the string is enclosed in u"" or u'' quotes as
......
...@@ -4113,8 +4113,34 @@ decode_utf8(struct compiling *c, const char **sPtr, const char *end) ...@@ -4113,8 +4113,34 @@ decode_utf8(struct compiling *c, const char **sPtr, const char *end)
return PyUnicode_DecodeUTF8(t, s - t, NULL); return PyUnicode_DecodeUTF8(t, s - t, NULL);
} }
/* Issue a DeprecationWarning for an invalid escape sequence in a literal,
   attributed to the compiled file (c->c_filename) and the line of node n.

   first_invalid_escape_char is the character that followed the backslash.

   Returns 0 if the warning was issued without error.  Returns -1 with an
   exception set if the message cannot be built or if issuing the warning
   fails for any reason.  When the failure is the DeprecationWarning itself
   being raised as an exception, that exception is cleared and the same
   message is reported through ast_error() instead (defined elsewhere in
   this file; presumably it raises a SyntaxError carrying the location of
   n -- confirm). */
static int
warn_invalid_escape_sequence(struct compiling *c, const node *n,
                             char first_invalid_escape_char)
{
    /* %c consumes an int; cast through unsigned char so a byte >= 0x80 is
       never passed as a negative value where plain char is signed. */
    PyObject *msg = PyUnicode_FromFormat(
        "invalid escape sequence \\%c",
        (unsigned char)first_invalid_escape_char);
    if (msg == NULL) {
        return -1;
    }
    if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
                                 c->c_filename, LINENO(n),
                                 NULL, NULL) < 0)
    {
        if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
            const char *s;

            /* Clear the pending DeprecationWarning before calling back
               into the C API, then replace it with the error produced by
               ast_error(). */
            PyErr_Clear();

            s = PyUnicode_AsUTF8(msg);
            if (s != NULL) {
                ast_error(c, n, s);
            }
        }
        /* Any failure -- DeprecationWarning or not -- must be propagated;
           returning 0 here would leave an exception pending while
           signalling success. */
        Py_DECREF(msg);
        return -1;
    }
    Py_DECREF(msg);
    return 0;
}
static PyObject * static PyObject *
decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
size_t len)
{ {
PyObject *v, *u; PyObject *v, *u;
char *buf; char *buf;
...@@ -4167,11 +4193,41 @@ decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) ...@@ -4167,11 +4193,41 @@ decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len)
len = p - buf; len = p - buf;
s = buf; s = buf;
v = PyUnicode_DecodeUnicodeEscape(s, len, NULL); const char *first_invalid_escape;
v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
if (v != NULL && first_invalid_escape != NULL) {
if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
/* We have not decref u before because first_invalid_escape points
inside u. */
Py_XDECREF(u);
Py_DECREF(v);
return NULL;
}
}
Py_XDECREF(u); Py_XDECREF(u);
return v; return v;
} }
/* Decode the escapes of a non-raw bytes literal (len bytes starting at s)
   with _PyBytes_DecodeEscape() and, if an invalid escape sequence was
   seen, report it through warn_invalid_escape_sequence() for node n.

   Returns the decoded object, or NULL if decoding fails or the warning
   helper reports failure. */
static PyObject *
decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
                          size_t len)
{
    const char *bad_escape;
    PyObject *decoded;

    decoded = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL, &bad_escape);
    if (decoded != NULL
        && bad_escape != NULL
        && warn_invalid_escape_sequence(c, n, *bad_escape) < 0)
    {
        /* The warning helper failed: release the decoded object and
           return NULL to the caller. */
        Py_CLEAR(decoded);
    }
    return decoded;
}
/* Compile this expression in to an expr_ty. Add parens around the /* Compile this expression in to an expr_ty. Add parens around the
expression, in order to allow leading spaces in the expression. */ expression, in order to allow leading spaces in the expression. */
static expr_ty static expr_ty
...@@ -4310,7 +4366,7 @@ done: ...@@ -4310,7 +4366,7 @@ done:
literal_end-literal_start, literal_end-literal_start,
NULL, NULL); NULL, NULL);
else else
*literal = decode_unicode_with_escapes(c, literal_start, *literal = decode_unicode_with_escapes(c, n, literal_start,
literal_end-literal_start); literal_end-literal_start);
if (!*literal) if (!*literal)
return -1; return -1;
...@@ -5048,12 +5104,12 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode, ...@@ -5048,12 +5104,12 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
if (*rawmode) if (*rawmode)
*result = PyBytes_FromStringAndSize(s, len); *result = PyBytes_FromStringAndSize(s, len);
else else
*result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL); *result = decode_bytes_with_escapes(c, n, s, len);
} else { } else {
if (*rawmode) if (*rawmode)
*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
else else
*result = decode_unicode_with_escapes(c, s, len); *result = decode_unicode_with_escapes(c, n, s, len);
} }
return *result == NULL ? -1 : 0; return *result == NULL ? -1 : 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment