Commit 63c39fe3 authored by Martin v. Löwis

merge 3.2: issue 14629

parents 7b17a4e1 63674f4b
......@@ -838,6 +838,16 @@ class TestDetectEncoding(TestCase):
found, consumed_lines = detect_encoding(rl)
self.assertEqual(found, "iso-8859-1")
def test_syntaxerror_latin1(self):
    # Issue 14629: when the first line(s) contain bytes that are not
    # valid UTF-8 and there is no encoding declaration,
    # detect_encoding() is expected to raise SyntaxError.
    #
    # 0xDF is LATIN SMALL LETTER SHARP S in Latin-1; followed by an
    # ASCII quote it does not form a valid UTF-8 sequence.
    latin1_source = (b'print("\xdf")',)
    readline = self.get_readline(latin1_source)
    with self.assertRaises(SyntaxError):
        detect_encoding(readline)
def test_utf8_normalization(self):
# See get_normal_name() in tokenizer.c.
encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
......
......@@ -364,9 +364,12 @@ def detect_encoding(readline):
def find_cookie(line):
try:
line_string = line.decode('ascii')
# Decode as UTF-8. Either the line is an encoding declaration,
# in which case it should be pure ASCII, or it must be UTF-8
# per default encoding.
line_string = line.decode('utf-8')
except UnicodeDecodeError:
return None
raise SyntaxError("invalid or missing encoding declaration")
matches = cookie_re.findall(line_string)
if not matches:
......
......@@ -55,6 +55,9 @@ Core and Builtins
Library
-------
- Issue #14629: Raise SyntaxError in tokenize.detect_encoding if the
first two lines have non-UTF-8 characters without an encoding declaration.
- Issue #14308: Fix an exception when a "dummy" thread is in the threading
module's active list after a fork().
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment