Commit e787bce7 authored by Serhiy Storchaka

Issue #18873: IDLE, 2to3, and the findnocoding.py script now detect Python
source code encoding only in comment lines.
parent 74213e4e
......@@ -71,7 +71,7 @@ else:
encoding = encoding.lower()
coding_re = re.compile("coding[:=]\s*([-\w_.]+)")
coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
class EncodingMessage(SimpleDialog):
"Inform user that an encoding declaration is needed."
......@@ -125,11 +125,12 @@ def coding_spec(str):
Raise LookupError if the encoding is declared but unknown.
"""
# Only consider the first two lines
str = str.split("\n")[:2]
str = "\n".join(str)
match = coding_re.search(str)
if not match:
lst = str.split("\n", 2)[:2]
for line in lst:
match = coding_re.match(line)
if match is not None:
break
else:
return None
name = match.group(1)
# Check whether the encoding is known
......
......@@ -236,7 +236,7 @@ class Untokenizer:
startline = False
toks_append(tokval)
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
......@@ -281,11 +281,10 @@ def detect_encoding(readline):
line_string = line.decode('ascii')
except UnicodeDecodeError:
return None
matches = cookie_re.findall(line_string)
if not matches:
match = cookie_re.match(line_string)
if not match:
return None
encoding = _get_normal_name(matches[0])
encoding = _get_normal_name(match.group(1))
try:
codec = lookup(encoding)
except LookupError:
......
#!/usr/bin/env python
print '#coding=0'
......@@ -271,6 +271,10 @@ from __future__ import print_function"""
fn = os.path.join(TEST_DATA_DIR, "different_encoding.py")
self.check_file_refactoring(fn)
def test_false_file_encoding(self):
fn = os.path.join(TEST_DATA_DIR, "false_encoding.py")
data = self.check_file_refactoring(fn)
def test_bom(self):
fn = os.path.join(TEST_DATA_DIR, "bom.py")
data = self.check_file_refactoring(fn)
......
......@@ -183,6 +183,9 @@ Library
Tools/Demos
-----------
- Issue #18873: 2to3 and the findnocoding.py script now detect Python source
code encoding only in comment lines.
- Issue #18817: Fix a resource warning in Lib/aifc.py demo.
- Issue #18439: Make patchcheck work on Windows for ACKS, NEWS.
......@@ -207,6 +210,9 @@ Build
IDLE
----
- Issue #18873: IDLE now detects Python source code encoding only in comment
lines.
- Issue #18988: The "Tab" key now works when a word is already autocompleted.
- Issue #18489: Add tests for SearchEngine. Original patch by Phil Webster.
......
......@@ -32,13 +32,13 @@ except ImportError:
"no sophisticated Python source file search will be done.")
decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")
decl_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
def get_declaration(line):
match = decl_re.search(line)
match = decl_re.match(line)
if match:
return match.group(1)
return ''
return b''
def has_correct_encoding(text, codec):
try:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment