Commit e787bce7 authored by Serhiy Storchaka

Issue #18873: IDLE, 2to3, and the findnocoding.py script now detect Python
source code encoding only in comment lines.
parent 74213e4e
...@@ -71,7 +71,7 @@ else: ...@@ -71,7 +71,7 @@ else:
encoding = encoding.lower() encoding = encoding.lower()
coding_re = re.compile("coding[:=]\s*([-\w_.]+)") coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
class EncodingMessage(SimpleDialog): class EncodingMessage(SimpleDialog):
"Inform user that an encoding declaration is needed." "Inform user that an encoding declaration is needed."
...@@ -125,11 +125,12 @@ def coding_spec(str): ...@@ -125,11 +125,12 @@ def coding_spec(str):
Raise LookupError if the encoding is declared but unknown. Raise LookupError if the encoding is declared but unknown.
""" """
# Only consider the first two lines # Only consider the first two lines
str = str.split("\n")[:2] str = str.split("\n", 2)[:2]
str = "\n".join(str) for line in lst:
match = coding_re.match(line)
match = coding_re.search(str) if match is not None:
if not match: break
else:
return None return None
name = match.group(1) name = match.group(1)
# Check whether the encoding is known # Check whether the encoding is known
......
...@@ -236,7 +236,7 @@ class Untokenizer: ...@@ -236,7 +236,7 @@ class Untokenizer:
startline = False startline = False
toks_append(tokval) toks_append(tokval)
cookie_re = re.compile("coding[:=]\s*([-\w.]+)") cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
def _get_normal_name(orig_enc): def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c.""" """Imitates get_normal_name in tokenizer.c."""
...@@ -281,11 +281,10 @@ def detect_encoding(readline): ...@@ -281,11 +281,10 @@ def detect_encoding(readline):
line_string = line.decode('ascii') line_string = line.decode('ascii')
except UnicodeDecodeError: except UnicodeDecodeError:
return None return None
match = cookie_re.match(line_string)
matches = cookie_re.findall(line_string) if not match:
if not matches:
return None return None
encoding = _get_normal_name(matches[0]) encoding = _get_normal_name(match.group(1))
try: try:
codec = lookup(encoding) codec = lookup(encoding)
except LookupError: except LookupError:
......
#!/usr/bin/env python
print '#coding=0'
...@@ -271,6 +271,10 @@ from __future__ import print_function""" ...@@ -271,6 +271,10 @@ from __future__ import print_function"""
fn = os.path.join(TEST_DATA_DIR, "different_encoding.py") fn = os.path.join(TEST_DATA_DIR, "different_encoding.py")
self.check_file_refactoring(fn) self.check_file_refactoring(fn)
def test_false_file_encoding(self):
fn = os.path.join(TEST_DATA_DIR, "false_encoding.py")
data = self.check_file_refactoring(fn)
def test_bom(self): def test_bom(self):
fn = os.path.join(TEST_DATA_DIR, "bom.py") fn = os.path.join(TEST_DATA_DIR, "bom.py")
data = self.check_file_refactoring(fn) data = self.check_file_refactoring(fn)
......
...@@ -183,6 +183,9 @@ Library ...@@ -183,6 +183,9 @@ Library
Tools/Demos Tools/Demos
----------- -----------
- Issue #18873: 2to3 and the findnocoding.py script now detect Python source
code encoding only in comment lines.
- Issue #18817: Fix a resource warning in Lib/aifc.py demo. - Issue #18817: Fix a resource warning in Lib/aifc.py demo.
- Issue #18439: Make patchcheck work on Windows for ACKS, NEWS. - Issue #18439: Make patchcheck work on Windows for ACKS, NEWS.
...@@ -207,6 +210,9 @@ Build ...@@ -207,6 +210,9 @@ Build
IDLE IDLE
---- ----
- Issue #18873: IDLE now detects Python source code encoding only in comment
lines.
- Issue #18988: The "Tab" key now works when a word is already autocompleted. - Issue #18988: The "Tab" key now works when a word is already autocompleted.
- Issue #18489: Add tests for SearchEngine. Original patch by Phil Webster. - Issue #18489: Add tests for SearchEngine. Original patch by Phil Webster.
......
...@@ -32,13 +32,13 @@ except ImportError: ...@@ -32,13 +32,13 @@ except ImportError:
"no sophisticated Python source file search will be done.") "no sophisticated Python source file search will be done.")
decl_re = re.compile(r"coding[=:]\s*([-\w.]+)") decl_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
def get_declaration(line): def get_declaration(line):
match = decl_re.search(line) match = decl_re.match(line)
if match: if match:
return match.group(1) return match.group(1)
return '' return b''
def has_correct_encoding(text, codec): def has_correct_encoding(text, codec):
try: try:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment