Commit 975a0797 authored by Martin v. Löwis

Issue #4008: Fix problems with non-ASCII source files.

parent 1cbb17a8
...@@ -74,10 +74,11 @@ def coding_spec(data): ...@@ -74,10 +74,11 @@ def coding_spec(data):
Raise a LookupError if the encoding is declared but unknown. Raise a LookupError if the encoding is declared but unknown.
""" """
if isinstance(data, bytes): if isinstance(data, bytes):
try: # This encoding might be wrong. However, the coding
lines = data.decode('utf-8') # spec must be ASCII-only, so any non-ASCII characters
except UnicodeDecodeError: # around here will be ignored. Decoding to Latin-1 should
return None # never fail (except for memory outage)
lines = data.decode('iso-8859-1')
else: else:
lines = data lines = data
# consider only the first two lines # consider only the first two lines
......
...@@ -3,6 +3,8 @@ What's New in IDLE 3.1a1? ...@@ -3,6 +3,8 @@ What's New in IDLE 3.1a1?
*Release date: XX-XXX-XXXX* *Release date: XX-XXX-XXXX*
- Issue #4008: Fix problems with non-ASCII source files.
- Issue #4323: Always encode source as UTF-8 without asking - Issue #4323: Always encode source as UTF-8 without asking
the user (unless a different encoding is declared); remove the user (unless a different encoding is declared); remove
user configuration of source encoding; all according to user configuration of source encoding; all according to
......
...@@ -24,7 +24,7 @@ import tabnanny ...@@ -24,7 +24,7 @@ import tabnanny
import tokenize import tokenize
import tkinter.messagebox as tkMessageBox import tkinter.messagebox as tkMessageBox
from idlelib.EditorWindow import EditorWindow from idlelib.EditorWindow import EditorWindow
from idlelib import PyShell from idlelib import PyShell, IOBinding
from idlelib.configHandler import idleConf from idlelib.configHandler import idleConf
...@@ -62,7 +62,13 @@ class ScriptBinding: ...@@ -62,7 +62,13 @@ class ScriptBinding:
return 'break' return 'break'
def tabnanny(self, filename): def tabnanny(self, filename):
f = open(filename, 'r') # XXX: tabnanny should work on binary files as well
with open(filename, 'r', encoding='iso-8859-1') as f:
two_lines = f.readline() + f.readline()
encoding = IOBinding.coding_spec(two_lines)
if not encoding:
encoding = 'utf-8'
f = open(filename, 'r', encoding=encoding)
try: try:
tabnanny.process_tokens(tokenize.generate_tokens(f.readline)) tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
except tokenize.TokenError as msg: except tokenize.TokenError as msg:
...@@ -82,14 +88,14 @@ class ScriptBinding: ...@@ -82,14 +88,14 @@ class ScriptBinding:
self.shell = shell = self.flist.open_shell() self.shell = shell = self.flist.open_shell()
saved_stream = shell.get_warning_stream() saved_stream = shell.get_warning_stream()
shell.set_warning_stream(shell.stderr) shell.set_warning_stream(shell.stderr)
f = open(filename, 'r') f = open(filename, 'rb')
source = f.read() source = f.read()
f.close() f.close()
if '\r' in source: if b'\r' in source:
source = re.sub(r"\r\n", "\n", source) source = source.replace(b'\r\n', b'\n')
source = re.sub(r"\r", "\n", source) source = source.replace(b'\r', b'\n')
if source and source[-1] != '\n': if source and source[-1] != ord(b'\n'):
source = source + '\n' source = source + b'\n'
editwin = self.editwin editwin = self.editwin
text = editwin.text text = editwin.text
text.tag_remove("ERROR", "1.0", "end") text.tag_remove("ERROR", "1.0", "end")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment