Commit 1a019507 authored by Stefan Behnel

robustness against unicode errors on encoding detection

parent 8c32ed8f
...@@ -139,8 +139,10 @@ class Context: ...@@ -139,8 +139,10 @@ class Context:
def parse(self, source_filename, type_names, pxd, full_module_name): def parse(self, source_filename, type_names, pxd, full_module_name):
# Parse the given source file and return a parse tree. # Parse the given source file and return a parse tree.
try:
f = Utils.open_source_file(source_filename, "rU") f = Utils.open_source_file(source_filename, "rU")
try:
if isinstance(source_filename, unicode): if isinstance(source_filename, unicode):
name = source_filename name = source_filename
else: else:
...@@ -149,15 +151,13 @@ class Context: ...@@ -149,15 +151,13 @@ class Context:
filename_encoding = sys.getdefaultencoding() filename_encoding = sys.getdefaultencoding()
name = source_filename.decode(filename_encoding) name = source_filename.decode(filename_encoding)
try:
try:
s = PyrexScanner(f, name, source_encoding = f.encoding, s = PyrexScanner(f, name, source_encoding = f.encoding,
type_names = type_names, context = self) type_names = type_names, context = self)
tree = Parsing.p_module(s, pxd, full_module_name) tree = Parsing.p_module(s, pxd, full_module_name)
except UnicodeDecodeError, msg:
error((name, 0, 0), "Decoding error, missing or incorrect coding=<encoding-name> at top of source (%s)" % msg)
finally: finally:
f.close() f.close()
except UnicodeDecodeError, msg:
error((source_filename, 0, 0), "Decoding error, missing or incorrect coding=<encoding-name> at top of source (%s)" % msg)
if Errors.num_errors > 0: if Errors.num_errors > 0:
raise CompileError raise CompileError
return tree return tree
......
...@@ -41,12 +41,15 @@ def detect_file_encoding(source_filename): ...@@ -41,12 +41,15 @@ def detect_file_encoding(source_filename):
# PEPs 263 and 3120 # PEPs 263 and 3120
f = codecs.open(source_filename, "rU", encoding="UTF-8") f = codecs.open(source_filename, "rU", encoding="UTF-8")
try: try:
for line_no, line in enumerate(f): chars = []
encoding = _match_file_encoding(line) for i in range(2):
c = f.read(1)
while c and c != '\n':
chars.append(c)
c = f.read(1)
encoding = _match_file_encoding(u''.join(chars))
if encoding: if encoding:
return encoding.group(1) return encoding.group(1)
if line_no == 1:
break
finally: finally:
f.close() f.close()
return "UTF-8" return "UTF-8"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment