Commit a934c41c authored by Stefan Behnel

fixed source filename and Cython code embedding in C files, moved source file...

fixed source filename and Cython code embedding in C files, moved source file reading algorithm to Cython.Utils to make it available everywhere (and then use it everywhere)
parent aa420b73
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
# Pyrex - Code output module # Pyrex - Code output module
# #
import codecs
import Naming import Naming
import Options import Options
from Cython.Utils import open_new_file from Cython.Utils import open_new_file, open_source_file
from PyrexTypes import py_object_type, typecast from PyrexTypes import py_object_type, typecast
from TypeSlots import method_coexist from TypeSlots import method_coexist
...@@ -85,23 +86,24 @@ class CCodeWriter: ...@@ -85,23 +86,24 @@ class CCodeWriter:
def indent(self): def indent(self):
self.f.write(" " * self.level) self.f.write(" " * self.level)
def get_py_version_hex(self, pyversion):
    """Format a version sequence as an 8-hex-digit C literal.

    Each of the first four components of ``pyversion`` becomes one
    two-digit upper-case hex byte; missing components are treated as 0.
    """
    # Pad with zeros so that short version tuples still give four fields.
    padded = tuple(pyversion) + (0, 0, 0, 0)
    major, minor, micro, release = padded[:4]
    return "0x%02X%02X%02X%02X" % (major, minor, micro, release)
def file_contents(self, file): def file_contents(self, file):
try: try:
return self.input_file_contents[file] return self.input_file_contents[file]
except KeyError: except KeyError:
F = [line.replace('*/', '*[inserted by cython to avoid comment closer]/') F = [line.encode('ASCII', 'replace').replace(
for line in open(file).readlines()] '*/', '*[inserted by cython to avoid comment closer]/')
for line in open_source_file(file)]
self.input_file_contents[file] = F self.input_file_contents[file] = F
return F return F
def get_py_version_hex(self, pyversion):
    """Format a version sequence as an 8-hex-digit C literal.

    Each of the first four components of ``pyversion`` becomes one
    two-digit upper-case hex byte; missing components are treated as 0.
    """
    # Pad with zeros so that short version tuples still give four fields.
    padded = tuple(pyversion) + (0, 0, 0, 0)
    major, minor, micro, release = padded[:4]
    return "0x%02X%02X%02X%02X" % (major, minor, micro, release)
def mark_pos(self, pos): def mark_pos(self, pos):
if pos is None: if pos is None:
return return
file, line, col = pos filename, line, col = pos
contents = self.file_contents(file) contents = self.file_contents(filename)
context = '' context = ''
for i in range(max(0,line-3), min(line+2, len(contents))): for i in range(max(0,line-3), min(line+2, len(contents))):
...@@ -109,8 +111,8 @@ class CCodeWriter: ...@@ -109,8 +111,8 @@ class CCodeWriter:
if i+1 == line: # line numbers in pyrex start counting up from 1 if i+1 == line: # line numbers in pyrex start counting up from 1
s = s.rstrip() + ' # <<<<<<<<<<<<<< ' + '\n' s = s.rstrip() + ' # <<<<<<<<<<<<<< ' + '\n'
context += " * " + s context += " * " + s
marker = '"%s":%s\n%s' % (file, line, context) marker = '"%s":%d\n%s' % (filename.encode('ASCII', 'replace'), line, context)
if self.last_marker != marker: if self.last_marker != marker:
self.marker = marker self.marker = marker
......
...@@ -137,28 +137,20 @@ class Context: ...@@ -137,28 +137,20 @@ class Context:
self.modules[name] = scope self.modules[name] = scope
return scope return scope
# Raw string so that \s and \w reach the regex engine as written.
match_file_encoding = re.compile(r"coding[:=]\s*([-\w.]+)").search

def detect_file_encoding(self, source_filename):
    """Return the encoding declared in the first two lines of a source file.

    The file is scanned for a ``coding[:=] <name>`` declaration
    (PEPs 263 and 3120).  Only the first two lines are examined; if
    neither contains a declaration, "UTF-8" is returned.
    """
    # The declaration itself is plain ASCII, but the file may be stored in
    # a different encoding.  Undecodable bytes are therefore ignored rather
    # than raising UnicodeDecodeError before the declaration has been seen.
    # codecs.open() always opens the file in binary mode and the reader
    # splits lines itself, so no "U" flag is needed (it is rejected by
    # newer Python versions).
    f = codecs.open(source_filename, "r", encoding="UTF-8", errors="ignore")
    try:
        for line_no, line in enumerate(f):
            encoding = self.match_file_encoding(line)
            if encoding:
                return encoding.group(1)
            if line_no == 1:
                # second line checked, stop looking
                break
    finally:
        f.close()
    return "UTF-8"
def parse(self, source_filename, type_names, pxd, full_module_name): def parse(self, source_filename, type_names, pxd, full_module_name):
# Parse the given source file and return a parse tree. # Parse the given source file and return a parse tree.
encoding = self.detect_file_encoding(source_filename) f = Utils.open_source_file(source_filename, "rU")
f = codecs.open(source_filename, "rU", encoding=encoding)
s = PyrexScanner(f, source_filename, source_encoding = encoding, if isinstance(source_filename, unicode):
type_names = type_names, context = self) name = source_filename
else:
filename_encoding = sys.getfilesystemencoding()
if filename_encoding is None:
filename_encoding = getdefaultencoding()
name = source_filename.decode(filename_encoding)
s = PyrexScanner(f, name, source_encoding = f.encoding,
type_names = type_names, context = self)
try: try:
tree = Parsing.p_module(s, pxd, full_module_name) tree = Parsing.p_module(s, pxd, full_module_name)
finally: finally:
......
...@@ -37,7 +37,31 @@ def relative_position(pos): ...@@ -37,7 +37,31 @@ def relative_position(pos):
AUTHOR: William Stein AUTHOR: William Stein
""" """
return (pos[0][absolute_path_length+1:], pos[1]) return (pos[0][absolute_path_length+1:], pos[1])
def embed_position(pos, docstring):
    """Prefix a docstring with a line naming its source position.

    Returns ``docstring`` unchanged when Options.embed_pos_in_docstring is
    off.  Otherwise returns an EncodedString that starts with
    ``File: <path> (starting at line <n>)``, built from
    ``relative_position(pos)``, followed by the original docstring if
    there is one.

    pos        -- source position as accepted by relative_position()
    docstring  -- the original docstring (must have an ``encoding``
                  attribute), or None
    """
    if not Options.embed_pos_in_docstring:
        return docstring

    # Imported locally, as is done elsewhere in this file.
    import ExprNodes

    # Use the 'pos' argument: this is a module level function, there is
    # no 'self' here.
    pos_line = u'File: %s (starting at line %s)' % relative_position(pos)
    if docstring is None:
        # unicode string
        return ExprNodes.EncodedString(pos_line)

    # make sure we can encode the filename in the docstring encoding
    # otherwise make the docstring a unicode string
    encoding = docstring.encoding
    if encoding is not None:
        try:
            pos_line.encode(encoding)
        except UnicodeEncodeError:
            encoding = None

    if not docstring:
        doc = ExprNodes.EncodedString(pos_line)
    else:
        # u'\\n' is a literal backslash followed by 'n', not a newline
        # character -- presumably because the text ends up in a C string
        # literal; confirm before changing.
        doc = ExprNodes.EncodedString(pos_line + u'\\n' + docstring)
    # reuse the string encoding of the original docstring (or None if the
    # position line cannot be represented in it)
    doc.encoding = encoding
    return doc
class AttributeAccessor: class AttributeAccessor:
"""Used as the result of the Node.get_children_accessors() generator""" """Used as the result of the Node.get_children_accessors() generator"""
...@@ -1357,20 +1381,12 @@ class DefNode(FuncDefNode): ...@@ -1357,20 +1381,12 @@ class DefNode(FuncDefNode):
Naming.pyfunc_prefix + prefix + name Naming.pyfunc_prefix + prefix + name
entry.pymethdef_cname = \ entry.pymethdef_cname = \
Naming.pymethdef_prefix + prefix + name Naming.pymethdef_prefix + prefix + name
if not Options.docstrings: if Options.docstrings:
entry.doc = None entry.doc = embed_position(self.pos, self.doc)
else:
if Options.embed_pos_in_docstring:
doc = u'File: %s (starting at line %s)'%relative_position(self.pos)
if not self.doc is None:
doc = doc + u'\\n' + self.doc
doc = ExprNodes.EncodedString(doc)
doc.encoding = self.doc.encoding
entry.doc = doc
else:
entry.doc = self.doc
entry.doc_cname = \ entry.doc_cname = \
Naming.funcdoc_prefix + prefix + name Naming.funcdoc_prefix + prefix + name
else:
entry.doc = None
def declare_arguments(self, env): def declare_arguments(self, env):
for arg in self.args: for arg in self.args:
...@@ -1922,10 +1938,7 @@ class PyClassDefNode(StatNode, BlockNode): ...@@ -1922,10 +1938,7 @@ class PyClassDefNode(StatNode, BlockNode):
import ExprNodes import ExprNodes
self.dict = ExprNodes.DictNode(pos, key_value_pairs = []) self.dict = ExprNodes.DictNode(pos, key_value_pairs = [])
if self.doc and Options.docstrings: if self.doc and Options.docstrings:
if Options.embed_pos_in_docstring: doc = embed_position(self.pos, self.doc)
doc = u'File: %s (starting at line %s)'%relative_position(self.pos)
doc = ExprNodes.EncodedString(doc + 'u\\n' + self.doc)
doc.encoding = self.doc.encoding
doc_node = ExprNodes.StringNode(pos, value = doc) doc_node = ExprNodes.StringNode(pos, value = doc)
else: else:
doc_node = None doc_node = None
...@@ -2036,13 +2049,9 @@ class CClassDefNode(StatNode, BlockNode): ...@@ -2036,13 +2049,9 @@ class CClassDefNode(StatNode, BlockNode):
typedef_flag = self.typedef_flag, typedef_flag = self.typedef_flag,
api = self.api) api = self.api)
scope = self.entry.type.scope scope = self.entry.type.scope
if self.doc and Options.docstrings: if self.doc and Options.docstrings:
if Options.embed_pos_in_docstring: scope.doc = embed_position(self.pos, self.doc)
scope.doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
scope.doc = scope.doc + '\\n' + self.doc
else:
scope.doc = self.doc
if has_body: if has_body:
self.body.analyse_declarations(scope) self.body.analyse_declarations(scope)
......
...@@ -10,6 +10,7 @@ import Nodes ...@@ -10,6 +10,7 @@ import Nodes
import ExprNodes import ExprNodes
from ModuleNode import ModuleNode from ModuleNode import ModuleNode
from Errors import error, InternalError from Errors import error, InternalError
from Cython import Utils
def p_ident(s, message = "Expected an identifier"): def p_ident(s, message = "Expected an identifier"):
if s.sy == 'IDENT': if s.sy == 'IDENT':
...@@ -1178,9 +1179,8 @@ def p_include_statement(s, level): ...@@ -1178,9 +1179,8 @@ def p_include_statement(s, level):
if s.compile_time_eval: if s.compile_time_eval:
include_file_path = s.context.find_include_file(include_file_name, pos) include_file_path = s.context.find_include_file(include_file_name, pos)
if include_file_path: if include_file_path:
encoding = s.context.detect_file_encoding(include_file_path) f = Utils.open_source_file(include_file_path, mode="rU")
f = codecs.open(include_file_path, "rU", encoding=encoding) s2 = PyrexScanner(f, include_file_path, s, source_encoding=f.encoding)
s2 = PyrexScanner(f, include_file_path, s, source_encoding=encoding)
try: try:
tree = p_statement_list(s2, level) tree = p_statement_list(s2, level)
finally: finally:
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# anywhere else in particular # anywhere else in particular
# #
import os, sys import os, sys, re, codecs
def replace_suffix(path, newsuf): def replace_suffix(path, newsuf):
base, _ = os.path.splitext(path) base, _ = os.path.splitext(path)
...@@ -32,3 +32,25 @@ def castrate_file(path, st): ...@@ -32,3 +32,25 @@ def castrate_file(path, st):
f.close() f.close()
if st: if st:
os.utime(path, (st.st_atime, st.st_mtime)) os.utime(path, (st.st_atime, st.st_mtime))
# support for source file encoding detection and unicode decoding
# Backslashes are escaped explicitly so that the pattern stays a unicode
# literal without relying on invalid string escapes (\s, \w).
_match_file_encoding = re.compile(u"coding[:=]\\s*([-\\w.]+)").search

def detect_file_encoding(source_filename):
    """Return the encoding declared in the first two lines of a source file.

    The file is scanned for a ``coding[:=] <name>`` declaration
    (PEPs 263 and 3120).  Only the first two lines are examined; if
    neither contains a declaration, "UTF-8" is returned.
    """
    # The declaration itself is plain ASCII, but the file may be stored in
    # a different encoding.  Undecodable bytes are therefore ignored rather
    # than raising UnicodeDecodeError before the declaration has been seen.
    # codecs.open() always opens the file in binary mode and the reader
    # splits lines itself, so no "U" flag is needed (it is rejected by
    # newer Python versions).
    f = codecs.open(source_filename, "r", encoding="UTF-8", errors="ignore")
    try:
        for line_no, line in enumerate(f):
            encoding = _match_file_encoding(line)
            if encoding:
                return encoding.group(1)
            if line_no == 1:
                # second line checked, stop looking
                break
    finally:
        f.close()
    return "UTF-8"
def open_source_file(source_filename, mode="rU"):
    """Open a source file for decoding with its declared encoding.

    The encoding is determined by detect_file_encoding(); the file is then
    opened through codecs.open() with the given mode.
    """
    return codecs.open(
        source_filename,
        mode=mode,
        encoding=detect_file_encoding(source_filename))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment