diff --git a/Cython/Compiler/CmdLine.py b/Cython/Compiler/CmdLine.py index 1aa55d705060ec098acade5c228342e35e7572a0..d09e9ba7e5036a319ae1c3310f16840e99402de2 100644 --- a/Cython/Compiler/CmdLine.py +++ b/Cython/Compiler/CmdLine.py @@ -134,6 +134,9 @@ def parse_command_line(args): arg = pop_arg() if arg.endswith(".pyx"): sources.append(arg) + elif arg.endswith(".py"): + # maybe do some other stuff, but this should work for now + sources.append(arg) elif arg.endswith(".o"): options.objects.append(arg) else: diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py index c1f51de1036d1bf5f043cde8174bca9085c64fc0..d107248f6631e012dc80ffb4848c4bdc160b8fd2 100644 --- a/Cython/Compiler/Code.py +++ b/Cython/Compiler/Code.py @@ -2,9 +2,10 @@ # Pyrex - Code output module # +import codecs import Naming import Options -from Cython.Utils import open_new_file +from Cython.Utils import open_new_file, open_source_file from PyrexTypes import py_object_type, typecast from TypeSlots import method_coexist @@ -85,23 +86,24 @@ class CCodeWriter: def indent(self): self.f.write(" " * self.level) + def get_py_version_hex(self, pyversion): + return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4] + def file_contents(self, file): try: return self.input_file_contents[file] except KeyError: - F = [line.replace('*/', '*[inserted by cython to avoid comment closer]/') - for line in open(file).readlines()] + F = [line.encode('ASCII', 'replace').replace( + '*/', '*[inserted by cython to avoid comment closer]/') + for line in open_source_file(file)] self.input_file_contents[file] = F return F - def get_py_version_hex(self, pyversion): - return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4] - def mark_pos(self, pos): if pos is None: return - file, line, col = pos - contents = self.file_contents(file) + filename, line, col = pos + contents = self.file_contents(filename) context = '' for i in range(max(0,line-3), min(line+2, len(contents))): @@ -109,8 +111,8 @@ class CCodeWriter: if i+1 == line: # line numbers in pyrex start counting up from 1 s = s.rstrip() + ' # <<<<<<<<<<<<<< ' + '\n' context += " * " + s - - marker = '"%s":%s\n%s' % (file, line, context) + + marker = '"%s":%d\n%s' % (filename.encode('ASCII', 'replace'), line, context) if self.last_marker != marker: self.marker = marker diff --git a/Cython/Compiler/ControlFlow.py b/Cython/Compiler/ControlFlow.py index a1e811e16bdb0a3d88479241fcfac623a8a25076..e433f7d0ce5f49810d3d27425633a2b6dbaa1776 100644 --- a/Cython/Compiler/ControlFlow.py +++ b/Cython/Compiler/ControlFlow.py @@ -1,4 +1,4 @@ -import bisect +import bisect, sys # This module keeps track of arbitrary "states" at any point of the code. # A state is considered known if every path to the given point agrees on @@ -13,6 +13,8 @@ import bisect # redesigned. It doesn't take return, raise, continue, or break into # account. +_END_POS = ((unichr(sys.maxunicode)*10),()) + class ControlFlow: def __init__(self, start_pos, incoming, parent): @@ -22,7 +24,7 @@ class ControlFlow: parent = incoming.parent self.parent = parent self.tip = {} - self.end_pos = ((),) + self.end_pos = _END_POS def start_branch(self, pos): self.end_pos = pos @@ -40,10 +42,10 @@ class ControlFlow: self.parent.end_pos = pos return LinearControlFlow(pos, self.parent) - def get_state(self, item, pos=((),())): + def get_state(self, item, pos=_END_POS): return self.get_pos_state(item, pos)[1] - def get_pos_state(self, item, pos=((),())): + def get_pos_state(self, item, pos=_END_POS): # do some caching if pos > self.end_pos: try: @@ -61,13 +63,13 @@ class LinearControlFlow(ControlFlow): self.events = {} def set_state(self, pos, item, state): - if self.tip.has_key(item): + if item in self.tip: del self.tip[item] if pos < self.start_pos: if self.incoming is not None: self.incoming.set_state(pos, item, state) else: - if self.events.has_key(item): + if item in self.events: event_list = self.events[item] else: event_list = [] @@ -77,7 +79,7 @@ class LinearControlFlow(ControlFlow): def _get_pos_state(self, item, pos): if pos > self.start_pos: - if self.events.has_key(item): + if item in self.events: event_list = self.events[item] for event in event_list[::-1]: if event[0] < pos: @@ -116,7 +118,7 @@ class BranchingControlFlow(ControlFlow): def set_state(self, pos, item, state): - if self.tip.has_key(item): + if item in self.tip: del self.tip[item] if pos < self.start_pos: @@ -157,5 +159,3 @@ class BranchingControlFlow(ControlFlow): if self.incoming is not limit and self.incoming is not None: s = "%s\n%s" % (self.incoming.to_string(indent, limit=limit), s) return s - - \ No newline at end of file diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index bec4b82665f1b2c6a21575f2abed1607ed591121..209b4d95a10115cb01055ba385237d5f89e7b945 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -18,6 +18,29 @@ from Cython.Debugging import print_call_chain from DebugFlags import debug_disposal_code, debug_temp_alloc, \ debug_coercion +class EncodedString(unicode): + # unicode string subclass to keep track of the original encoding. + # 'encoding' is None for unicode strings and the source encoding + # otherwise + encoding = None + + def byteencode(self): + assert self.encoding is not None + return self.encode(self.encoding) + + def utf8encode(self): + assert self.encoding is None + return self.encode("UTF-8") + + def is_unicode(self): + return self.encoding is None + is_unicode = property(is_unicode) + +# def __eq__(self, other): +# return unicode.__eq__(self, other) and \ +# getattr(other, 'encoding', '') == self.encoding + + class ExprNode(Node): # subexprs [string] Class var holding names of subexpr node attrs # type PyrexType Type of the result @@ -669,7 +692,7 @@ class IntNode(ConstNode): return str(self.value) def compile_time_value(self, denv): - return int(self.value) + return int(self.value, 0) class FloatNode(ConstNode): @@ -677,6 +700,17 @@ class FloatNode(ConstNode): def compile_time_value(self, denv): return float(self.value) + + def calculate_result_code(self): + strval = str(self.value) + if strval == 'nan': + return "(Py_HUGE_VAL * 0)" + elif strval == 'inf': + return "Py_HUGE_VAL" + elif strval == '-inf': + return "(-Py_HUGE_VAL)" + else: + return strval class StringNode(ConstNode): @@ -685,15 +719,16 @@ class StringNode(ConstNode): type = PyrexTypes.c_char_ptr_type def compile_time_value(self, denv): - return eval('"%s"' % self.value) + return self.value def analyse_types(self, env): self.entry = env.add_string_const(self.value) def coerce_to(self, dst_type, env): if dst_type.is_int: - if not self.type.is_pyobject and len(self.value) == 1: - return CharNode(self.pos, value=self.value) + if not self.type.is_pyobject and len(self.entry.init) == 1: + # we use the *encoded* value here + return CharNode(self.pos, value=self.entry.init) else: error(self.pos, "Only coerce single-character ascii strings can be used as ints.") return self @@ -776,7 +811,7 @@ class NameNode(AtomicExprNode): try: return denv.lookup(self.name) except KeyError: - error(self.pos, "Compile-time name '%s' not defined", self.name) + error(self.pos, "Compile-time name '%s' not defined" % self.name) def coerce_to(self, dst_type, env): # If coercing to a generic pyobject and this is a builtin @@ -4067,9 +4102,8 @@ bad: cpp_exception_utility_code = [ """ -static int __Pyx_CppExn2PyErr(); /*proto*/ -""",""" -void __Pyx_CppExn2PyErr() { +#ifndef __Pyx_CppExn2PyErr +static void __Pyx_CppExn2PyErr() { try { if (PyErr_Occurred()) ; // let the latest Python exn pass through and ignore the current one @@ -4086,6 +4120,7 @@ void __Pyx_CppExn2PyErr() { PyErr_SetString(PyExc_RuntimeError, "Unknown exception"); } } -"""] +#endif +""",""] #------------------------------------------------------------------------------------ diff --git a/Cython/Compiler/Main.py b/Cython/Compiler/Main.py index 5d7f4207b88da7adaa804c60348e8da5e7b116ba..ef742ed14632a47d79f2ddf030aa2ceec8b71e10 100644 --- a/Cython/Compiler/Main.py +++ b/Cython/Compiler/Main.py @@ -2,12 +2,11 @@ # Cython Top Level # -import os, sys, re +import os, sys, re, codecs if sys.version_info[:2] < (2, 2): print >>sys.stderr, "Sorry, Cython requires Python 2.2 or later" sys.exit(1) -import os from time import time import Version from Scanning import PyrexScanner @@ -140,9 +139,18 @@ class Context: def parse(self, source_filename, type_names, pxd, full_module_name): # Parse the given source file and return a parse tree. - f = open(source_filename, "rU") - s = PyrexScanner(f, source_filename, - type_names = type_names, context = self) + f = Utils.open_source_file(source_filename, "rU") + + if isinstance(source_filename, unicode): + name = source_filename + else: + filename_encoding = sys.getfilesystemencoding() + if filename_encoding is None: + filename_encoding = getdefaultencoding() + name = source_filename.decode(filename_encoding) + + s = PyrexScanner(f, name, source_encoding = f.encoding, + type_names = type_names, context = self) try: tree = Parsing.p_module(s, pxd, full_module_name) finally: diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py index 98283319a93ae3776e237fe5d88db3f91a166bdc..09d3814d4a68a1e798314b1ee877946d520a1869 100644 --- a/Cython/Compiler/ModuleNode.py +++ b/Cython/Compiler/ModuleNode.py @@ -793,13 +793,14 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): "static void %s(PyObject *o) {" % scope.mangle_internal("tp_dealloc")) py_attrs = [] + weakref_slot = scope.lookup_here("__weakref__") for entry in scope.var_entries: - if entry.type.is_pyobject and entry.name != "__weakref__": + if entry.type.is_pyobject and entry is not weakref_slot: py_attrs.append(entry) - if py_attrs or scope.lookup_here("__weakref__"): + if py_attrs or weakref_slot in scope.var_entries: self.generate_self_cast(scope, code) self.generate_usr_dealloc_call(scope, code) - if scope.lookup_here("__weakref__"): + if weakref_slot in scope.var_entries: code.putln("if (p->__weakref__) PyObject_ClearWeakRefs(o);") for entry in py_attrs: code.put_xdecref("p->%s" % entry.cname, entry.type) @@ -1377,7 +1378,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): entry.pystring_cname, entry.cname, entry.cname, - isinstance(entry.init, unicode) + entry.type.is_unicode )) code.putln( "{0, 0, 0, 0}") diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py index 76a14ee6432a358b178074b57e317e675a016e10..edc14e98f55b32c9470618a136b977843f1fcddf 100644 --- a/Cython/Compiler/Nodes.py +++ b/Cython/Compiler/Nodes.py @@ -37,7 +37,31 @@ def relative_position(pos): AUTHOR: William Stein """ return (pos[0][absolute_path_length+1:], pos[1]) - + +def embed_position(pos, docstring): + if not Options.embed_pos_in_docstring: + return docstring + pos_line = u'File: %s (starting at line %s)' % relative_position(self.pos) + if docstring is None: + # unicode string + return ExprNodes.EncodedString(pos_line) + + # make sure we can encode the filename in the docstring encoding + # otherwise make the docstring a unicode string + encoding = docstring.encoding + if encoding is not None: + try: + encoded_bytes = pos_line.encode(encoding) + except UnicodeEncodeError: + encoding = None + + if not docstring: + # reuse the string encoding of the original docstring + doc = ExprNodes.EncodedString(pos_line) + else: + doc = ExprNodes.EncodedString(pos_line + u'\\n' + docstring) + doc.encoding = encoding + return doc class AttributeAccessor: """Used as the result of the Node.get_children_accessors() generator""" @@ -1199,7 +1223,7 @@ class DefNode(FuncDefNode): # args [CArgDeclNode] formal arguments # star_arg PyArgDeclNode or None * argument # starstar_arg PyArgDeclNode or None ** argument - # doc string or None + # doc EncodedString or None # body StatListNode # # The following subnode is constructed internally @@ -1357,17 +1381,12 @@ class DefNode(FuncDefNode): Naming.pyfunc_prefix + prefix + name entry.pymethdef_cname = \ Naming.pymethdef_prefix + prefix + name - if not Options.docstrings: - self.entry.doc = None - else: - if Options.embed_pos_in_docstring: - entry.doc = 'File: %s (starting at line %s)'%relative_position(self.pos) - if not self.doc is None: - entry.doc = entry.doc + '\\n' + self.doc - else: - entry.doc = self.doc + if Options.docstrings: + entry.doc = embed_position(self.pos, self.doc) entry.doc_cname = \ Naming.funcdoc_prefix + prefix + name + else: + entry.doc = None def declare_arguments(self, env): for arg in self.args: @@ -1897,7 +1916,7 @@ class OverrideCheckNode(StatNode): class PyClassDefNode(StatNode, BlockNode): # A Python class definition. # - # name string Name of the class + # name EncodedString Name of the class # doc string or None # body StatNode Attribute definition code # entry Symtab.Entry @@ -1919,9 +1938,7 @@ class PyClassDefNode(StatNode, BlockNode): import ExprNodes self.dict = ExprNodes.DictNode(pos, key_value_pairs = []) if self.doc and Options.docstrings: - if Options.embed_pos_in_docstring: - doc = 'File: %s (starting at line %s)'%relative_position(self.pos) - doc = doc + '\\n' + self.doc + doc = embed_position(self.pos, self.doc) doc_node = ExprNodes.StringNode(pos, value = doc) else: doc_node = None @@ -1961,7 +1978,7 @@ class PyClassDefNode(StatNode, BlockNode): self.dict.generate_disposal_code(code) -class CClassDefNode(StatNode): +class CClassDefNode(StatNode, BlockNode): # An extension type definition. # # visibility 'private' or 'public' or 'extern' @@ -2032,13 +2049,9 @@ class CClassDefNode(StatNode): typedef_flag = self.typedef_flag, api = self.api) scope = self.entry.type.scope - + if self.doc and Options.docstrings: - if Options.embed_pos_in_docstring: - scope.doc = 'File: %s (starting at line %s)'%relative_position(self.pos) - scope.doc = scope.doc + '\\n' + self.doc - else: - scope.doc = self.doc + scope.doc = embed_position(self.pos, self.doc) if has_body: self.body.analyse_declarations(scope) @@ -2054,6 +2067,7 @@ class CClassDefNode(StatNode): self.body.analyse_expressions(scope) def generate_function_definitions(self, env, code, transforms): + self.generate_py_string_decls(self.entry.type.scope, code) if self.body: self.body.generate_function_definitions( self.entry.type.scope, code, transforms) @@ -2073,7 +2087,7 @@ class PropertyNode(StatNode): # Definition of a property in an extension type. # # name string - # doc string or None Doc string + # doc EncodedString or None Doc string # body StatListNode child_attrs = ["body"] diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index 93491c9e0a382e00b02c2c22a17e36f85de87de6..e18bac0a1cce66396422d28697c4274f3151fffb 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -2,7 +2,7 @@ # Pyrex Parser # -import os, re +import os, re, codecs from string import join, replace from types import ListType, TupleType from Scanning import PyrexScanner @@ -10,6 +10,7 @@ import Nodes import ExprNodes from ModuleNode import ModuleNode from Errors import error, InternalError +from Cython import Utils def p_ident(s, message = "Expected an identifier"): if s.sy == 'IDENT': @@ -281,8 +282,10 @@ def p_call(s, function): if not arg.is_name: s.error("Expected an identifier before '='", pos = arg.pos) + encoded_name = ExprNodes.EncodedString(arg.name) + encoded_name.encoding = s.source_encoding keyword = ExprNodes.StringNode(arg.pos, - value = arg.name) + value = encoded_name) arg = p_simple_expr(s) keyword_args.append((keyword, arg)) else: @@ -459,7 +462,7 @@ def p_atom(s): value = s.systring[:-1] s.next() return ExprNodes.ImagNode(pos, value = value) - elif sy == 'STRING' or sy == 'BEGIN_STRING': + elif sy == 'BEGIN_STRING': kind, value = p_cat_string_literal(s) if kind == 'c': return ExprNodes.CharNode(pos, value = value) @@ -500,7 +503,12 @@ def p_name(s, name): elif isinstance(value, float): return ExprNodes.FloatNode(pos, value = rep) elif isinstance(value, str): - return ExprNodes.StringNode(pos, value = rep[1:-1]) + sval = ExprNodes.EncodedString(rep[1:-1]) + sval.encoding = value.encoding + return ExprNodes.StringNode(pos, value = sval) + elif isinstance(value, unicode): + sval = ExprNodes.EncodedString(rep[2:-1]) + return ExprNodes.StringNode(pos, value = sval) else: error(pos, "Invalid type for compile-time constant: %s" % value.__class__.__name__) @@ -508,21 +516,25 @@ def p_name(s, name): def p_cat_string_literal(s): # A sequence of one or more adjacent string literals. - # Returns (kind, value) where kind in ('', 'c', 'r') + # Returns (kind, value) where kind in ('', 'c', 'r', 'u') kind, value = p_string_literal(s) if kind != 'c': strings = [value] - while s.sy == 'STRING' or s.sy == 'BEGIN_STRING': + while s.sy == 'BEGIN_STRING': next_kind, next_value = p_string_literal(s) if next_kind == 'c': self.error( "Cannot concatenate char literal with another string or char literal") + elif next_kind == 'u': + kind = 'u' strings.append(next_value) - value = ''.join(strings) + value = ExprNodes.EncodedString( u''.join(strings) ) + if kind != 'u': + value.encoding = s.source_encoding return kind, value def p_opt_string_literal(s): - if s.sy == 'STRING' or s.sy == 'BEGIN_STRING': + if s.sy == 'BEGIN_STRING': return p_string_literal(s) else: return None @@ -530,10 +542,6 @@ def p_opt_string_literal(s): def p_string_literal(s): # A single string or char literal. # Returns (kind, value) where kind in ('', 'c', 'r', 'u') - if s.sy == 'STRING': - value = unquote(s.systring) - s.next() - return value # s.sy == 'BEGIN_STRING' pos = s.position() #is_raw = s.systring[:1].lower() == "r" @@ -549,8 +557,6 @@ def p_string_literal(s): systr = s.systring if len(systr) == 1 and systr in "'\"\n": chars.append('\\') - if kind == 'u' and not isinstance(systr, unicode): - systr = systr.decode("UTF-8") chars.append(systr) elif sy == 'ESCAPE': systr = s.systring @@ -572,7 +578,8 @@ def p_string_literal(s): elif c in 'ux': if kind == 'u': try: - chars.append(systr.decode('unicode_escape')) + chars.append( + systr.encode("ASCII").decode('unicode_escape')) except UnicodeDecodeError: s.error("Invalid unicode escape '%s'" % systr, pos = pos) @@ -593,50 +600,12 @@ def p_string_literal(s): "Unexpected token %r:%r in string literal" % (sy, s.systring)) s.next() - value = ''.join(chars) + value = ExprNodes.EncodedString( u''.join(chars) ) + if kind != 'u': + value.encoding = s.source_encoding #print "p_string_literal: value =", repr(value) ### return kind, value -def unquote(s): - is_raw = 0 - if s[:1].lower() == "r": - is_raw = 1 - s = s[1:] - q = s[:3] - if q == '"""' or q == "'''": - s = s[3:-3] - else: - s = s[1:-1] - if is_raw: - s = s.replace('\\', '\\\\') - s = s.replace('\n', '\\\n') - else: - # Split into double quotes, newlines, escape sequences - # and spans of regular chars - l1 = re.split(r'((?:\\[0-7]{1,3})|(?:\\x[0-9A-Fa-f]{2})|(?:\\.)|(?:\\\n)|(?:\n)|")', s) - #print "unquote: l1 =", l1 ### - l2 = [] - for item in l1: - if item == '"' or item == '\n': - l2.append('\\' + item) - elif item == '\\\n': - pass - elif item[:1] == '\\': - if len(item) == 2: - if item[1] in '"\\abfnrtv': - l2.append(item) - else: - l2.append(item[1]) - elif item[1:2] == 'x': - l2.append('\\x0' + item[2:]) - else: - # octal escape - l2.append(item) - else: - l2.append(item) - s = "".join(l2) - return s - # list_display ::= "[" [listmaker] "]" # listmaker ::= expression ( list_for | ( "," expression )* [","] ) # list_iter ::= list_for | list_if @@ -946,6 +915,8 @@ def p_import_statement(s): ExprNodes.StringNode(pos, value = "*")]) else: name_list = None + dotted_name = ExprNodes.EncodedString(dotted_name) + dotted_name.encoding = s.source_encoding stat = Nodes.SingleAssignmentNode(pos, lhs = ExprNodes.NameNode(pos, name = as_name or target_name), @@ -984,14 +955,18 @@ def p_from_import_statement(s): imported_name_strings = [] items = [] for (name_pos, name, as_name) in imported_names: + encoded_name = ExprNodes.EncodedString(name) + encoded_name.encoding = s.source_encoding imported_name_strings.append( - ExprNodes.StringNode(name_pos, value = name)) + ExprNodes.StringNode(name_pos, value = encoded_name)) items.append( (name, ExprNodes.NameNode(name_pos, name = as_name or name))) import_list = ExprNodes.ListNode( imported_names[0][0], args = imported_name_strings) + dotted_name = ExprNodes.EncodedString(dotted_name) + dotted_name.encoding = s.source_encoding return Nodes.FromImportStatNode(pos, module = ExprNodes.ImportNode(dotted_name_pos, module_name = ExprNodes.StringNode(dotted_name_pos, @@ -1204,8 +1179,8 @@ def p_include_statement(s, level): if s.compile_time_eval: include_file_path = s.context.find_include_file(include_file_name, pos) if include_file_path: - f = open(include_file_path, "rU") - s2 = PyrexScanner(f, include_file_path, s) + f = Utils.open_source_file(include_file_path, mode="rU") + s2 = PyrexScanner(f, include_file_path, s, source_encoding=f.encoding) try: tree = p_statement_list(s2, level) finally: @@ -1996,7 +1971,8 @@ def p_class_statement(s): # s.sy == 'class' pos = s.position() s.next() - class_name = p_ident(s) + class_name = ExprNodes.EncodedString( p_ident(s) ) + class_name.encoding = s.source_encoding if s.sy == '(': s.next() base_list = p_simple_expr_list(s) @@ -2113,7 +2089,7 @@ def p_property_decl(s): return Nodes.PropertyNode(pos, name = name, doc = doc, body = body) def p_doc_string(s): - if s.sy == 'STRING' or s.sy == 'BEGIN_STRING': + if s.sy == 'BEGIN_STRING': _, result = p_cat_string_literal(s) if s.sy != 'EOF': s.expect_newline("Syntax error in doc string") diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py index bf3e6f9f8a3a9d4d26faf65a90687f980112a95f..d7427d4cbfc603b2be780f809ee46993cef2c5c8 100644 --- a/Cython/Compiler/PyrexTypes.py +++ b/Cython/Compiler/PyrexTypes.py @@ -37,6 +37,7 @@ class PyrexType(BaseType): # is_enum boolean Is a C enum type # is_typedef boolean Is a typedef type # is_string boolean Is a C char * type + # is_unicode boolean Is a UTF-8 encoded C char * type # is_returncode boolean Is used only to signal exceptions # is_error boolean Is the dummy error type # has_attributes boolean Has C dot-selectable attributes @@ -83,6 +84,7 @@ class PyrexType(BaseType): is_enum = 0 is_typedef = 0 is_string = 0 + is_unicode = 0 is_returncode = 0 is_error = 0 has_attributes = 0 @@ -875,19 +877,49 @@ class CEnumType(CType): return self.base_declaration_code(public_decl(base, dll_linkage), entity_code) +def _escape_byte_string(s): + try: + s.decode("ASCII") + return s + except UnicodeDecodeError: + pass + l = [] + append = l.append + for c in s: + o = ord(c) + if o >= 128: + append('\\x%X' % o) + else: + append(c) + return ''.join(l) + class CStringType: # Mixin class for C string types. is_string = 1 + is_unicode = 0 to_py_function = "PyString_FromString" from_py_function = "PyString_AsString" exception_value = "NULL" def literal_code(self, value): - if isinstance(value, unicode): - value = value.encode("UTF-8") - return '"%s"' % value + assert isinstance(value, str) + return '"%s"' % _escape_byte_string(value) + + +class CUTF8StringType: + # Mixin class for C unicode types. + + is_string = 1 + is_unicode = 1 + + to_py_function = "PyUnicode_DecodeUTF8" + exception_value = "NULL" + + def literal_code(self, value): + assert isinstance(value, str) + return '"%s"' % _escape_byte_string(value) class CCharArrayType(CStringType, CArrayType): @@ -898,6 +930,16 @@ class CCharArrayType(CStringType, CArrayType): def __init__(self, size): CArrayType.__init__(self, c_char_type, size) + + +class CUTF8CharArrayType(CUTF8StringType, CArrayType): + # C 'char []' type. + + parsetuple_format = "s" + pymemberdef_typecode = "T_STRING_INPLACE" + + def __init__(self, size): + CArrayType.__init__(self, c_char_type, size) class CCharPtrType(CStringType, CPtrType): @@ -910,6 +952,16 @@ class CCharPtrType(CStringType, CPtrType): CPtrType.__init__(self, c_char_type) +class CUTF8CharPtrType(CUTF8StringType, CPtrType): + # C 'char *' type, encoded in UTF-8. + + parsetuple_format = "s" + pymemberdef_typecode = "T_STRING" + + def __init__(self): + CPtrType.__init__(self, c_char_type) + + class ErrorType(PyrexType): # Used to prevent propagation of error messages. @@ -974,7 +1026,9 @@ c_longdouble_type = CFloatType(8) c_null_ptr_type = CNullPtrType(c_void_type) c_char_array_type = CCharArrayType(None) +c_utf8_char_array_type = CUTF8CharArrayType(None) c_char_ptr_type = CCharPtrType() +c_utf8_char_ptr_type = CUTF8CharPtrType() c_char_ptr_ptr_type = CPtrType(c_char_ptr_type) c_int_ptr_type = CPtrType(c_int_type) diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py index e48c8dcea590480bf5c5778ed99f473860579903..e91e343aaadd6c5e1e7a9773e58c900ef603083d 100644 --- a/Cython/Compiler/Scanning.py +++ b/Cython/Compiler/Scanning.py @@ -212,7 +212,7 @@ class PyrexScanner(Scanner): resword_dict = build_resword_dict() def __init__(self, file, filename, parent_scanner = None, - type_names = None, context = None): + type_names = None, context = None, source_encoding=None): Scanner.__init__(self, get_lexicon(), file, filename) if parent_scanner: self.context = parent_scanner.context @@ -226,6 +226,7 @@ class PyrexScanner(Scanner): self.compile_time_env = initial_compile_time_env() self.compile_time_eval = 1 self.compile_time_expr = 0 + self.source_encoding = source_encoding self.trace = trace_scanner self.indentation_stack = [0] self.indentation_char = None diff --git a/Cython/Compiler/Symtab.py b/Cython/Compiler/Symtab.py index 0a6e429364a46203fe604e6d0fd357c4e571f098..f4ae088ce9b5f2d2f7e67079c514f4aaf95ecdbc 100644 --- a/Cython/Compiler/Symtab.py +++ b/Cython/Compiler/Symtab.py @@ -3,8 +3,6 @@ # import re -import bisect - from Errors import warning, error, InternalError import Options import Naming @@ -434,15 +432,21 @@ class Scope: if not entry: entry = self.declare_var(name, py_object_type, None) return entry - + def add_string_const(self, value): # Add an entry for a string constant. cname = self.new_const_cname() - entry = Entry("", cname, c_char_array_type, init = value) + if value.is_unicode: + c_type = c_utf8_char_array_type + value = value.utf8encode() + else: + c_type = c_char_array_type + value = value.byteencode() + entry = Entry("", cname, c_type, init = value) entry.used = 1 self.const_entries.append(entry) return entry - + def get_string_const(self, value): # Get entry for string constant. Returns an existing # one if possible, otherwise creates a new one. @@ -452,7 +456,7 @@ class Scope: entry = self.add_string_const(value) genv.string_to_entry[value] = entry return entry - + def add_py_string(self, entry): # If not already done, allocate a C name for a Python version of # a string literal, and add it to the list of Python strings to @@ -460,7 +464,7 @@ class Scope: # Python identifier, it will be interned. if not entry.pystring_cname: value = entry.init - if identifier_pattern.match(value) and isinstance(value, str): + if not entry.type.is_unicode and identifier_pattern.match(value): entry.pystring_cname = self.intern(value) entry.is_interned = 1 else: diff --git a/Cython/Utils.py b/Cython/Utils.py index 1b4b07d3ae04b8e6e6824a54d4df2b2778be063b..20ea31e64f29385b027fa240e3e862b486c1d3ff 100644 --- a/Cython/Utils.py +++ b/Cython/Utils.py @@ -3,7 +3,7 @@ # anywhere else in particular # -import os, sys +import os, sys, re, codecs def replace_suffix(path, newsuf): base, _ = os.path.splitext(path) @@ -32,3 +32,25 @@ def castrate_file(path, st): f.close() if st: os.utime(path, (st.st_atime, st.st_mtime)) + +# support for source file encoding detection and unicode decoding + +_match_file_encoding = re.compile(u"coding[:=]\s*([-\w.]+)").search + +def detect_file_encoding(source_filename): + # PEPs 263 and 3120 + f = codecs.open(source_filename, "rU", encoding="UTF-8") + try: + for line_no, line in enumerate(f): + encoding = _match_file_encoding(line) + if encoding: + return encoding.group(1) + if line_no == 1: + break + finally: + f.close() + return "UTF-8" + +def open_source_file(source_filename, mode="rU"): + encoding = detect_file_encoding(source_filename) + return codecs.open(source_filename, mode=mode, encoding=encoding) diff --git a/Demos/Setup.py b/Demos/Setup.py index 18e05820ba4c7686d7e3f74fcf0f12734596e0f2..75b05af0fce37948d547cb1fb88ae7303ba7712c 100644 --- a/Demos/Setup.py +++ b/Demos/Setup.py @@ -1,31 +1,20 @@ +import glob + from distutils.core import setup from distutils.extension import Extension from Cython.Distutils import build_ext -setup( - name = 'Demos', - ext_modules=[ +ext_modules=[ Extension("primes", ["primes.pyx"]), Extension("spam", ["spam.pyx"]), -# Extension("numeric_demo", ["numeric_demo.pyx"]), - Extension("test", ["test.pyx"]), - Extension("func_pointers", ["func_pointers.pyx"]), -# Extension("inplace", ["inplace.pyx"]), -# Extension("withGIL", ["withGIL.pyx"]), - Extension("class_members", ["class_members.pyx"]), -# Extension("inherit_bug", ["inherit_bug.pyx"]), - Extension("override", ["override.pyx"]), - Extension("cond", ["cond.pyx"]), -# Extension("submodule.test", ["submodule/test.pyx"]), - Extension("errors", ["errors.pyx"]), - Extension("cpdef", ["cpdef.pyx"]), - Extension("range", ["range.pyx"]), - Extension("early_temps", ["early_temps.pyx"]), - Extension("ints", ["ints.pyx"]), - Extension("clear", ["clear.pyx"]), - Extension("detect_override", ["detect_override.pyx"]), - Extension("fixes", ["fixes.pyx"]), - ], +] + +for file in glob.glob("*.pyx"): + if file != "numeric_demo.pyx": + ext_modules.append(Extension(file[:-4], [file])) + +setup( + name = 'Demos', cmdclass = {'build_ext': build_ext}, -# include_dirs = "/System/Library/Frameworks/Python.framework/Versions/2.3/include/python2.3/" + ext_modules = ext_modules, ) diff --git a/Includes/python.pxi b/Includes/python.pxd similarity index 100% rename from Includes/python.pxi rename to Includes/python.pxd diff --git a/Includes/python2.5.pxi b/Includes/python2.5.pxd similarity index 100% rename from Includes/python2.5.pxi rename to Includes/python2.5.pxd diff --git a/Includes/python_bool.pxi b/Includes/python_bool.pxd similarity index 100% rename from Includes/python_bool.pxi rename to Includes/python_bool.pxd diff --git a/Includes/python_complex.pxi b/Includes/python_complex.pxd similarity index 100% rename from Includes/python_complex.pxi rename to Includes/python_complex.pxd diff --git a/Includes/python_dict.pxi b/Includes/python_dict.pxd similarity index 100% rename from Includes/python_dict.pxi rename to Includes/python_dict.pxd diff --git a/Includes/python_exc.pxi b/Includes/python_exc.pxd similarity index 100% rename from Includes/python_exc.pxi rename to Includes/python_exc.pxd diff --git a/Includes/python_float.pxi b/Includes/python_float.pxd similarity index 100% rename from Includes/python_float.pxi rename to Includes/python_float.pxd diff --git a/Includes/python_function.pxi b/Includes/python_function.pxd similarity index 100% rename from Includes/python_function.pxi rename to Includes/python_function.pxd diff --git a/Includes/python_instance.pxi b/Includes/python_instance.pxd similarity index 100% rename from Includes/python_instance.pxi rename to Includes/python_instance.pxd diff --git a/Includes/python_int.pxi b/Includes/python_int.pxd similarity index 100% rename from Includes/python_int.pxi rename to Includes/python_int.pxd diff --git a/Includes/python_iterator.pxi b/Includes/python_iterator.pxd similarity index 100% rename from Includes/python_iterator.pxi rename to Includes/python_iterator.pxd diff --git a/Includes/python_list.pxi b/Includes/python_list.pxd similarity index 100% rename from Includes/python_list.pxi rename to Includes/python_list.pxd diff --git a/Includes/python_long.pxi b/Includes/python_long.pxd similarity index 100% rename from Includes/python_long.pxi rename to Includes/python_long.pxd diff --git a/Includes/python_mapping.pxi b/Includes/python_mapping.pxd similarity index 100% rename from Includes/python_mapping.pxi rename to Includes/python_mapping.pxd diff --git a/Includes/python_mem.pxi b/Includes/python_mem.pxd similarity index 100% rename from Includes/python_mem.pxi rename to Includes/python_mem.pxd diff --git a/Includes/python_method.pxi b/Includes/python_method.pxd similarity index 100% rename from Includes/python_method.pxi rename to Includes/python_method.pxd diff --git a/Includes/python_module.pxi b/Includes/python_module.pxd similarity index 100% rename from Includes/python_module.pxi rename to Includes/python_module.pxd diff --git a/Includes/python_number.pxi b/Includes/python_number.pxd similarity index 100% rename from Includes/python_number.pxi rename to Includes/python_number.pxd diff --git a/Includes/python_object.pxi b/Includes/python_object.pxd similarity index 100% rename from Includes/python_object.pxi rename to Includes/python_object.pxd diff --git a/Includes/python_parse.pxi b/Includes/python_parse.pxd similarity index 100% rename from Includes/python_parse.pxi rename to Includes/python_parse.pxd diff --git a/Includes/python_ref.pxi b/Includes/python_ref.pxd similarity index 100% rename from Includes/python_ref.pxi rename to Includes/python_ref.pxd diff --git a/Includes/python_sequence.pxi b/Includes/python_sequence.pxd similarity index 100% rename from Includes/python_sequence.pxi rename to Includes/python_sequence.pxd diff --git a/Includes/python_set.pxi b/Includes/python_set.pxd similarity index 100% rename from Includes/python_set.pxi rename to Includes/python_set.pxd diff --git a/Includes/python_string.pxi b/Includes/python_string.pxd similarity index 100% rename from Includes/python_string.pxi rename to Includes/python_string.pxd diff --git a/Includes/python_tuple.pxi b/Includes/python_tuple.pxd similarity index 100% rename from Includes/python_tuple.pxi rename to Includes/python_tuple.pxd diff --git a/Includes/python_type.pxi b/Includes/python_type.pxd similarity index 100% rename from Includes/python_type.pxi rename to Includes/python_type.pxd diff --git a/Includes/stdio.pxi b/Includes/stdio.pxd similarity index 100% rename from Includes/stdio.pxi rename to Includes/stdio.pxd diff --git a/Includes/stdlib.pxi b/Includes/stdlib.pxd similarity index 100% rename from Includes/stdlib.pxi rename to Includes/stdlib.pxd diff --git a/runtests.py b/runtests.py index d5bf75dce3d43d22b9d28181bc0544c3d5f36b21..1c3441bbf2f6bb5effbe59d1c0e5be92e99dd4a1 100644 --- a/runtests.py +++ b/runtests.py @@ -2,20 +2,16 @@ import os, sys, unittest, doctest -#from Cython.Distutils.build_ext import build_ext -#from Cython.Distutils.extension import Extension - -from distutils.extension import Extension +from Cython.Distutils.extension import Extension from Cython.Distutils import build_ext - from distutils.dist import Distribution distutils_distro = Distribution() TEST_DIRS = ['compile', 'run'] TEST_RUN_DIRS = ['run'] -INCLUDE_DIRS = os.getenv('INCLUDE', '').split(os.pathsep) +INCLUDE_DIRS = [ d for d in os.getenv('INCLUDE', '').split(os.pathsep) if d ] CFLAGS = os.getenv('CFLAGS', '').split() class TestBuilder(object): diff --git a/tests/compile/crunchytype.h b/tests/compile/crunchytype.h new file mode 100644 index 0000000000000000000000000000000000000000..6ea0e37c0f0411ab354e7eb0c236ea9d12e2548a --- /dev/null +++ b/tests/compile/crunchytype.h @@ -0,0 +1,5 @@ + +struct CrunchyType { + int number; + PyObject* string; +}; diff --git a/tests/compile/crunchytype.pxd b/tests/compile/crunchytype.pxd new file mode 100644 index 0000000000000000000000000000000000000000..c03e38dad57657c2ec2118bc7bc13901b46b814b --- /dev/null +++ b/tests/compile/crunchytype.pxd @@ -0,0 +1,4 @@ +cdef extern from "crunchytype.h": + cdef class crunchytype.Crunchy [ object CrunchyType ]: + cdef int number + cdef object string diff --git a/tests/compile/extimportedsubtype.pyx b/tests/compile/extimportedsubtype.pyx new file mode 100644 index 0000000000000000000000000000000000000000..dd081e0c7f9bc86f343f37f092c19c693b353f50 --- /dev/null +++ b/tests/compile/extimportedsubtype.pyx @@ -0,0 +1,7 @@ +from crunchytype cimport Crunchy + +cdef class Sub2(Crunchy): + cdef char character + +cdef class Sub1(Sub2): + cdef char character diff --git a/tests/run/addop.pyx b/tests/run/addop.pyx index 0bf19e5ccfc38d7be39e7ccab7b644fb87a5563e..739c214d6665e27b525826f8f2f282efdc85eef5 100644 --- a/tests/run/addop.pyx +++ b/tests/run/addop.pyx @@ -1,10 +1,11 @@ __doc__ = """ - >>> + >>> f() + (30, 22) """ def f(): cdef int int1, int2, int3 - cdef char *ptr1, *ptr2, *ptr3 + cdef char *ptr1, *ptr2 = "test", *ptr3 = "toast" int2 = 10 int3 = 20 obj1 = 1 diff --git a/tests/run/cintop.pyx b/tests/run/cintop.pyx index 49d36ee67dcd92c27d71e64a61903e84ad557e88..4001346934b9eea4c2f31d5f73fdc5eb34f5c7cd 100644 --- a/tests/run/cintop.pyx +++ b/tests/run/cintop.pyx @@ -1,6 +1,20 @@ __doc__ = """ + >>> int2 = 42 + >>> int3 = 7 + >>> char1 = ord('C') + + >>> int1 = int2 | int3 + >>> int1 |= int2 ^ int3 + >>> int1 ^= int2 & int3 + >>> int1 ^= int2 << int3 + >>> int1 ^= int2 >> int3 + >>> int1 ^= int2 << int3 | int2 >> int3 + >>> long1 = char1 | int1 + >>> print (int1, long1) == f() + True + >>> f() - (5376, 67) + (45, 111) """ def f(): @@ -12,10 +26,10 @@ def f(): char1 = c'C' int1 = int2 | int3 - int1 = int2 ^ int3 - int1 = int2 & int3 - int1 = int2 << int3 - int1 = int2 >> int3 - int1 = int2 << int3 | int2 >> int3 - long1 = char1 | long2 + int1 |= int2 ^ int3 + int1 ^= int2 & int3 + int1 ^= int2 << int3 + int1 ^= int2 >> int3 + int1 ^= int2 << int3 | int2 >> int3 + long1 = char1 | int1 return int1, long1 diff --git a/tests/run/ct_DEF.pyx b/tests/run/ct_DEF.pyx index bf191a11d2ab6abdcfb90f0a56ce7070152ef460..d85cfb0668a5fb637f6717a7a1309cdd92cfffc3 100644 --- a/tests/run/ct_DEF.pyx +++ b/tests/run/ct_DEF.pyx @@ -1,8 +1,12 @@ __doc__ = """ >>> c() 120 - >>> i() - 42 + >>> i1() == 42 + True + >>> i2() == 0x42 + True + >>> i3() == 042 + True >>> l() 666 >>> f() @@ -23,7 +27,9 @@ DEF TUPLE = (1, 2, "buckle my shoe") DEF TRUE_FALSE = (True, False) DEF CHAR = c'x' -DEF INT = 42 +DEF INT1 = 42 +DEF INT2 = 0x42 +DEF INT3 = 042 DEF LONG = 666L DEF FLOAT = 12.5 DEF STR = "spam" @@ -37,9 +43,19 @@ def c(): c = CHAR return c -def i(): +def i1(): + cdef int i + i = INT1 + return i + +def i2(): + cdef int i + i = INT2 + return i + +def i3(): cdef int i - i = INT + i = INT3 return i def l(): diff --git a/tests/run/include.pyx b/tests/run/include.pyx new file mode 100644 index 0000000000000000000000000000000000000000..c350a74bf2e8e81122cdc32fba9164037f3047eb --- /dev/null +++ b/tests/run/include.pyx @@ -0,0 +1,8 @@ +__doc__ = """ + >>> D + 2 +""" + +D = 1 + +include "testinclude.pxi" diff --git a/tests/run/pyextattrref.pyx b/tests/run/pyextattrref.pyx index 3f9261a90ec8ef3fd948c06828bc050e6db1f932..7ef36467319370eaf9d3e43e081e274fd6fabc21 100644 --- a/tests/run/pyextattrref.pyx +++ b/tests/run/pyextattrref.pyx @@ -1,12 +1,22 @@ __doc__ = """ - >>> + >>> s = Spam(Eggs("ham")) + >>> test(s) + 'ham' """ cdef class Eggs: cdef object ham + def __init__(self, ham): + self.ham = ham cdef class Spam: cdef Eggs eggs + def __init__(self, eggs): + self.eggs = eggs -cdef void tomato(Spam s): +cdef object tomato(Spam s): food = s.eggs.ham + return food + +def test(Spam s): + return tomato(s) diff --git a/tests/run/r_docstrings.pyx b/tests/run/r_docstrings.pyx index 22999967ff96f3c9665c9ea7d9c807bd89c332a9..cfb6f1e02e2d770306baf58798252556aefecd04 100644 --- a/tests/run/r_docstrings.pyx +++ b/tests/run/r_docstrings.pyx @@ -1,10 +1,39 @@ __doc__ = """ - >>> print f.__doc__ - This is a function docstring. - >>> print C.__doc__ - This is a class docstring. - >>> print T.__doc__ - This is an extension type docstring. + >>> f.__doc__ + 'This is a function docstring.' + + >>> C.__doc__ + 'This is a class docstring.' + >>> CS.__doc__ + 'This is a subclass docstring.' + >>> print CSS.__doc__ + None + + >>> T.__doc__ + 'This is an extension type docstring.' + >>> TS.__doc__ + 'This is an extension subtype docstring.' + >>> TSS.__doc__ + +Compare with standard Python: + + >>> def f(): + ... 'This is a function docstring.' + >>> f.__doc__ + 'This is a function docstring.' + + >>> class C: + ... 'This is a class docstring.' + >>> class CS(C): + ... 'This is a subclass docstring.' + >>> class CSS(CS): + ... pass + + >>> C.__doc__ + 'This is a class docstring.' + >>> CS.__doc__ + 'This is a subclass docstring.' + >>> CSS.__doc__ """ def f(): @@ -13,6 +42,17 @@ def f(): class C: "This is a class docstring." +class CS(C): + "This is a subclass docstring." + +class CSS(CS): + pass + cdef class T: "This is an extension type docstring." +cdef class TS(T): + "This is an extension subtype docstring." + +cdef class TSS(TS): + pass diff --git a/tests/run/specialfloat.pyx b/tests/run/specialfloat.pyx new file mode 100644 index 0000000000000000000000000000000000000000..5a2524d0e6935cfbe8b5b93b582f1f4c269512e5 --- /dev/null +++ b/tests/run/specialfloat.pyx @@ -0,0 +1,107 @@ +__doc__ = """ + >>> f() + 12.5 + + >>> nan1() + nan + >>> nan2() + nan + >>> nan3() + nan + >>> float_nan + nan + + >>> infp1() + inf + >>> infp1() == float('inf') + True + >>> infp2() + inf + >>> infp2() == float('inf') + True + >>> infp3() + inf + >>> infp3() == float('inf') + True + >>> float_infp + inf + >>> float_infp == float('inf') + True + + >>> infn1() + -inf + >>> infn1() == float('-inf') + True + >>> infn2() + -inf + >>> infn2() == float('-inf') + True + >>> infn3() + -inf + >>> infn3() == float('-inf') + True + >>> float_infn + -inf + >>> float_infn == float('-inf') + True +""" + +DEF FLOAT = 12.5 +DEF FLOAT_NAN = float('nan') +DEF FLOAT_INFP = float('+inf') +DEF FLOAT_INFN = float('-inf') + +float_nan = FLOAT_NAN +float_infp = FLOAT_INFP +float_infn = FLOAT_INFN + +def f(): + cdef float f + f = FLOAT + return f + +def nan1(): + cdef double f + f = FLOAT_NAN + return f + +def nan2(): + cdef double f + f = float('nan') + return f + +def nan3(): + cdef float f + f = FLOAT_NAN + return f + +def infp1(): + cdef double f + f = FLOAT_INFP + return f + +def infp2(): + cdef double f + f = float('+inf') + return f + +def infp3(): + cdef float f + f = FLOAT_INFP + return f + +def infn1(): + cdef double f + f = FLOAT_INFN + return f + +def infn2(): + cdef double f + f = float('-inf') + return f + +def infn3(): + cdef float f + f = FLOAT_INFN + return f + diff --git a/tests/run/testinclude.pxi b/tests/run/testinclude.pxi new file mode 100644 index 0000000000000000000000000000000000000000..9870e682cd6d71b0789bc800ea04ec02e2621d5d --- /dev/null +++ b/tests/run/testinclude.pxi @@ -0,0 +1 @@ +D = 2 diff --git a/tests/run/unicodeliterals.pyx b/tests/run/unicodeliterals.pyx index d0c4db8f69fcab242d279a3ca9ddce9156dae682..436f20aedebaeb8176d65da804a8691c7b948d4f 100644 --- a/tests/run/unicodeliterals.pyx +++ b/tests/run/unicodeliterals.pyx @@ -49,13 +49,17 @@ __doc__ = r""" True >>> d == u'üÖä' True - >>> e == u'\x03\x67\xf8\uf8d2Søk ik' + >>> e == u'\x03\x67\xf8\uf8d2Søk ik' # unescaped by Cython True - >>> f == u'\xf8' + >>> e == u'\\x03\\x67\\xf8\\uf8d2Søk ik' # unescaped by Python + True + >>> f == u'\xf8' # unescaped by Cython + True + >>> f == u'\\xf8' # unescaped by Python True >>> add == u'Søk ik' + u'üÖä' + 'abc' True - >>> null == u'\\x00' # doctest needs a double slash here + >>> null == u'\\x00' # unescaped by Python (required by doctest) True """