Commit 2bde7094 authored by Robert Bradshaw

trivial merge

parents e918de3e bf579cac
......@@ -129,6 +129,9 @@ def parse_command_line(args):
arg = pop_arg()
if arg.endswith(".pyx"):
sources.append(arg)
elif arg.endswith(".py"):
# maybe do some other stuff, but this should work for now
sources.append(arg)
elif arg.endswith(".o"):
options.objects.append(arg)
else:
......
......@@ -2,9 +2,10 @@
# Pyrex - Code output module
#
import codecs
import Naming
import Options
from Cython.Utils import open_new_file
from Cython.Utils import open_new_file, open_source_file
from PyrexTypes import py_object_type, typecast
from TypeSlots import method_coexist
......@@ -85,23 +86,24 @@ class CCodeWriter:
def indent(self):
self.f.write(" " * self.level)
def get_py_version_hex(self, pyversion):
return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
def file_contents(self, file):
try:
return self.input_file_contents[file]
except KeyError:
F = [line.replace('*/', '*[inserted by cython to avoid comment closer]/')
for line in open(file).readlines()]
F = [line.encode('ASCII', 'replace').replace(
'*/', '*[inserted by cython to avoid comment closer]/')
for line in open_source_file(file)]
self.input_file_contents[file] = F
return F
def get_py_version_hex(self, pyversion):
return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
def mark_pos(self, pos):
if pos is None:
return
file, line, col = pos
contents = self.file_contents(file)
filename, line, col = pos
contents = self.file_contents(filename)
context = ''
for i in range(max(0,line-3), min(line+2, len(contents))):
......@@ -110,7 +112,7 @@ class CCodeWriter:
s = s.rstrip() + ' # <<<<<<<<<<<<<< ' + '\n'
context += " * " + s
marker = '"%s":%s\n%s' % (file, line, context)
marker = '"%s":%d\n%s' % (filename.encode('ASCII', 'replace'), line, context)
if self.last_marker != marker:
self.marker = marker
......
import bisect
import bisect, sys
# This module keeps track of arbitrary "states" at any point of the code.
# A state is considered known if every path to the given point agrees on
......@@ -13,6 +13,8 @@ import bisect
# redesigned. It doesn't take return, raise, continue, or break into
# account.
# Sentinel position used as the initial end position of a ControlFlow and
# as the default 'pos' argument of the state lookups.
# NOTE(review): presumably meant to compare greater than any real source
# position (positions are compared with '<' and '>') -- confirm.
_END_POS = ((unichr(sys.maxunicode)*10),())
class ControlFlow:
def __init__(self, start_pos, incoming, parent):
......@@ -22,7 +24,7 @@ class ControlFlow:
parent = incoming.parent
self.parent = parent
self.tip = {}
self.end_pos = ((),)
self.end_pos = _END_POS
def start_branch(self, pos):
self.end_pos = pos
......@@ -40,10 +42,10 @@ class ControlFlow:
self.parent.end_pos = pos
return LinearControlFlow(pos, self.parent)
def get_state(self, item, pos=((),())):
def get_state(self, item, pos=_END_POS):
return self.get_pos_state(item, pos)[1]
def get_pos_state(self, item, pos=((),())):
def get_pos_state(self, item, pos=_END_POS):
# do some caching
if pos > self.end_pos:
try:
......@@ -61,13 +63,13 @@ class LinearControlFlow(ControlFlow):
self.events = {}
def set_state(self, pos, item, state):
if self.tip.has_key(item):
if item in self.tip:
del self.tip[item]
if pos < self.start_pos:
if self.incoming is not None:
self.incoming.set_state(pos, item, state)
else:
if self.events.has_key(item):
if item in self.events:
event_list = self.events[item]
else:
event_list = []
......@@ -77,7 +79,7 @@ class LinearControlFlow(ControlFlow):
def _get_pos_state(self, item, pos):
if pos > self.start_pos:
if self.events.has_key(item):
if item in self.events:
event_list = self.events[item]
for event in event_list[::-1]:
if event[0] < pos:
......@@ -116,7 +118,7 @@ class BranchingControlFlow(ControlFlow):
def set_state(self, pos, item, state):
if self.tip.has_key(item):
if item in self.tip:
del self.tip[item]
if pos < self.start_pos:
......@@ -157,5 +159,3 @@ class BranchingControlFlow(ControlFlow):
if self.incoming is not limit and self.incoming is not None:
s = "%s\n%s" % (self.incoming.to_string(indent, limit=limit), s)
return s
\ No newline at end of file
......@@ -18,6 +18,29 @@ from Cython.Debugging import print_call_chain
from DebugFlags import debug_disposal_code, debug_temp_alloc, \
debug_coercion
class EncodedString(unicode):
    # A unicode subclass that remembers the encoding its text came from.
    # 'encoding' stays None for unicode strings; otherwise it holds the
    # source encoding.
    encoding = None

    def byteencode(self):
        # Encode the text with the recorded source encoding.
        # Only valid when an encoding has been recorded.
        assert self.encoding is not None
        source_encoding = self.encoding
        return self.encode(source_encoding)

    def utf8encode(self):
        # Encode the text as UTF-8.
        # Only valid when no source encoding has been recorded.
        assert self.encoding is None
        return self.encode("UTF-8")

    # Read-only flag: true when no source encoding has been recorded.
    is_unicode = property(lambda self: self.encoding is None)

#    def __eq__(self, other):
#        return unicode.__eq__(self, other) and \
#            getattr(other, 'encoding', '') == self.encoding
class ExprNode(Node):
# subexprs [string] Class var holding names of subexpr node attrs
# type PyrexType Type of the result
......@@ -669,7 +692,7 @@ class IntNode(ConstNode):
return str(self.value)
def compile_time_value(self, denv):
return int(self.value)
return int(self.value, 0)
class FloatNode(ConstNode):
......@@ -678,6 +701,17 @@ class FloatNode(ConstNode):
def compile_time_value(self, denv):
return float(self.value)
def calculate_result_code(self):
    # Return the C expression text for this float constant.  The
    # spellings 'nan', 'inf' and '-inf' are mapped onto expressions built
    # from Py_HUGE_VAL; any other value is emitted as its str() form.
    text = str(self.value)
    special_forms = {
        'nan': "(Py_HUGE_VAL * 0)",
        'inf': "Py_HUGE_VAL",
        '-inf': "(-Py_HUGE_VAL)",
    }
    return special_forms.get(text, text)
class StringNode(ConstNode):
# entry Symtab.Entry
......@@ -685,15 +719,16 @@ class StringNode(ConstNode):
type = PyrexTypes.c_char_ptr_type
def compile_time_value(self, denv):
return eval('"%s"' % self.value)
return self.value
def analyse_types(self, env):
self.entry = env.add_string_const(self.value)
def coerce_to(self, dst_type, env):
if dst_type.is_int:
if not self.type.is_pyobject and len(self.value) == 1:
return CharNode(self.pos, value=self.value)
if not self.type.is_pyobject and len(self.entry.init) == 1:
# we use the *encoded* value here
return CharNode(self.pos, value=self.entry.init)
else:
error(self.pos, "Only coerce single-character ascii strings can be used as ints.")
return self
......@@ -776,7 +811,7 @@ class NameNode(AtomicExprNode):
try:
return denv.lookup(self.name)
except KeyError:
error(self.pos, "Compile-time name '%s' not defined", self.name)
error(self.pos, "Compile-time name '%s' not defined" % self.name)
def coerce_to(self, dst_type, env):
# If coercing to a generic pyobject and this is a builtin
......
......@@ -2,12 +2,11 @@
# Cython Top Level
#
import os, sys, re
import os, sys, re, codecs
if sys.version_info[:2] < (2, 2):
print >>sys.stderr, "Sorry, Cython requires Python 2.2 or later"
sys.exit(1)
import os
from time import time
import Version
from Scanning import PyrexScanner
......@@ -140,8 +139,17 @@ class Context:
def parse(self, source_filename, type_names, pxd, full_module_name):
# Parse the given source file and return a parse tree.
f = open(source_filename, "rU")
s = PyrexScanner(f, source_filename,
f = Utils.open_source_file(source_filename, "rU")
if isinstance(source_filename, unicode):
name = source_filename
else:
filename_encoding = sys.getfilesystemencoding()
if filename_encoding is None:
filename_encoding = getdefaultencoding()
name = source_filename.decode(filename_encoding)
s = PyrexScanner(f, name, source_encoding = f.encoding,
type_names = type_names, context = self)
try:
tree = Parsing.p_module(s, pxd, full_module_name)
......@@ -310,7 +318,6 @@ def compile(source, options = None, c_compile = 0, c_link = 0,
#------------------------------------------------------------------------
def main(command_line = 0):
args = sys.argv[1:]
any_failures = 0
if command_line:
......
......@@ -19,35 +19,6 @@ from Errors import error
from PyrexTypes import py_object_type
from Cython.Utils import open_new_file, replace_suffix
def recurse_vtab_check_inheritance(entry, b, dict):
base = entry
while base is not None:
if base.type.base_type is None or base.type.base_type.vtabstruct_cname is None:
return False
if base.type.base_type.vtabstruct_cname == b.type.vtabstruct_cname:
return True
try:
base = dict[base.type.base_type.vtabstruct_cname]
except KeyError:
return True
return False
def recurse_vtabslot_check_inheritance(entry, b, dict):
base = entry
while base is not None:
if base.type.base_type is None:
return False
if base.type.base_type.objstruct_cname == b.type.objstruct_cname:
return True
try:
base = dict[base.type.base_type.objstruct_cname]
except KeyError:
return True
return False
class ModuleNode(Nodes.Node, Nodes.BlockNode):
# doc string or None
# body StatListNode
......@@ -255,13 +226,14 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
self.generate_method_table(env, code)
self.generate_filename_init_prototype(code)
self.generate_module_init_func(modules[:-1], env, code)
self.generate_module_init2_func(modules[:-1], env, code)
code.mark_pos(None)
self.generate_module_cleanup_func(env, code)
self.generate_filename_table(code)
self.generate_utility_functions(env, code)
self.generate_declarations_for_modules(env, modules, code.h)
for module in modules:
self.generate_declarations_for_module(module, code.h,
definition = module is env)
f = open_new_file(result.c_file)
f.write(code.h.f.getvalue())
......@@ -273,9 +245,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
self.annotate(code)
code.save_annotation(result.c_file[:-1] + "pyx") # change?
def find_referenced_modules(self, env, module_list, modules_seen):
if env not in modules_seen:
modules_seen[env] = 1
......@@ -364,101 +333,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("0")
code.putln("};")
def generate_vtab_dict(self, module_list):
vtab_dict = {}
for module in module_list:
for entry in module.c_class_entries:
if not entry.in_cinclude:
type = entry.type
scope = type.scope
if type.vtabstruct_cname:
vtab_dict[type.vtabstruct_cname]=entry
return vtab_dict
def generate_vtab_list(self, vtab_dict):
vtab_list = list()
for entry in vtab_dict.itervalues():
vtab_list.append(entry)
for i in range(0,len(vtab_list)):
for j in range(0,len(vtab_list)):
if(recurse_vtab_check_inheritance(vtab_list[j],vtab_list[i], vtab_dict)==1):
if i > j:
vtab_list.insert(j,vtab_list[i])
if i > j:
vtab_list.pop(i+1)
else:
vtab_list.pop(i)
#for entry in vtab_list:
#print entry.type.vtabstruct_cname
return vtab_list
def generate_vtabslot_dict(self, module_list, env):
vtab_dict={}
type_entries=[]
for module in module_list:
definition = module is env
if definition:
type_entries.extend( env.type_entries)
else:
for entry in module.type_entries:
if entry.defined_in_pxd:
type_entries.append(entry)
for entry in type_entries:
type = entry.type
if type.is_extension_type:
if not entry.in_cinclude:
type = entry.type
scope = type.scope
vtab_dict[type.objstruct_cname]=entry
return vtab_dict
def generate_vtabslot_list(self, vtab_dict):
vtab_list = list()
for entry in vtab_dict.itervalues():
vtab_list.append(entry)
for i in range(0,len(vtab_list)):
for j in range(0,len(vtab_list)):
if(recurse_vtabslot_check_inheritance(vtab_list[j],vtab_list[i], vtab_dict)==1):
if i > j:
vtab_list.insert(j,vtab_list[i])
if i > j:
vtab_list.pop(i+1)
else:
vtab_list.pop(i)
#for entry in vtab_list:
#print entry.type.vtabstruct_cname
return vtab_list
def generate_type_definitions(self, env, modules, vtab_list, vtabslot_list, code):
for module in modules:
definition = module is env
if definition:
type_entries = module.type_entries
else:
type_entries = []
for entry in module.type_entries:
if entry.defined_in_pxd:
type_entries.append(entry)
self.generate_type_header_code(type_entries, code)
for entry in vtabslot_list:
self.generate_obj_struct_definition(entry.type, code)
for entry in vtab_list:
self.generate_typeobject_predeclaration(entry, code)
self.generate_exttype_vtable_struct(entry, code)
self.generate_exttype_vtabptr_declaration(entry, code)
def generate_declarations_for_modules(self, env, modules, code):
def generate_declarations_for_module(self, env, code, definition):
code.putln("")
code.putln("/* Declarations */")
vtab_dict = self.generate_vtab_dict(modules)
vtab_list = self.generate_vtab_list(vtab_dict)
vtabslot_dict = self.generate_vtabslot_dict(modules,env)
vtabslot_list = self.generate_vtabslot_list(vtabslot_dict)
self.generate_type_definitions(env, modules, vtab_list, vtabslot_list, code)
for module in modules:
definition = module is env
self.generate_global_declarations(module, code, definition)
self.generate_cfunction_predeclarations(module, code, definition)
code.putln("/* Declarations from %s */" % env.qualified_name)
self.generate_type_predeclarations(env, code)
self.generate_type_definitions(env, code, definition)
self.generate_global_declarations(env, code, definition)
self.generate_cfunction_predeclarations(env, code, definition)
def generate_type_predeclarations(self, env, code):
pass
def generate_type_header_code(self, type_entries, code):
# Generate definitions of structs/unions/enums/typedefs/objstructs.
......@@ -474,6 +358,23 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
self.generate_struct_union_definition(entry, code)
elif type.is_enum:
self.generate_enum_definition(entry, code)
elif type.is_extension_type:
self.generate_obj_struct_definition(type, code)
def generate_type_definitions(self, env, code, definition):
if definition:
type_entries = env.type_entries
else:
type_entries = []
for entry in env.type_entries:
if entry.defined_in_pxd:
type_entries.append(entry)
self.generate_type_header_code(type_entries, code)
for entry in env.c_class_entries:
if not entry.in_cinclude:
self.generate_typeobject_predeclaration(entry, code)
self.generate_exttype_vtable_struct(entry, code)
self.generate_exttype_vtabptr_declaration(entry, code)
def generate_gcc33_hack(self, env, code):
# Workaround for spurious warning generation in gcc 3.3
......@@ -1369,7 +1270,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
entry.pystring_cname,
entry.cname,
entry.cname,
isinstance(entry.init, unicode)
entry.type.is_unicode
))
code.putln(
"{0, 0, 0, 0}")
......@@ -1382,13 +1283,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname)
def generate_module_init_func(self, imported_modules, env, code):
code.putln("")
code.putln("PyMODINIT_FUNC init2%s(void);" % env.module_name)
code.putln("")
header = "PyMODINIT_FUNC init%s(void)" % env.module_name
code.putln("%s; /*proto*/" % header)
code.putln("%s {" % header)
# do we need any of these here, or just in init2?
code.put_var_declarations(env.temp_entries)
code.putln("/*--- Libary function declarations ---*/")
......@@ -1417,41 +1315,24 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("/*--- Function export code ---*/")
self.generate_c_function_export_code(env, code)
env.use_utility_code(function_export_utility_code)
code.putln('if (__Pyx_ExportFunction("init2%s", (void*)init2%s, "int (void)") < 0) %s' % (env.module_name, env.module_name, code.error_goto((env.qualified_name,0,0) ) ) )
code.putln("/*--- Function import code ---*/")
for module in imported_modules:
self.generate_c_function_import_code_for_module(module, env, code)
code.putln("/*--- Type init code ---*/")
self.generate_type_init_code(env, code)
code.putln("/*--- Type import code ---*/")
for module in imported_modules:
self.generate_type_import_code_for_module(module, env, code)
code.putln("/*--- Function import code ---*/")
for module in imported_modules:
self.generate_c_function_import_code_for_module(module, env, code)
code.putln('init2%s();' % env.module_name)
if Options.generate_cleanup_code:
code.putln("if (__Pyx_RegisterCleanup()) %s;" % code.error_goto(self.pos))
code.putln("return;")
code.put_label(code.error_label)
code.put_var_xdecrefs(env.temp_entries)
code.putln('__Pyx_AddTraceback("%s");' % env.qualified_name)
env.use_utility_code(Nodes.traceback_utility_code)
code.putln('}')
def generate_module_init2_func(self, imported_modules, env, code):
code.init_labels()
code.putln("")
header = "PyMODINIT_FUNC init2%s(void)" % env.module_name
code.putln("%s; /*proto*/" % header)
code.putln("%s {" % header)
code.putln("static int __Pyx_unique = 0;")
code.putln("if (__Pyx_unique==1) return;")
code.putln("__Pyx_unique = 1;")
code.put_var_declarations(env.temp_entries)
code.putln("/*--- Execution code ---*/")
code.mark_pos(None)
self.body.generate_execution_code(code)
if Options.generate_cleanup_code:
code.putln("if (__Pyx_RegisterCleanup()) %s;" % code.error_goto(self.pos))
code.putln("return;")
code.put_label(code.error_label)
code.put_var_xdecrefs(env.temp_entries)
......@@ -1776,49 +1657,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
#
#------------------------------------------------------------------------------------
call_module_function_code = [
"""
static PyObject *__Pyx_CallModuleFunction(char* module, char *name); /*proto*/
""","""
static PyObject *__Pyx_CallModuleFunction(char* module, char *name)
{
PyObject* py_name = 0;
PyObject* py_module_name = 0;
PyObject* py_module = 0;
PyObject* py_dict = 0;
PyObject* py_func = 0;
PyObject* py_tuple = PyTuple_New(0);
PyObject* ret = 0;
py_dict = PyImport_GetModuleDict();
if(py_dict == 0)
goto bad;
if(py_tuple == 0)
goto bad;
py_name = PyString_FromString(name);
if(py_name == 0)
goto bad;
py_module_name = PyString_FromString(module);
if(py_module_name == 0)
goto bad;
py_module = PyObject_GetItem(py_dict, py_module);
if(py_module == 0)
goto bad;
if ( (py_func = PyObject_GetAttr(py_module, py_name) ) == 0)
goto bad;
if ( (ret = PyObject_Call(py_func, py_tuple,NULL) ) == 0)
goto bad;
return ret;
bad:
Py_XDECREF(py_name);
Py_XDECREF(py_module_name);
Py_XDECREF(py_module);
Py_XDECREF(py_dict);
Py_XDECREF(py_func);
return 0;
}
"""]
import_module_utility_code = [
"""
static PyObject *__Pyx_ImportModule(char *name); /*proto*/
......@@ -1892,7 +1730,7 @@ bad:
function_export_utility_code = [
"""
static int __Pyx_ExportFunction(char *n, void *f, char *s); /*proto*/
static int __Pyx_ExportFunction(char *name, void *f, char *sig); /*proto*/
""",r"""
static int __Pyx_ExportFunction(char *name, void *f, char *sig) {
PyObject *d = 0;
......
......@@ -38,6 +38,30 @@ def relative_position(pos):
"""
return (pos[0][absolute_path_length+1:], pos[1])
def embed_position(pos, docstring):
    # Prefix 'docstring' with a "File: ... (starting at line ...)" header
    # derived from 'pos'.
    #
    # pos        source position, passed to relative_position()
    # docstring  EncodedString or None
    #
    # Returns 'docstring' unchanged when Options.embed_pos_in_docstring is
    # off; otherwise returns a new EncodedString.  The result keeps the
    # docstring's encoding if the header can be encoded with it, and
    # becomes a unicode string (encoding None) if it cannot.
    if not Options.embed_pos_in_docstring:
        return docstring

    # NOTE(review): other code in this file imports ExprNodes inside the
    # function that needs it, so do the same here -- confirm whether a
    # module-level import exists.
    import ExprNodes

    # This is a module-level function: the position comes from the 'pos'
    # argument, there is no 'self' to read it from.
    pos_line = u'File: %s (starting at line %s)' % relative_position(pos)
    if docstring is None:
        # unicode string
        return ExprNodes.EncodedString(pos_line)

    # make sure we can encode the filename in the docstring encoding
    # otherwise make the docstring a unicode string
    encoding = docstring.encoding
    if encoding is not None:
        try:
            # Only the success or failure of the encoding matters here.
            pos_line.encode(encoding)
        except UnicodeEncodeError:
            encoding = None

    if not docstring:
        doc = ExprNodes.EncodedString(pos_line)
    else:
        # The separator is a backslash followed by 'n', not a newline
        # character.
        doc = ExprNodes.EncodedString(pos_line + u'\\n' + docstring)
    # reuse the string encoding of the original docstring
    doc.encoding = encoding
    return doc
class AttributeAccessor:
"""Used as the result of the Node.get_children_accessors() generator"""
......@@ -1199,7 +1223,7 @@ class DefNode(FuncDefNode):
# args [CArgDeclNode] formal arguments
# star_arg PyArgDeclNode or None * argument
# starstar_arg PyArgDeclNode or None ** argument
# doc string or None
# doc EncodedString or None
# body StatListNode
#
# The following subnode is constructed internally
......@@ -1357,17 +1381,12 @@ class DefNode(FuncDefNode):
Naming.pyfunc_prefix + prefix + name
entry.pymethdef_cname = \
Naming.pymethdef_prefix + prefix + name
if not Options.docstrings:
self.entry.doc = None
else:
if Options.embed_pos_in_docstring:
entry.doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
if not self.doc is None:
entry.doc = entry.doc + '\\n' + self.doc
else:
entry.doc = self.doc
if Options.docstrings:
entry.doc = embed_position(self.pos, self.doc)
entry.doc_cname = \
Naming.funcdoc_prefix + prefix + name
else:
entry.doc = None
def declare_arguments(self, env):
for arg in self.args:
......@@ -1897,7 +1916,7 @@ class OverrideCheckNode(StatNode):
class PyClassDefNode(StatNode, BlockNode):
# A Python class definition.
#
# name string Name of the class
# name EncodedString Name of the class
# doc string or None
# body StatNode Attribute definition code
# entry Symtab.Entry
......@@ -1919,9 +1938,7 @@ class PyClassDefNode(StatNode, BlockNode):
import ExprNodes
self.dict = ExprNodes.DictNode(pos, key_value_pairs = [])
if self.doc and Options.docstrings:
if Options.embed_pos_in_docstring:
doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
doc = doc + '\\n' + self.doc
doc = embed_position(self.pos, self.doc)
doc_node = ExprNodes.StringNode(pos, value = doc)
else:
doc_node = None
......@@ -1961,7 +1978,7 @@ class PyClassDefNode(StatNode, BlockNode):
self.dict.generate_disposal_code(code)
class CClassDefNode(StatNode):
class CClassDefNode(StatNode, BlockNode):
# An extension type definition.
#
# visibility 'private' or 'public' or 'extern'
......@@ -2034,11 +2051,7 @@ class CClassDefNode(StatNode):
scope = self.entry.type.scope
if self.doc and Options.docstrings:
if Options.embed_pos_in_docstring:
scope.doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
scope.doc = scope.doc + '\\n' + self.doc
else:
scope.doc = self.doc
scope.doc = embed_position(self.pos, self.doc)
if has_body:
self.body.analyse_declarations(scope)
......@@ -2054,6 +2067,7 @@ class CClassDefNode(StatNode):
self.body.analyse_expressions(scope)
def generate_function_definitions(self, env, code, transforms):
self.generate_py_string_decls(self.entry.type.scope, code)
if self.body:
self.body.generate_function_definitions(
self.entry.type.scope, code, transforms)
......@@ -2073,7 +2087,7 @@ class PropertyNode(StatNode):
# Definition of a property in an extension type.
#
# name string
# doc string or None Doc string
# doc EncodedString or None Doc string
# body StatListNode
child_attrs = ["body"]
......
......@@ -2,7 +2,7 @@
# Pyrex Parser
#
import os, re
import os, re, codecs
from string import join, replace
from types import ListType, TupleType
from Scanning import PyrexScanner
......@@ -10,6 +10,7 @@ import Nodes
import ExprNodes
from ModuleNode import ModuleNode
from Errors import error, InternalError
from Cython import Utils
def p_ident(s, message = "Expected an identifier"):
if s.sy == 'IDENT':
......@@ -281,8 +282,10 @@ def p_call(s, function):
if not arg.is_name:
s.error("Expected an identifier before '='",
pos = arg.pos)
encoded_name = ExprNodes.EncodedString(arg.name)
encoded_name.encoding = s.source_encoding
keyword = ExprNodes.StringNode(arg.pos,
value = arg.name)
value = encoded_name)
arg = p_simple_expr(s)
keyword_args.append((keyword, arg))
else:
......@@ -459,7 +462,7 @@ def p_atom(s):
value = s.systring[:-1]
s.next()
return ExprNodes.ImagNode(pos, value = value)
elif sy == 'STRING' or sy == 'BEGIN_STRING':
elif sy == 'BEGIN_STRING':
kind, value = p_cat_string_literal(s)
if kind == 'c':
return ExprNodes.CharNode(pos, value = value)
......@@ -500,7 +503,12 @@ def p_name(s, name):
elif isinstance(value, float):
return ExprNodes.FloatNode(pos, value = rep)
elif isinstance(value, str):
return ExprNodes.StringNode(pos, value = rep[1:-1])
sval = ExprNodes.EncodedString(rep[1:-1])
sval.encoding = value.encoding
return ExprNodes.StringNode(pos, value = sval)
elif isinstance(value, unicode):
sval = ExprNodes.EncodedString(rep[2:-1])
return ExprNodes.StringNode(pos, value = sval)
else:
error(pos, "Invalid type for compile-time constant: %s"
% value.__class__.__name__)
......@@ -508,21 +516,25 @@ def p_name(s, name):
def p_cat_string_literal(s):
# A sequence of one or more adjacent string literals.
# Returns (kind, value) where kind in ('', 'c', 'r')
# Returns (kind, value) where kind in ('', 'c', 'r', 'u')
kind, value = p_string_literal(s)
if kind != 'c':
strings = [value]
while s.sy == 'STRING' or s.sy == 'BEGIN_STRING':
while s.sy == 'BEGIN_STRING':
next_kind, next_value = p_string_literal(s)
if next_kind == 'c':
self.error(
"Cannot concatenate char literal with another string or char literal")
elif next_kind == 'u':
kind = 'u'
strings.append(next_value)
value = ''.join(strings)
value = ExprNodes.EncodedString( u''.join(strings) )
if kind != 'u':
value.encoding = s.source_encoding
return kind, value
def p_opt_string_literal(s):
if s.sy == 'STRING' or s.sy == 'BEGIN_STRING':
if s.sy == 'BEGIN_STRING':
return p_string_literal(s)
else:
return None
......@@ -530,10 +542,6 @@ def p_opt_string_literal(s):
def p_string_literal(s):
# A single string or char literal.
# Returns (kind, value) where kind in ('', 'c', 'r', 'u')
if s.sy == 'STRING':
value = unquote(s.systring)
s.next()
return value
# s.sy == 'BEGIN_STRING'
pos = s.position()
#is_raw = s.systring[:1].lower() == "r"
......@@ -549,8 +557,6 @@ def p_string_literal(s):
systr = s.systring
if len(systr) == 1 and systr in "'\"\n":
chars.append('\\')
if kind == 'u' and not isinstance(systr, unicode):
systr = systr.decode("UTF-8")
chars.append(systr)
elif sy == 'ESCAPE':
systr = s.systring
......@@ -572,7 +578,8 @@ def p_string_literal(s):
elif c in 'ux':
if kind == 'u':
try:
chars.append(systr.decode('unicode_escape'))
chars.append(
systr.encode("ASCII").decode('unicode_escape'))
except UnicodeDecodeError:
s.error("Invalid unicode escape '%s'" % systr,
pos = pos)
......@@ -593,50 +600,12 @@ def p_string_literal(s):
"Unexpected token %r:%r in string literal" %
(sy, s.systring))
s.next()
value = ''.join(chars)
value = ExprNodes.EncodedString( u''.join(chars) )
if kind != 'u':
value.encoding = s.source_encoding
#print "p_string_literal: value =", repr(value) ###
return kind, value
def unquote(s):
is_raw = 0
if s[:1].lower() == "r":
is_raw = 1
s = s[1:]
q = s[:3]
if q == '"""' or q == "'''":
s = s[3:-3]
else:
s = s[1:-1]
if is_raw:
s = s.replace('\\', '\\\\')
s = s.replace('\n', '\\\n')
else:
# Split into double quotes, newlines, escape sequences
# and spans of regular chars
l1 = re.split(r'((?:\\[0-7]{1,3})|(?:\\x[0-9A-Fa-f]{2})|(?:\\.)|(?:\\\n)|(?:\n)|")', s)
#print "unquote: l1 =", l1 ###
l2 = []
for item in l1:
if item == '"' or item == '\n':
l2.append('\\' + item)
elif item == '\\\n':
pass
elif item[:1] == '\\':
if len(item) == 2:
if item[1] in '"\\abfnrtv':
l2.append(item)
else:
l2.append(item[1])
elif item[1:2] == 'x':
l2.append('\\x0' + item[2:])
else:
# octal escape
l2.append(item)
else:
l2.append(item)
s = "".join(l2)
return s
# list_display ::= "[" [listmaker] "]"
# listmaker ::= expression ( list_for | ( "," expression )* [","] )
# list_iter ::= list_for | list_if
......@@ -946,6 +915,8 @@ def p_import_statement(s):
ExprNodes.StringNode(pos, value = "*")])
else:
name_list = None
dotted_name = ExprNodes.EncodedString(dotted_name)
dotted_name.encoding = s.source_encoding
stat = Nodes.SingleAssignmentNode(pos,
lhs = ExprNodes.NameNode(pos,
name = as_name or target_name),
......@@ -984,14 +955,18 @@ def p_from_import_statement(s):
imported_name_strings = []
items = []
for (name_pos, name, as_name) in imported_names:
encoded_name = ExprNodes.EncodedString(name)
encoded_name.encoding = s.source_encoding
imported_name_strings.append(
ExprNodes.StringNode(name_pos, value = name))
ExprNodes.StringNode(name_pos, value = encoded_name))
items.append(
(name,
ExprNodes.NameNode(name_pos,
name = as_name or name)))
import_list = ExprNodes.ListNode(
imported_names[0][0], args = imported_name_strings)
dotted_name = ExprNodes.EncodedString(dotted_name)
dotted_name.encoding = s.source_encoding
return Nodes.FromImportStatNode(pos,
module = ExprNodes.ImportNode(dotted_name_pos,
module_name = ExprNodes.StringNode(dotted_name_pos,
......@@ -1204,8 +1179,8 @@ def p_include_statement(s, level):
if s.compile_time_eval:
include_file_path = s.context.find_include_file(include_file_name, pos)
if include_file_path:
f = open(include_file_path, "rU")
s2 = PyrexScanner(f, include_file_path, s)
f = Utils.open_source_file(include_file_path, mode="rU")
s2 = PyrexScanner(f, include_file_path, s, source_encoding=f.encoding)
try:
tree = p_statement_list(s2, level)
finally:
......@@ -1996,7 +1971,8 @@ def p_class_statement(s):
# s.sy == 'class'
pos = s.position()
s.next()
class_name = p_ident(s)
class_name = ExprNodes.EncodedString( p_ident(s) )
class_name.encoding = s.source_encoding
if s.sy == '(':
s.next()
base_list = p_simple_expr_list(s)
......@@ -2113,7 +2089,7 @@ def p_property_decl(s):
return Nodes.PropertyNode(pos, name = name, doc = doc, body = body)
def p_doc_string(s):
if s.sy == 'STRING' or s.sy == 'BEGIN_STRING':
if s.sy == 'BEGIN_STRING':
_, result = p_cat_string_literal(s)
if s.sy != 'EOF':
s.expect_newline("Syntax error in doc string")
......
......@@ -37,6 +37,7 @@ class PyrexType(BaseType):
# is_enum boolean Is a C enum type
# is_typedef boolean Is a typedef type
# is_string boolean Is a C char * type
# is_unicode boolean Is a UTF-8 encoded C char * type
# is_returncode boolean Is used only to signal exceptions
# is_error boolean Is the dummy error type
# has_attributes boolean Has C dot-selectable attributes
......@@ -83,6 +84,7 @@ class PyrexType(BaseType):
is_enum = 0
is_typedef = 0
is_string = 0
is_unicode = 0
is_returncode = 0
is_error = 0
has_attributes = 0
......@@ -875,19 +877,49 @@ class CEnumType(CType):
return self.base_declaration_code(public_decl(base, dll_linkage), entity_code)
def _escape_byte_string(s):
try:
s.decode("ASCII")
return s
except UnicodeDecodeError:
pass
l = []
append = l.append
for c in s:
o = ord(c)
if o >= 128:
append('\\x%X' % o)
else:
append(c)
return ''.join(l)
class CStringType:
# Mixin class for C string types.
is_string = 1
is_unicode = 0
to_py_function = "PyString_FromString"
from_py_function = "PyString_AsString"
exception_value = "NULL"
def literal_code(self, value):
if isinstance(value, unicode):
value = value.encode("UTF-8")
return '"%s"' % value
assert isinstance(value, str)
return '"%s"' % _escape_byte_string(value)
class CUTF8StringType:
    # Mixin class for C unicode types.

    is_string = 1
    is_unicode = 1

    to_py_function = "PyUnicode_DecodeUTF8"
    exception_value = "NULL"

    def literal_code(self, value):
        # Return 'value' as a double-quoted literal with its non-ASCII
        # bytes escaped.  'value' must be a byte string (str).
        assert isinstance(value, str)
        escaped = _escape_byte_string(value)
        return '"' + escaped + '"'
class CCharArrayType(CStringType, CArrayType):
......@@ -900,6 +932,16 @@ class CCharArrayType(CStringType, CArrayType):
CArrayType.__init__(self, c_char_type, size)
class CUTF8CharArrayType(CUTF8StringType, CArrayType):
# C 'char []' type, encoded in UTF-8.
parsetuple_format = "s"
pymemberdef_typecode = "T_STRING_INPLACE"
def __init__(self, size):
CArrayType.__init__(self, c_char_type, size)
class CCharPtrType(CStringType, CPtrType):
# C 'char *' type.
......@@ -910,6 +952,16 @@ class CCharPtrType(CStringType, CPtrType):
CPtrType.__init__(self, c_char_type)
class CUTF8CharPtrType(CUTF8StringType, CPtrType):
# C 'char *' type, encoded in UTF-8.
parsetuple_format = "s"
pymemberdef_typecode = "T_STRING"
def __init__(self):
CPtrType.__init__(self, c_char_type)
class ErrorType(PyrexType):
# Used to prevent propagation of error messages.
......@@ -974,7 +1026,9 @@ c_longdouble_type = CFloatType(8)
c_null_ptr_type = CNullPtrType(c_void_type)
c_char_array_type = CCharArrayType(None)
c_utf8_char_array_type = CUTF8CharArrayType(None)
c_char_ptr_type = CCharPtrType()
c_utf8_char_ptr_type = CUTF8CharPtrType()
c_char_ptr_ptr_type = CPtrType(c_char_ptr_type)
c_int_ptr_type = CPtrType(c_int_type)
......
......@@ -212,7 +212,7 @@ class PyrexScanner(Scanner):
resword_dict = build_resword_dict()
def __init__(self, file, filename, parent_scanner = None,
type_names = None, context = None):
type_names = None, context = None, source_encoding=None):
Scanner.__init__(self, get_lexicon(), file, filename)
if parent_scanner:
self.context = parent_scanner.context
......@@ -226,6 +226,7 @@ class PyrexScanner(Scanner):
self.compile_time_env = initial_compile_time_env()
self.compile_time_eval = 1
self.compile_time_expr = 0
self.source_encoding = source_encoding
self.trace = trace_scanner
self.indentation_stack = [0]
self.indentation_char = None
......
......@@ -3,8 +3,6 @@
#
import re
import bisect
from Errors import warning, error, InternalError
import Options
import Naming
......@@ -438,7 +436,13 @@ class Scope:
def add_string_const(self, value):
# Add an entry for a string constant.
cname = self.new_const_cname()
entry = Entry("", cname, c_char_array_type, init = value)
if value.is_unicode:
c_type = c_utf8_char_array_type
value = value.utf8encode()
else:
c_type = c_char_array_type
value = value.byteencode()
entry = Entry("", cname, c_type, init = value)
entry.used = 1
self.const_entries.append(entry)
return entry
......@@ -460,7 +464,7 @@ class Scope:
# Python identifier, it will be interned.
if not entry.pystring_cname:
value = entry.init
if identifier_pattern.match(value) and isinstance(value, str):
if not entry.type.is_unicode and identifier_pattern.match(value):
entry.pystring_cname = self.intern(value)
entry.is_interned = 1
else:
......
......@@ -3,7 +3,7 @@
# anywhere else in particular
#
import os, sys
import os, sys, re, codecs
def replace_suffix(path, newsuf):
base, _ = os.path.splitext(path)
......@@ -32,3 +32,25 @@ def castrate_file(path, st):
f.close()
if st:
os.utime(path, (st.st_atime, st.st_mtime))
# support for source file encoding detection and unicode decoding
# Doubled backslashes keep the pattern value identical to the regex
# 'coding[:=]\s*([-\w.]+)' without relying on unknown string escapes.
_match_file_encoding = re.compile(u"coding[:=]\\s*([-\\w.]+)").search


def detect_file_encoding(source_filename):
    """Return the encoding declared in the first two lines of a source file.

    The file is scanned for a ``coding[:=] <name>`` declaration as
    described in PEP 263; only the first two lines are considered.
    When no declaration is found (including for an empty file), the
    PEP 3120 default "UTF-8" is returned.

    source_filename  -- path of the source file to inspect

    Returns the declared encoding name exactly as written in the file,
    or "UTF-8".  Errors from opening the file propagate to the caller.
    """
    # PEPs 263 and 3120
    # The file is only *probed* as UTF-8 here.  Bytes that are not valid
    # UTF-8 (e.g. Latin-1 text near the declaration) are dropped instead
    # of raising UnicodeDecodeError, so the declaration can still be found.
    # Plain "r" is used because the codec reader splits on every newline
    # style itself and the "U" flag is rejected by newer Python versions.
    f = codecs.open(source_filename, "r", encoding="UTF-8", errors="ignore")
    try:
        for line_no, line in enumerate(f):
            encoding = _match_file_encoding(line)
            if encoding:
                return encoding.group(1)
            if line_no == 1:
                # a declaration is only valid on line one or two
                break
    finally:
        f.close()
    return "UTF-8"
def open_source_file(source_filename, mode="rU"):
    """Open a source file as a decoding stream.

    The encoding is whatever detect_file_encoding() reports for the
    file; the result of codecs.open() is returned unchanged.

    NOTE(review): the "U" flag in the default mode is rejected by
    open() on Python 3.11+; the default is kept as-is for existing
    callers -- confirm the supported Python versions.
    """
    return codecs.open(
        source_filename,
        mode=mode,
        encoding=detect_file_encoding(source_filename))
......@@ -2,20 +2,16 @@
import os, sys, unittest, doctest
#from Cython.Distutils.build_ext import build_ext
#from Cython.Distutils.extension import Extension
from distutils.extension import Extension
from Cython.Distutils.extension import Extension
from Cython.Distutils import build_ext
from distutils.dist import Distribution
distutils_distro = Distribution()
TEST_DIRS = ['compile', 'run']
TEST_RUN_DIRS = ['run']
INCLUDE_DIRS = os.getenv('INCLUDE', '').split(os.pathsep)
INCLUDE_DIRS = [ d for d in os.getenv('INCLUDE', '').split(os.pathsep) if d ]
CFLAGS = os.getenv('CFLAGS', '').split()
class TestBuilder(object):
......
struct CrunchyType {
int number;
PyObject* string;
};
cdef extern from "crunchytype.h":
cdef class crunchytype.Crunchy [ object CrunchyType ]:
cdef int number
cdef object string
from crunchytype cimport Crunchy
cdef class Sub2(Crunchy):
cdef char character
cdef class Sub1(Sub2):
cdef char character
__doc__ = """
>>>
>>> f()
(30, 22)
"""
def f():
cdef int int1, int2, int3
cdef char *ptr1, *ptr2, *ptr3
cdef char *ptr1, *ptr2 = "test", *ptr3 = "toast"
int2 = 10
int3 = 20
obj1 = 1
......
__doc__ = """
>>> int2 = 42
>>> int3 = 7
>>> char1 = ord('C')
>>> int1 = int2 | int3
>>> int1 |= int2 ^ int3
>>> int1 ^= int2 & int3
>>> int1 ^= int2 << int3
>>> int1 ^= int2 >> int3
>>> int1 ^= int2 << int3 | int2 >> int3
>>> long1 = char1 | int1
>>> print (int1, long1) == f()
True
>>> f()
(5376, 67)
(45, 111)
"""
def f():
......@@ -12,10 +26,10 @@ def f():
char1 = c'C'
int1 = int2 | int3
int1 = int2 ^ int3
int1 = int2 & int3
int1 = int2 << int3
int1 = int2 >> int3
int1 = int2 << int3 | int2 >> int3
long1 = char1 | long2
int1 |= int2 ^ int3
int1 ^= int2 & int3
int1 ^= int2 << int3
int1 ^= int2 >> int3
int1 ^= int2 << int3 | int2 >> int3
long1 = char1 | int1
return int1, long1
__doc__ = """
>>> c()
120
>>> i()
42
>>> i1() == 42
True
>>> i2() == 0x42
True
>>> i3() == 042
True
>>> l()
666
>>> f()
......@@ -23,7 +27,9 @@ DEF TUPLE = (1, 2, "buckle my shoe")
DEF TRUE_FALSE = (True, False)
DEF CHAR = c'x'
DEF INT = 42
DEF INT1 = 42
DEF INT2 = 0x42
DEF INT3 = 042
DEF LONG = 666L
DEF FLOAT = 12.5
DEF STR = "spam"
......@@ -37,9 +43,19 @@ def c():
c = CHAR
return c
def i():
def i1():
cdef int i
i = INT1
return i
def i2():
cdef int i
i = INT2
return i
def i3():
cdef int i
i = INT
i = INT3
return i
def l():
......
__doc__ = """
>>> D
2
"""
D = 1
include "testinclude.pxi"
__doc__ = """
>>>
>>> s = Spam(Eggs("ham"))
>>> test(s)
'ham'
"""
cdef class Eggs:
cdef object ham
def __init__(self, ham):
self.ham = ham
cdef class Spam:
cdef Eggs eggs
def __init__(self, eggs):
self.eggs = eggs
cdef void tomato(Spam s):
cdef object tomato(Spam s):
food = s.eggs.ham
return food
def test(Spam s):
return tomato(s)
__doc__ = """
>>> print f.__doc__
This is a function docstring.
>>> print C.__doc__
This is a class docstring.
>>> print T.__doc__
This is an extension type docstring.
>>> f.__doc__
'This is a function docstring.'
>>> C.__doc__
'This is a class docstring.'
>>> CS.__doc__
'This is a subclass docstring.'
>>> print CSS.__doc__
None
>>> T.__doc__
'This is an extension type docstring.'
>>> TS.__doc__
'This is an extension subtype docstring.'
>>> TSS.__doc__
Compare with standard Python:
>>> def f():
... 'This is a function docstring.'
>>> f.__doc__
'This is a function docstring.'
>>> class C:
... 'This is a class docstring.'
>>> class CS(C):
... 'This is a subclass docstring.'
>>> class CSS(CS):
... pass
>>> C.__doc__
'This is a class docstring.'
>>> CS.__doc__
'This is a subclass docstring.'
>>> CSS.__doc__
"""
def f():
......@@ -13,6 +42,17 @@ def f():
class C:
"This is a class docstring."
class CS(C):
"This is a subclass docstring."
class CSS(CS):
pass
cdef class T:
"This is an extension type docstring."
cdef class TS(T):
"This is an extension subtype docstring."
cdef class TSS(TS):
pass
__doc__ = """
>>> f()
12.5
>>> nan1()
nan
>>> nan2()
nan
>>> nan3()
nan
>>> float_nan
nan
>>> infp1()
inf
>>> infp1() == float('inf')
True
>>> infp2()
inf
>>> infp2() == float('inf')
True
>>> infp3()
inf
>>> infp3() == float('inf')
True
>>> float_infp
inf
>>> float_infp == float('inf')
True
>>> infn1()
-inf
>>> infn1() == float('-inf')
True
>>> infn2()
-inf
>>> infn2() == float('-inf')
True
>>> infn3()
-inf
>>> infn3() == float('-inf')
True
>>> float_infn
-inf
>>> float_infn == float('-inf')
True
"""
DEF FLOAT = 12.5
DEF FLOAT_NAN = float('nan')
DEF FLOAT_INFP = float('+inf')
DEF FLOAT_INFN = float('-inf')
float_nan = FLOAT_NAN
float_infp = FLOAT_INFP
float_infn = FLOAT_INFN
def f():
cdef float f
f = FLOAT
return f
def nan1():
cdef double f
f = FLOAT_NAN
return f
def nan2():
cdef double f
f = float('nan')
return f
def nan3():
cdef float f
f = FLOAT_NAN
return f
def infp1():
cdef double f
f = FLOAT_INFP
return f
def infp2():
cdef double f
f = float('+inf')
return f
def infp3():
cdef float f
f = FLOAT_INFP
return f
def infn1():
cdef double f
f = FLOAT_INFN
return f
def infn2():
cdef double f
f = float('-inf')
return f
def infn3():
cdef float f
f = FLOAT_INFN
return f
......@@ -49,13 +49,17 @@ __doc__ = r"""
True
>>> d == u'üÖä'
True
>>> e == u'\x03\x67\xf8\uf8d2Søk ik'
>>> e == u'\x03\x67\xf8\uf8d2Søk ik' # unescaped by Cython
True
>>> f == u'\xf8'
>>> e == u'\\x03\\x67\\xf8\\uf8d2Søk ik' # unescaped by Python
True
>>> f == u'\xf8' # unescaped by Cython
True
>>> f == u'\\xf8' # unescaped by Python
True
>>> add == u'Søk ik' + u'üÖä' + 'abc'
True
>>> null == u'\\x00' # doctest needs a double slash here
>>> null == u'\\x00' # unescaped by Python (required by doctest)
True
"""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment