Commit 2bde7094 authored by Robert Bradshaw

trivial merge

parents e918de3e bf579cac
...@@ -129,6 +129,9 @@ def parse_command_line(args): ...@@ -129,6 +129,9 @@ def parse_command_line(args):
arg = pop_arg() arg = pop_arg()
if arg.endswith(".pyx"): if arg.endswith(".pyx"):
sources.append(arg) sources.append(arg)
elif arg.endswith(".py"):
# maybe do some other stuff, but this should work for now
sources.append(arg)
elif arg.endswith(".o"): elif arg.endswith(".o"):
options.objects.append(arg) options.objects.append(arg)
else: else:
......
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
# Pyrex - Code output module # Pyrex - Code output module
# #
import codecs
import Naming import Naming
import Options import Options
from Cython.Utils import open_new_file from Cython.Utils import open_new_file, open_source_file
from PyrexTypes import py_object_type, typecast from PyrexTypes import py_object_type, typecast
from TypeSlots import method_coexist from TypeSlots import method_coexist
...@@ -85,23 +86,24 @@ class CCodeWriter: ...@@ -85,23 +86,24 @@ class CCodeWriter:
def indent(self): def indent(self):
self.f.write(" " * self.level) self.f.write(" " * self.level)
def get_py_version_hex(self, pyversion):
return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
def file_contents(self, file): def file_contents(self, file):
try: try:
return self.input_file_contents[file] return self.input_file_contents[file]
except KeyError: except KeyError:
F = [line.replace('*/', '*[inserted by cython to avoid comment closer]/') F = [line.encode('ASCII', 'replace').replace(
for line in open(file).readlines()] '*/', '*[inserted by cython to avoid comment closer]/')
for line in open_source_file(file)]
self.input_file_contents[file] = F self.input_file_contents[file] = F
return F return F
def get_py_version_hex(self, pyversion):
return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
def mark_pos(self, pos): def mark_pos(self, pos):
if pos is None: if pos is None:
return return
file, line, col = pos filename, line, col = pos
contents = self.file_contents(file) contents = self.file_contents(filename)
context = '' context = ''
for i in range(max(0,line-3), min(line+2, len(contents))): for i in range(max(0,line-3), min(line+2, len(contents))):
...@@ -110,7 +112,7 @@ class CCodeWriter: ...@@ -110,7 +112,7 @@ class CCodeWriter:
s = s.rstrip() + ' # <<<<<<<<<<<<<< ' + '\n' s = s.rstrip() + ' # <<<<<<<<<<<<<< ' + '\n'
context += " * " + s context += " * " + s
marker = '"%s":%s\n%s' % (file, line, context) marker = '"%s":%d\n%s' % (filename.encode('ASCII', 'replace'), line, context)
if self.last_marker != marker: if self.last_marker != marker:
self.marker = marker self.marker = marker
......
import bisect import bisect, sys
# This module keeps track of arbitrary "states" at any point of the code. # This module keeps track of arbitrary "states" at any point of the code.
# A state is considered known if every path to the given point agrees on # A state is considered known if every path to the given point agrees on
...@@ -13,6 +13,8 @@ import bisect ...@@ -13,6 +13,8 @@ import bisect
# redesigned. It doesn't take return, raise, continue, or break into # redesigned. It doesn't take return, raise, continue, or break into
# account. # account.
_END_POS = ((unichr(sys.maxunicode)*10),())
class ControlFlow: class ControlFlow:
def __init__(self, start_pos, incoming, parent): def __init__(self, start_pos, incoming, parent):
...@@ -22,7 +24,7 @@ class ControlFlow: ...@@ -22,7 +24,7 @@ class ControlFlow:
parent = incoming.parent parent = incoming.parent
self.parent = parent self.parent = parent
self.tip = {} self.tip = {}
self.end_pos = ((),) self.end_pos = _END_POS
def start_branch(self, pos): def start_branch(self, pos):
self.end_pos = pos self.end_pos = pos
...@@ -40,10 +42,10 @@ class ControlFlow: ...@@ -40,10 +42,10 @@ class ControlFlow:
self.parent.end_pos = pos self.parent.end_pos = pos
return LinearControlFlow(pos, self.parent) return LinearControlFlow(pos, self.parent)
def get_state(self, item, pos=((),())): def get_state(self, item, pos=_END_POS):
return self.get_pos_state(item, pos)[1] return self.get_pos_state(item, pos)[1]
def get_pos_state(self, item, pos=((),())): def get_pos_state(self, item, pos=_END_POS):
# do some caching # do some caching
if pos > self.end_pos: if pos > self.end_pos:
try: try:
...@@ -61,13 +63,13 @@ class LinearControlFlow(ControlFlow): ...@@ -61,13 +63,13 @@ class LinearControlFlow(ControlFlow):
self.events = {} self.events = {}
def set_state(self, pos, item, state): def set_state(self, pos, item, state):
if self.tip.has_key(item): if item in self.tip:
del self.tip[item] del self.tip[item]
if pos < self.start_pos: if pos < self.start_pos:
if self.incoming is not None: if self.incoming is not None:
self.incoming.set_state(pos, item, state) self.incoming.set_state(pos, item, state)
else: else:
if self.events.has_key(item): if item in self.events:
event_list = self.events[item] event_list = self.events[item]
else: else:
event_list = [] event_list = []
...@@ -77,7 +79,7 @@ class LinearControlFlow(ControlFlow): ...@@ -77,7 +79,7 @@ class LinearControlFlow(ControlFlow):
def _get_pos_state(self, item, pos): def _get_pos_state(self, item, pos):
if pos > self.start_pos: if pos > self.start_pos:
if self.events.has_key(item): if item in self.events:
event_list = self.events[item] event_list = self.events[item]
for event in event_list[::-1]: for event in event_list[::-1]:
if event[0] < pos: if event[0] < pos:
...@@ -116,7 +118,7 @@ class BranchingControlFlow(ControlFlow): ...@@ -116,7 +118,7 @@ class BranchingControlFlow(ControlFlow):
def set_state(self, pos, item, state): def set_state(self, pos, item, state):
if self.tip.has_key(item): if item in self.tip:
del self.tip[item] del self.tip[item]
if pos < self.start_pos: if pos < self.start_pos:
...@@ -157,5 +159,3 @@ class BranchingControlFlow(ControlFlow): ...@@ -157,5 +159,3 @@ class BranchingControlFlow(ControlFlow):
if self.incoming is not limit and self.incoming is not None: if self.incoming is not limit and self.incoming is not None:
s = "%s\n%s" % (self.incoming.to_string(indent, limit=limit), s) s = "%s\n%s" % (self.incoming.to_string(indent, limit=limit), s)
return s return s
\ No newline at end of file
...@@ -18,6 +18,29 @@ from Cython.Debugging import print_call_chain ...@@ -18,6 +18,29 @@ from Cython.Debugging import print_call_chain
from DebugFlags import debug_disposal_code, debug_temp_alloc, \ from DebugFlags import debug_disposal_code, debug_temp_alloc, \
debug_coercion debug_coercion
class EncodedString(unicode):
    # A unicode string subclass that remembers the encoding of the
    # source text it came from.
    #
    # encoding   string or None   None for a genuine unicode string,
    #                             otherwise the source encoding to use
    #                             when turning the value back into bytes
    encoding = None

    # True when no source encoding is attached (a plain unicode string).
    is_unicode = property(lambda self: self.encoding is None)

    def utf8encode(self):
        # Only meaningful for unicode strings: encode the text as UTF-8.
        assert self.encoding is None
        return self.encode("UTF-8")

    def byteencode(self):
        # Only meaningful for byte strings: encode the text back into
        # the recorded source encoding.
        assert self.encoding is not None
        return self.encode(self.encoding)

    # Disabled: encoding-aware equality. As written, comparison is the
    # one inherited from unicode and does not look at 'encoding'.
#    def __eq__(self, other):
#        return unicode.__eq__(self, other) and \
#            getattr(other, 'encoding', '') == self.encoding
class ExprNode(Node): class ExprNode(Node):
# subexprs [string] Class var holding names of subexpr node attrs # subexprs [string] Class var holding names of subexpr node attrs
# type PyrexType Type of the result # type PyrexType Type of the result
...@@ -669,7 +692,7 @@ class IntNode(ConstNode): ...@@ -669,7 +692,7 @@ class IntNode(ConstNode):
return str(self.value) return str(self.value)
def compile_time_value(self, denv): def compile_time_value(self, denv):
return int(self.value) return int(self.value, 0)
class FloatNode(ConstNode): class FloatNode(ConstNode):
...@@ -678,6 +701,17 @@ class FloatNode(ConstNode): ...@@ -678,6 +701,17 @@ class FloatNode(ConstNode):
def compile_time_value(self, denv): def compile_time_value(self, denv):
return float(self.value) return float(self.value)
def calculate_result_code(self):
    """Return the C expression text for this float constant.

    The string form of ``self.value`` is emitted unchanged, except for
    the three spellings 'nan', 'inf' and '-inf', which are mapped onto
    expressions built from Py_HUGE_VAL.
    """
    text = str(self.value)
    special_spellings = {
        'nan': "(Py_HUGE_VAL * 0)",
        'inf': "Py_HUGE_VAL",
        '-inf': "(-Py_HUGE_VAL)",
    }
    # Anything that is not one of the special spellings passes through.
    return special_spellings.get(text, text)
class StringNode(ConstNode): class StringNode(ConstNode):
# entry Symtab.Entry # entry Symtab.Entry
...@@ -685,15 +719,16 @@ class StringNode(ConstNode): ...@@ -685,15 +719,16 @@ class StringNode(ConstNode):
type = PyrexTypes.c_char_ptr_type type = PyrexTypes.c_char_ptr_type
def compile_time_value(self, denv): def compile_time_value(self, denv):
return eval('"%s"' % self.value) return self.value
def analyse_types(self, env): def analyse_types(self, env):
self.entry = env.add_string_const(self.value) self.entry = env.add_string_const(self.value)
def coerce_to(self, dst_type, env): def coerce_to(self, dst_type, env):
if dst_type.is_int: if dst_type.is_int:
if not self.type.is_pyobject and len(self.value) == 1: if not self.type.is_pyobject and len(self.entry.init) == 1:
return CharNode(self.pos, value=self.value) # we use the *encoded* value here
return CharNode(self.pos, value=self.entry.init)
else: else:
error(self.pos, "Only coerce single-character ascii strings can be used as ints.") error(self.pos, "Only coerce single-character ascii strings can be used as ints.")
return self return self
...@@ -776,7 +811,7 @@ class NameNode(AtomicExprNode): ...@@ -776,7 +811,7 @@ class NameNode(AtomicExprNode):
try: try:
return denv.lookup(self.name) return denv.lookup(self.name)
except KeyError: except KeyError:
error(self.pos, "Compile-time name '%s' not defined", self.name) error(self.pos, "Compile-time name '%s' not defined" % self.name)
def coerce_to(self, dst_type, env): def coerce_to(self, dst_type, env):
# If coercing to a generic pyobject and this is a builtin # If coercing to a generic pyobject and this is a builtin
......
...@@ -2,12 +2,11 @@ ...@@ -2,12 +2,11 @@
# Cython Top Level # Cython Top Level
# #
import os, sys, re import os, sys, re, codecs
if sys.version_info[:2] < (2, 2): if sys.version_info[:2] < (2, 2):
print >>sys.stderr, "Sorry, Cython requires Python 2.2 or later" print >>sys.stderr, "Sorry, Cython requires Python 2.2 or later"
sys.exit(1) sys.exit(1)
import os
from time import time from time import time
import Version import Version
from Scanning import PyrexScanner from Scanning import PyrexScanner
...@@ -140,8 +139,17 @@ class Context: ...@@ -140,8 +139,17 @@ class Context:
def parse(self, source_filename, type_names, pxd, full_module_name): def parse(self, source_filename, type_names, pxd, full_module_name):
# Parse the given source file and return a parse tree. # Parse the given source file and return a parse tree.
f = open(source_filename, "rU") f = Utils.open_source_file(source_filename, "rU")
s = PyrexScanner(f, source_filename,
if isinstance(source_filename, unicode):
name = source_filename
else:
filename_encoding = sys.getfilesystemencoding()
if filename_encoding is None:
filename_encoding = getdefaultencoding()
name = source_filename.decode(filename_encoding)
s = PyrexScanner(f, name, source_encoding = f.encoding,
type_names = type_names, context = self) type_names = type_names, context = self)
try: try:
tree = Parsing.p_module(s, pxd, full_module_name) tree = Parsing.p_module(s, pxd, full_module_name)
...@@ -310,7 +318,6 @@ def compile(source, options = None, c_compile = 0, c_link = 0, ...@@ -310,7 +318,6 @@ def compile(source, options = None, c_compile = 0, c_link = 0,
#------------------------------------------------------------------------ #------------------------------------------------------------------------
def main(command_line = 0): def main(command_line = 0):
args = sys.argv[1:] args = sys.argv[1:]
any_failures = 0 any_failures = 0
if command_line: if command_line:
......
This diff is collapsed.
...@@ -38,6 +38,30 @@ def relative_position(pos): ...@@ -38,6 +38,30 @@ def relative_position(pos):
""" """
return (pos[0][absolute_path_length+1:], pos[1]) return (pos[0][absolute_path_length+1:], pos[1])
def embed_position(pos, docstring):
    """Prefix a docstring with a 'File: ... (starting at line ...)' header.

    pos        source position, passed on to relative_position()
    docstring  EncodedString or None

    Returns `docstring` unchanged when Options.embed_pos_in_docstring is
    off. Otherwise returns a new EncodedString holding the position
    header, followed by the original docstring if there is one.
    """
    if not Options.embed_pos_in_docstring:
        return docstring

    # Imported at function scope, the same way other code in this module
    # pulls in ExprNodes.
    import ExprNodes

    # Fixed: this is a module-level function, so there is no 'self';
    # the position comes from the 'pos' argument.
    pos_line = u'File: %s (starting at line %s)' % relative_position(pos)
    if docstring is None:
        # No docstring at all: the header alone, as a unicode string
        # (encoding stays None).
        return ExprNodes.EncodedString(pos_line)

    # Make sure the header (which contains the file name) can be encoded
    # in the docstring's encoding; otherwise fall back to a unicode
    # string by dropping the encoding.
    encoding = docstring.encoding
    if encoding is not None:
        try:
            pos_line.encode(encoding)
        except UnicodeEncodeError:
            encoding = None

    if not docstring:
        # Empty docstring: only the header, but keep the encoding
        # decided above.
        doc = ExprNodes.EncodedString(pos_line)
    else:
        # The separator is the two characters backslash + 'n', not a real
        # newline -- presumably because the text is later written out as
        # C string literal content (TODO confirm).
        doc = ExprNodes.EncodedString(pos_line + u'\\n' + docstring)
    doc.encoding = encoding
    return doc
class AttributeAccessor: class AttributeAccessor:
"""Used as the result of the Node.get_children_accessors() generator""" """Used as the result of the Node.get_children_accessors() generator"""
...@@ -1199,7 +1223,7 @@ class DefNode(FuncDefNode): ...@@ -1199,7 +1223,7 @@ class DefNode(FuncDefNode):
# args [CArgDeclNode] formal arguments # args [CArgDeclNode] formal arguments
# star_arg PyArgDeclNode or None * argument # star_arg PyArgDeclNode or None * argument
# starstar_arg PyArgDeclNode or None ** argument # starstar_arg PyArgDeclNode or None ** argument
# doc string or None # doc EncodedString or None
# body StatListNode # body StatListNode
# #
# The following subnode is constructed internally # The following subnode is constructed internally
...@@ -1357,17 +1381,12 @@ class DefNode(FuncDefNode): ...@@ -1357,17 +1381,12 @@ class DefNode(FuncDefNode):
Naming.pyfunc_prefix + prefix + name Naming.pyfunc_prefix + prefix + name
entry.pymethdef_cname = \ entry.pymethdef_cname = \
Naming.pymethdef_prefix + prefix + name Naming.pymethdef_prefix + prefix + name
if not Options.docstrings: if Options.docstrings:
self.entry.doc = None entry.doc = embed_position(self.pos, self.doc)
else:
if Options.embed_pos_in_docstring:
entry.doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
if not self.doc is None:
entry.doc = entry.doc + '\\n' + self.doc
else:
entry.doc = self.doc
entry.doc_cname = \ entry.doc_cname = \
Naming.funcdoc_prefix + prefix + name Naming.funcdoc_prefix + prefix + name
else:
entry.doc = None
def declare_arguments(self, env): def declare_arguments(self, env):
for arg in self.args: for arg in self.args:
...@@ -1897,7 +1916,7 @@ class OverrideCheckNode(StatNode): ...@@ -1897,7 +1916,7 @@ class OverrideCheckNode(StatNode):
class PyClassDefNode(StatNode, BlockNode): class PyClassDefNode(StatNode, BlockNode):
# A Python class definition. # A Python class definition.
# #
# name string Name of the class # name EncodedString Name of the class
# doc string or None # doc string or None
# body StatNode Attribute definition code # body StatNode Attribute definition code
# entry Symtab.Entry # entry Symtab.Entry
...@@ -1919,9 +1938,7 @@ class PyClassDefNode(StatNode, BlockNode): ...@@ -1919,9 +1938,7 @@ class PyClassDefNode(StatNode, BlockNode):
import ExprNodes import ExprNodes
self.dict = ExprNodes.DictNode(pos, key_value_pairs = []) self.dict = ExprNodes.DictNode(pos, key_value_pairs = [])
if self.doc and Options.docstrings: if self.doc and Options.docstrings:
if Options.embed_pos_in_docstring: doc = embed_position(self.pos, self.doc)
doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
doc = doc + '\\n' + self.doc
doc_node = ExprNodes.StringNode(pos, value = doc) doc_node = ExprNodes.StringNode(pos, value = doc)
else: else:
doc_node = None doc_node = None
...@@ -1961,7 +1978,7 @@ class PyClassDefNode(StatNode, BlockNode): ...@@ -1961,7 +1978,7 @@ class PyClassDefNode(StatNode, BlockNode):
self.dict.generate_disposal_code(code) self.dict.generate_disposal_code(code)
class CClassDefNode(StatNode): class CClassDefNode(StatNode, BlockNode):
# An extension type definition. # An extension type definition.
# #
# visibility 'private' or 'public' or 'extern' # visibility 'private' or 'public' or 'extern'
...@@ -2034,11 +2051,7 @@ class CClassDefNode(StatNode): ...@@ -2034,11 +2051,7 @@ class CClassDefNode(StatNode):
scope = self.entry.type.scope scope = self.entry.type.scope
if self.doc and Options.docstrings: if self.doc and Options.docstrings:
if Options.embed_pos_in_docstring: scope.doc = embed_position(self.pos, self.doc)
scope.doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
scope.doc = scope.doc + '\\n' + self.doc
else:
scope.doc = self.doc
if has_body: if has_body:
self.body.analyse_declarations(scope) self.body.analyse_declarations(scope)
...@@ -2054,6 +2067,7 @@ class CClassDefNode(StatNode): ...@@ -2054,6 +2067,7 @@ class CClassDefNode(StatNode):
self.body.analyse_expressions(scope) self.body.analyse_expressions(scope)
def generate_function_definitions(self, env, code, transforms): def generate_function_definitions(self, env, code, transforms):
self.generate_py_string_decls(self.entry.type.scope, code)
if self.body: if self.body:
self.body.generate_function_definitions( self.body.generate_function_definitions(
self.entry.type.scope, code, transforms) self.entry.type.scope, code, transforms)
...@@ -2073,7 +2087,7 @@ class PropertyNode(StatNode): ...@@ -2073,7 +2087,7 @@ class PropertyNode(StatNode):
# Definition of a property in an extension type. # Definition of a property in an extension type.
# #
# name string # name string
# doc string or None Doc string # doc EncodedString or None Doc string
# body StatListNode # body StatListNode
child_attrs = ["body"] child_attrs = ["body"]
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# Pyrex Parser # Pyrex Parser
# #
import os, re import os, re, codecs
from string import join, replace from string import join, replace
from types import ListType, TupleType from types import ListType, TupleType
from Scanning import PyrexScanner from Scanning import PyrexScanner
...@@ -10,6 +10,7 @@ import Nodes ...@@ -10,6 +10,7 @@ import Nodes
import ExprNodes import ExprNodes
from ModuleNode import ModuleNode from ModuleNode import ModuleNode
from Errors import error, InternalError from Errors import error, InternalError
from Cython import Utils
def p_ident(s, message = "Expected an identifier"): def p_ident(s, message = "Expected an identifier"):
if s.sy == 'IDENT': if s.sy == 'IDENT':
...@@ -281,8 +282,10 @@ def p_call(s, function): ...@@ -281,8 +282,10 @@ def p_call(s, function):
if not arg.is_name: if not arg.is_name:
s.error("Expected an identifier before '='", s.error("Expected an identifier before '='",
pos = arg.pos) pos = arg.pos)
encoded_name = ExprNodes.EncodedString(arg.name)
encoded_name.encoding = s.source_encoding
keyword = ExprNodes.StringNode(arg.pos, keyword = ExprNodes.StringNode(arg.pos,
value = arg.name) value = encoded_name)
arg = p_simple_expr(s) arg = p_simple_expr(s)
keyword_args.append((keyword, arg)) keyword_args.append((keyword, arg))
else: else:
...@@ -459,7 +462,7 @@ def p_atom(s): ...@@ -459,7 +462,7 @@ def p_atom(s):
value = s.systring[:-1] value = s.systring[:-1]
s.next() s.next()
return ExprNodes.ImagNode(pos, value = value) return ExprNodes.ImagNode(pos, value = value)
elif sy == 'STRING' or sy == 'BEGIN_STRING': elif sy == 'BEGIN_STRING':
kind, value = p_cat_string_literal(s) kind, value = p_cat_string_literal(s)
if kind == 'c': if kind == 'c':
return ExprNodes.CharNode(pos, value = value) return ExprNodes.CharNode(pos, value = value)
...@@ -500,7 +503,12 @@ def p_name(s, name): ...@@ -500,7 +503,12 @@ def p_name(s, name):
elif isinstance(value, float): elif isinstance(value, float):
return ExprNodes.FloatNode(pos, value = rep) return ExprNodes.FloatNode(pos, value = rep)
elif isinstance(value, str): elif isinstance(value, str):
return ExprNodes.StringNode(pos, value = rep[1:-1]) sval = ExprNodes.EncodedString(rep[1:-1])
sval.encoding = value.encoding
return ExprNodes.StringNode(pos, value = sval)
elif isinstance(value, unicode):
sval = ExprNodes.EncodedString(rep[2:-1])
return ExprNodes.StringNode(pos, value = sval)
else: else:
error(pos, "Invalid type for compile-time constant: %s" error(pos, "Invalid type for compile-time constant: %s"
% value.__class__.__name__) % value.__class__.__name__)
...@@ -508,21 +516,25 @@ def p_name(s, name): ...@@ -508,21 +516,25 @@ def p_name(s, name):
def p_cat_string_literal(s): def p_cat_string_literal(s):
# A sequence of one or more adjacent string literals. # A sequence of one or more adjacent string literals.
# Returns (kind, value) where kind in ('', 'c', 'r') # Returns (kind, value) where kind in ('', 'c', 'r', 'u')
kind, value = p_string_literal(s) kind, value = p_string_literal(s)
if kind != 'c': if kind != 'c':
strings = [value] strings = [value]
while s.sy == 'STRING' or s.sy == 'BEGIN_STRING': while s.sy == 'BEGIN_STRING':
next_kind, next_value = p_string_literal(s) next_kind, next_value = p_string_literal(s)
if next_kind == 'c': if next_kind == 'c':
self.error( self.error(
"Cannot concatenate char literal with another string or char literal") "Cannot concatenate char literal with another string or char literal")
elif next_kind == 'u':
kind = 'u'
strings.append(next_value) strings.append(next_value)
value = ''.join(strings) value = ExprNodes.EncodedString( u''.join(strings) )
if kind != 'u':
value.encoding = s.source_encoding
return kind, value return kind, value
def p_opt_string_literal(s): def p_opt_string_literal(s):
if s.sy == 'STRING' or s.sy == 'BEGIN_STRING': if s.sy == 'BEGIN_STRING':
return p_string_literal(s) return p_string_literal(s)
else: else:
return None return None
...@@ -530,10 +542,6 @@ def p_opt_string_literal(s): ...@@ -530,10 +542,6 @@ def p_opt_string_literal(s):
def p_string_literal(s): def p_string_literal(s):
# A single string or char literal. # A single string or char literal.
# Returns (kind, value) where kind in ('', 'c', 'r', 'u') # Returns (kind, value) where kind in ('', 'c', 'r', 'u')
if s.sy == 'STRING':
value = unquote(s.systring)
s.next()
return value
# s.sy == 'BEGIN_STRING' # s.sy == 'BEGIN_STRING'
pos = s.position() pos = s.position()
#is_raw = s.systring[:1].lower() == "r" #is_raw = s.systring[:1].lower() == "r"
...@@ -549,8 +557,6 @@ def p_string_literal(s): ...@@ -549,8 +557,6 @@ def p_string_literal(s):
systr = s.systring systr = s.systring
if len(systr) == 1 and systr in "'\"\n": if len(systr) == 1 and systr in "'\"\n":
chars.append('\\') chars.append('\\')
if kind == 'u' and not isinstance(systr, unicode):
systr = systr.decode("UTF-8")
chars.append(systr) chars.append(systr)
elif sy == 'ESCAPE': elif sy == 'ESCAPE':
systr = s.systring systr = s.systring
...@@ -572,7 +578,8 @@ def p_string_literal(s): ...@@ -572,7 +578,8 @@ def p_string_literal(s):
elif c in 'ux': elif c in 'ux':
if kind == 'u': if kind == 'u':
try: try:
chars.append(systr.decode('unicode_escape')) chars.append(
systr.encode("ASCII").decode('unicode_escape'))
except UnicodeDecodeError: except UnicodeDecodeError:
s.error("Invalid unicode escape '%s'" % systr, s.error("Invalid unicode escape '%s'" % systr,
pos = pos) pos = pos)
...@@ -593,50 +600,12 @@ def p_string_literal(s): ...@@ -593,50 +600,12 @@ def p_string_literal(s):
"Unexpected token %r:%r in string literal" % "Unexpected token %r:%r in string literal" %
(sy, s.systring)) (sy, s.systring))
s.next() s.next()
value = ''.join(chars) value = ExprNodes.EncodedString( u''.join(chars) )
if kind != 'u':
value.encoding = s.source_encoding
#print "p_string_literal: value =", repr(value) ### #print "p_string_literal: value =", repr(value) ###
return kind, value return kind, value
def unquote(s):
    # Strip the quotes (and an optional leading 'r' prefix) from the
    # source text of a string literal and return its contents with the
    # escapes rewritten as described in the branches below.
    is_raw = 0
    if s[:1].lower() == "r":
        is_raw = 1
        s = s[1:]
    # Triple-quoted literals lose three characters at each end,
    # single-quoted ones lose one.
    q = s[:3]
    if q == '"""' or q == "'''":
        s = s[3:-3]
    else:
        s = s[1:-1]
    if is_raw:
        # Raw string: double every backslash and put a backslash in
        # front of every newline.
        s = s.replace('\\', '\\\\')
        s = s.replace('\n', '\\\n')
    else:
        # Split into double quotes, newlines, escape sequences
        # and spans of regular chars
        l1 = re.split(r'((?:\\[0-7]{1,3})|(?:\\x[0-9A-Fa-f]{2})|(?:\\.)|(?:\\\n)|(?:\n)|")', s)
        #print "unquote: l1 =", l1 ###
        l2 = []
        for item in l1:
            if item == '"' or item == '\n':
                # Bare double quote or newline: prefix with a backslash.
                l2.append('\\' + item)
            elif item == '\\\n':
                # Backslash-newline (line continuation): dropped.
                pass
            elif item[:1] == '\\':
                if len(item) == 2:
                    # Two-character escape: kept if it is one of the
                    # listed ones, otherwise only the escaped character
                    # itself survives.
                    if item[1] in '"\\abfnrtv':
                        l2.append(item)
                    else:
                        l2.append(item[1])
                elif item[1:2] == 'x':
                    # Hex escape: re-emitted with an extra leading 0.
                    l2.append('\\x0' + item[2:])
                else:
                    # octal escape
                    l2.append(item)
            else:
                # Span of regular characters: copied through unchanged.
                l2.append(item)
        s = "".join(l2)
    return s
# list_display ::= "[" [listmaker] "]" # list_display ::= "[" [listmaker] "]"
# listmaker ::= expression ( list_for | ( "," expression )* [","] ) # listmaker ::= expression ( list_for | ( "," expression )* [","] )
# list_iter ::= list_for | list_if # list_iter ::= list_for | list_if
...@@ -946,6 +915,8 @@ def p_import_statement(s): ...@@ -946,6 +915,8 @@ def p_import_statement(s):
ExprNodes.StringNode(pos, value = "*")]) ExprNodes.StringNode(pos, value = "*")])
else: else:
name_list = None name_list = None
dotted_name = ExprNodes.EncodedString(dotted_name)
dotted_name.encoding = s.source_encoding
stat = Nodes.SingleAssignmentNode(pos, stat = Nodes.SingleAssignmentNode(pos,
lhs = ExprNodes.NameNode(pos, lhs = ExprNodes.NameNode(pos,
name = as_name or target_name), name = as_name or target_name),
...@@ -984,14 +955,18 @@ def p_from_import_statement(s): ...@@ -984,14 +955,18 @@ def p_from_import_statement(s):
imported_name_strings = [] imported_name_strings = []
items = [] items = []
for (name_pos, name, as_name) in imported_names: for (name_pos, name, as_name) in imported_names:
encoded_name = ExprNodes.EncodedString(name)
encoded_name.encoding = s.source_encoding
imported_name_strings.append( imported_name_strings.append(
ExprNodes.StringNode(name_pos, value = name)) ExprNodes.StringNode(name_pos, value = encoded_name))
items.append( items.append(
(name, (name,
ExprNodes.NameNode(name_pos, ExprNodes.NameNode(name_pos,
name = as_name or name))) name = as_name or name)))
import_list = ExprNodes.ListNode( import_list = ExprNodes.ListNode(
imported_names[0][0], args = imported_name_strings) imported_names[0][0], args = imported_name_strings)
dotted_name = ExprNodes.EncodedString(dotted_name)
dotted_name.encoding = s.source_encoding
return Nodes.FromImportStatNode(pos, return Nodes.FromImportStatNode(pos,
module = ExprNodes.ImportNode(dotted_name_pos, module = ExprNodes.ImportNode(dotted_name_pos,
module_name = ExprNodes.StringNode(dotted_name_pos, module_name = ExprNodes.StringNode(dotted_name_pos,
...@@ -1204,8 +1179,8 @@ def p_include_statement(s, level): ...@@ -1204,8 +1179,8 @@ def p_include_statement(s, level):
if s.compile_time_eval: if s.compile_time_eval:
include_file_path = s.context.find_include_file(include_file_name, pos) include_file_path = s.context.find_include_file(include_file_name, pos)
if include_file_path: if include_file_path:
f = open(include_file_path, "rU") f = Utils.open_source_file(include_file_path, mode="rU")
s2 = PyrexScanner(f, include_file_path, s) s2 = PyrexScanner(f, include_file_path, s, source_encoding=f.encoding)
try: try:
tree = p_statement_list(s2, level) tree = p_statement_list(s2, level)
finally: finally:
...@@ -1996,7 +1971,8 @@ def p_class_statement(s): ...@@ -1996,7 +1971,8 @@ def p_class_statement(s):
# s.sy == 'class' # s.sy == 'class'
pos = s.position() pos = s.position()
s.next() s.next()
class_name = p_ident(s) class_name = ExprNodes.EncodedString( p_ident(s) )
class_name.encoding = s.source_encoding
if s.sy == '(': if s.sy == '(':
s.next() s.next()
base_list = p_simple_expr_list(s) base_list = p_simple_expr_list(s)
...@@ -2113,7 +2089,7 @@ def p_property_decl(s): ...@@ -2113,7 +2089,7 @@ def p_property_decl(s):
return Nodes.PropertyNode(pos, name = name, doc = doc, body = body) return Nodes.PropertyNode(pos, name = name, doc = doc, body = body)
def p_doc_string(s): def p_doc_string(s):
if s.sy == 'STRING' or s.sy == 'BEGIN_STRING': if s.sy == 'BEGIN_STRING':
_, result = p_cat_string_literal(s) _, result = p_cat_string_literal(s)
if s.sy != 'EOF': if s.sy != 'EOF':
s.expect_newline("Syntax error in doc string") s.expect_newline("Syntax error in doc string")
......
...@@ -37,6 +37,7 @@ class PyrexType(BaseType): ...@@ -37,6 +37,7 @@ class PyrexType(BaseType):
# is_enum boolean Is a C enum type # is_enum boolean Is a C enum type
# is_typedef boolean Is a typedef type # is_typedef boolean Is a typedef type
# is_string boolean Is a C char * type # is_string boolean Is a C char * type
# is_unicode boolean Is a UTF-8 encoded C char * type
# is_returncode boolean Is used only to signal exceptions # is_returncode boolean Is used only to signal exceptions
# is_error boolean Is the dummy error type # is_error boolean Is the dummy error type
# has_attributes boolean Has C dot-selectable attributes # has_attributes boolean Has C dot-selectable attributes
...@@ -83,6 +84,7 @@ class PyrexType(BaseType): ...@@ -83,6 +84,7 @@ class PyrexType(BaseType):
is_enum = 0 is_enum = 0
is_typedef = 0 is_typedef = 0
is_string = 0 is_string = 0
is_unicode = 0
is_returncode = 0 is_returncode = 0
is_error = 0 is_error = 0
has_attributes = 0 has_attributes = 0
...@@ -875,19 +877,49 @@ class CEnumType(CType): ...@@ -875,19 +877,49 @@ class CEnumType(CType):
return self.base_declaration_code(public_decl(base, dll_linkage), entity_code) return self.base_declaration_code(public_decl(base, dll_linkage), entity_code)
def _escape_byte_string(s):
    """Return byte string `s` with every non-ASCII byte escaped for C.

    A pure-ASCII `s` is returned as is. Otherwise each byte >= 128 is
    replaced by a three-digit octal escape and all other bytes are
    copied through unchanged.

    Octal escapes are used instead of hex escapes because a C hex escape
    has no length limit: a high byte followed by a hex-digit character
    (for example 0xE9 followed by "abc") would be read by the C compiler
    as a single over-long escape. An octal escape ends after at most
    three digits, and every value in 128..255 needs exactly three.
    """
    try:
        s.decode("ASCII")
    except UnicodeDecodeError:
        pass
    else:
        # Nothing to escape.
        return s

    escaped = []
    append = escaped.append
    # Decoding as ISO-8859-1 maps each byte onto the code point with the
    # same value, so ord() yields the byte value for every item.
    for c in s.decode("ISO-8859-1"):
        o = ord(c)
        if o >= 128:
            append('\\%03o' % o)
        else:
            append(chr(o))
    return ''.join(escaped)
class CStringType: class CStringType:
# Mixin class for C string types. # Mixin class for C string types.
is_string = 1 is_string = 1
is_unicode = 0
to_py_function = "PyString_FromString" to_py_function = "PyString_FromString"
from_py_function = "PyString_AsString" from_py_function = "PyString_AsString"
exception_value = "NULL" exception_value = "NULL"
def literal_code(self, value): def literal_code(self, value):
if isinstance(value, unicode): assert isinstance(value, str)
value = value.encode("UTF-8") return '"%s"' % _escape_byte_string(value)
return '"%s"' % value
class CUTF8StringType:
    # Mixin class for C unicode types.
    #
    # Sets both the is_string and is_unicode flags. Unlike CStringType,
    # it names PyUnicode_DecodeUTF8 as to_py_function and defines no
    # from_py_function.

    is_string = 1
    is_unicode = 1

    to_py_function = "PyUnicode_DecodeUTF8"
    exception_value = "NULL"

    def literal_code(self, value):
        # Return 'value' wrapped in double quotes, after passing it
        # through _escape_byte_string(). 'value' must already be a byte
        # string (str); callers are expected to have encoded it.
        assert isinstance(value, str)
        return '"%s"' % _escape_byte_string(value)
class CCharArrayType(CStringType, CArrayType): class CCharArrayType(CStringType, CArrayType):
...@@ -900,6 +932,16 @@ class CCharArrayType(CStringType, CArrayType): ...@@ -900,6 +932,16 @@ class CCharArrayType(CStringType, CArrayType):
CArrayType.__init__(self, c_char_type, size) CArrayType.__init__(self, c_char_type, size)
class CUTF8CharArrayType(CUTF8StringType, CArrayType):
    # C 'char []' type, combined with the UTF-8 string mixin.
    #
    # size   passed straight through to CArrayType.__init__

    parsetuple_format = "s"
    pymemberdef_typecode = "T_STRING_INPLACE"

    def __init__(self, size):
        # The element type is always c_char_type; only the size varies.
        CArrayType.__init__(self, c_char_type, size)
class CCharPtrType(CStringType, CPtrType): class CCharPtrType(CStringType, CPtrType):
# C 'char *' type. # C 'char *' type.
...@@ -910,6 +952,16 @@ class CCharPtrType(CStringType, CPtrType): ...@@ -910,6 +952,16 @@ class CCharPtrType(CStringType, CPtrType):
CPtrType.__init__(self, c_char_type) CPtrType.__init__(self, c_char_type)
class CUTF8CharPtrType(CUTF8StringType, CPtrType):
    # C 'char *' type, encoded in UTF-8.

    parsetuple_format = "s"
    pymemberdef_typecode = "T_STRING"

    def __init__(self):
        # Always a pointer to c_char_type; takes no arguments.
        CPtrType.__init__(self, c_char_type)
class ErrorType(PyrexType): class ErrorType(PyrexType):
# Used to prevent propagation of error messages. # Used to prevent propagation of error messages.
...@@ -974,7 +1026,9 @@ c_longdouble_type = CFloatType(8) ...@@ -974,7 +1026,9 @@ c_longdouble_type = CFloatType(8)
c_null_ptr_type = CNullPtrType(c_void_type) c_null_ptr_type = CNullPtrType(c_void_type)
c_char_array_type = CCharArrayType(None) c_char_array_type = CCharArrayType(None)
c_utf8_char_array_type = CUTF8CharArrayType(None)
c_char_ptr_type = CCharPtrType() c_char_ptr_type = CCharPtrType()
c_utf8_char_ptr_type = CUTF8CharPtrType()
c_char_ptr_ptr_type = CPtrType(c_char_ptr_type) c_char_ptr_ptr_type = CPtrType(c_char_ptr_type)
c_int_ptr_type = CPtrType(c_int_type) c_int_ptr_type = CPtrType(c_int_type)
......
...@@ -212,7 +212,7 @@ class PyrexScanner(Scanner): ...@@ -212,7 +212,7 @@ class PyrexScanner(Scanner):
resword_dict = build_resword_dict() resword_dict = build_resword_dict()
def __init__(self, file, filename, parent_scanner = None, def __init__(self, file, filename, parent_scanner = None,
type_names = None, context = None): type_names = None, context = None, source_encoding=None):
Scanner.__init__(self, get_lexicon(), file, filename) Scanner.__init__(self, get_lexicon(), file, filename)
if parent_scanner: if parent_scanner:
self.context = parent_scanner.context self.context = parent_scanner.context
...@@ -226,6 +226,7 @@ class PyrexScanner(Scanner): ...@@ -226,6 +226,7 @@ class PyrexScanner(Scanner):
self.compile_time_env = initial_compile_time_env() self.compile_time_env = initial_compile_time_env()
self.compile_time_eval = 1 self.compile_time_eval = 1
self.compile_time_expr = 0 self.compile_time_expr = 0
self.source_encoding = source_encoding
self.trace = trace_scanner self.trace = trace_scanner
self.indentation_stack = [0] self.indentation_stack = [0]
self.indentation_char = None self.indentation_char = None
......
...@@ -3,8 +3,6 @@ ...@@ -3,8 +3,6 @@
# #
import re import re
import bisect
from Errors import warning, error, InternalError from Errors import warning, error, InternalError
import Options import Options
import Naming import Naming
...@@ -438,7 +436,13 @@ class Scope: ...@@ -438,7 +436,13 @@ class Scope:
def add_string_const(self, value): def add_string_const(self, value):
# Add an entry for a string constant. # Add an entry for a string constant.
cname = self.new_const_cname() cname = self.new_const_cname()
entry = Entry("", cname, c_char_array_type, init = value) if value.is_unicode:
c_type = c_utf8_char_array_type
value = value.utf8encode()
else:
c_type = c_char_array_type
value = value.byteencode()
entry = Entry("", cname, c_type, init = value)
entry.used = 1 entry.used = 1
self.const_entries.append(entry) self.const_entries.append(entry)
return entry return entry
...@@ -460,7 +464,7 @@ class Scope: ...@@ -460,7 +464,7 @@ class Scope:
# Python identifier, it will be interned. # Python identifier, it will be interned.
if not entry.pystring_cname: if not entry.pystring_cname:
value = entry.init value = entry.init
if identifier_pattern.match(value) and isinstance(value, str): if not entry.type.is_unicode and identifier_pattern.match(value):
entry.pystring_cname = self.intern(value) entry.pystring_cname = self.intern(value)
entry.is_interned = 1 entry.is_interned = 1
else: else:
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# anywhere else in particular # anywhere else in particular
# #
import os, sys import os, sys, re, codecs
def replace_suffix(path, newsuf): def replace_suffix(path, newsuf):
base, _ = os.path.splitext(path) base, _ = os.path.splitext(path)
...@@ -32,3 +32,25 @@ def castrate_file(path, st): ...@@ -32,3 +32,25 @@ def castrate_file(path, st):
f.close() f.close()
if st: if st:
os.utime(path, (st.st_atime, st.st_mtime)) os.utime(path, (st.st_atime, st.st_mtime))
# support for source file encoding detection and unicode decoding

# Finds a "coding: <name>" / "coding=<name>" declaration in a line and
# captures the codec name.  The backslashes are doubled so the pattern does
# not depend on Python passing unknown string escapes through unchanged.
_match_file_encoding = re.compile(u"coding[:=]\\s*([-\\w.]+)").search

def detect_file_encoding(source_filename):
    """Return the encoding declared in the first two lines of a source file.

    Follows PEPs 263 and 3120: only the first two lines are searched for a
    coding declaration, and "UTF-8" is returned when none is found
    (including for an empty file).

    source_filename -- path of the file to inspect.

    Errors from opening the file propagate to the caller.
    """
    # The file is probed as UTF-8 before its real encoding is known, so
    # undecodable bytes are dropped rather than raising; the declaration
    # itself is ASCII and survives.  Plain "r" is used because codecs.open
    # always reads in binary mode, which makes the "U" flag meaningless.
    f = codecs.open(source_filename, "r", encoding="UTF-8", errors="ignore")
    try:
        for line_no, line in enumerate(f):
            encoding = _match_file_encoding(line)
            if encoding:
                return encoding.group(1)
            if line_no == 1:
                # Declarations after the second line do not count.
                break
    finally:
        f.close()
    return "UTF-8"
def open_source_file(source_filename, mode="rU"):
    """Open a source file for decoded reading.

    The encoding is taken from detect_file_encoding(); the returned object
    is whatever codecs.open() yields for that encoding and the given mode.
    """
    return codecs.open(
        source_filename,
        mode=mode,
        encoding=detect_file_encoding(source_filename))
...@@ -2,20 +2,16 @@ ...@@ -2,20 +2,16 @@
import os, sys, unittest, doctest import os, sys, unittest, doctest
#from Cython.Distutils.build_ext import build_ext from Cython.Distutils.extension import Extension
#from Cython.Distutils.extension import Extension
from distutils.extension import Extension
from Cython.Distutils import build_ext from Cython.Distutils import build_ext
from distutils.dist import Distribution from distutils.dist import Distribution
distutils_distro = Distribution() distutils_distro = Distribution()
TEST_DIRS = ['compile', 'run'] TEST_DIRS = ['compile', 'run']
TEST_RUN_DIRS = ['run'] TEST_RUN_DIRS = ['run']
INCLUDE_DIRS = os.getenv('INCLUDE', '').split(os.pathsep) INCLUDE_DIRS = [ d for d in os.getenv('INCLUDE', '').split(os.pathsep) if d ]
CFLAGS = os.getenv('CFLAGS', '').split() CFLAGS = os.getenv('CFLAGS', '').split()
class TestBuilder(object): class TestBuilder(object):
......
struct CrunchyType {
int number;
PyObject* string;
};
cdef extern from "crunchytype.h":
cdef class crunchytype.Crunchy [ object CrunchyType ]:
cdef int number
cdef object string
from crunchytype cimport Crunchy
cdef class Sub2(Crunchy):
cdef char character
cdef class Sub1(Sub2):
cdef char character
__doc__ = """ __doc__ = """
>>> >>> f()
(30, 22)
""" """
def f(): def f():
cdef int int1, int2, int3 cdef int int1, int2, int3
cdef char *ptr1, *ptr2, *ptr3 cdef char *ptr1, *ptr2 = "test", *ptr3 = "toast"
int2 = 10 int2 = 10
int3 = 20 int3 = 20
obj1 = 1 obj1 = 1
......
__doc__ = """ __doc__ = """
>>> int2 = 42
>>> int3 = 7
>>> char1 = ord('C')
>>> int1 = int2 | int3
>>> int1 |= int2 ^ int3
>>> int1 ^= int2 & int3
>>> int1 ^= int2 << int3
>>> int1 ^= int2 >> int3
>>> int1 ^= int2 << int3 | int2 >> int3
>>> long1 = char1 | int1
>>> print (int1, long1) == f()
True
>>> f() >>> f()
(5376, 67) (45, 111)
""" """
def f(): def f():
...@@ -12,10 +26,10 @@ def f(): ...@@ -12,10 +26,10 @@ def f():
char1 = c'C' char1 = c'C'
int1 = int2 | int3 int1 = int2 | int3
int1 = int2 ^ int3 int1 |= int2 ^ int3
int1 = int2 & int3 int1 ^= int2 & int3
int1 = int2 << int3 int1 ^= int2 << int3
int1 = int2 >> int3 int1 ^= int2 >> int3
int1 = int2 << int3 | int2 >> int3 int1 ^= int2 << int3 | int2 >> int3
long1 = char1 | long2 long1 = char1 | int1
return int1, long1 return int1, long1
__doc__ = """ __doc__ = """
>>> c() >>> c()
120 120
>>> i() >>> i1() == 42
42 True
>>> i2() == 0x42
True
>>> i3() == 042
True
>>> l() >>> l()
666 666
>>> f() >>> f()
...@@ -23,7 +27,9 @@ DEF TUPLE = (1, 2, "buckle my shoe") ...@@ -23,7 +27,9 @@ DEF TUPLE = (1, 2, "buckle my shoe")
DEF TRUE_FALSE = (True, False) DEF TRUE_FALSE = (True, False)
DEF CHAR = c'x' DEF CHAR = c'x'
DEF INT = 42 DEF INT1 = 42
DEF INT2 = 0x42
DEF INT3 = 042
DEF LONG = 666L DEF LONG = 666L
DEF FLOAT = 12.5 DEF FLOAT = 12.5
DEF STR = "spam" DEF STR = "spam"
...@@ -37,9 +43,19 @@ def c(): ...@@ -37,9 +43,19 @@ def c():
c = CHAR c = CHAR
return c return c
def i(): def i1():
cdef int i
i = INT1
return i
def i2():
cdef int i
i = INT2
return i
def i3():
cdef int i cdef int i
i = INT i = INT3
return i return i
def l(): def l():
......
__doc__ = """
>>> D
2
"""
D = 1
include "testinclude.pxi"
__doc__ = """ __doc__ = """
>>> >>> s = Spam(Eggs("ham"))
>>> test(s)
'ham'
""" """
cdef class Eggs: cdef class Eggs:
cdef object ham cdef object ham
def __init__(self, ham):
self.ham = ham
cdef class Spam: cdef class Spam:
cdef Eggs eggs cdef Eggs eggs
def __init__(self, eggs):
self.eggs = eggs
cdef void tomato(Spam s): cdef object tomato(Spam s):
food = s.eggs.ham food = s.eggs.ham
return food
def test(Spam s):
return tomato(s)
__doc__ = """ __doc__ = """
>>> print f.__doc__ >>> f.__doc__
This is a function docstring. 'This is a function docstring.'
>>> print C.__doc__
This is a class docstring. >>> C.__doc__
>>> print T.__doc__ 'This is a class docstring.'
This is an extension type docstring. >>> CS.__doc__
'This is a subclass docstring.'
>>> print CSS.__doc__
None
>>> T.__doc__
'This is an extension type docstring.'
>>> TS.__doc__
'This is an extension subtype docstring.'
>>> TSS.__doc__
Compare with standard Python:
>>> def f():
... 'This is a function docstring.'
>>> f.__doc__
'This is a function docstring.'
>>> class C:
... 'This is a class docstring.'
>>> class CS(C):
... 'This is a subclass docstring.'
>>> class CSS(CS):
... pass
>>> C.__doc__
'This is a class docstring.'
>>> CS.__doc__
'This is a subclass docstring.'
>>> CSS.__doc__
""" """
def f(): def f():
...@@ -13,6 +42,17 @@ def f(): ...@@ -13,6 +42,17 @@ def f():
class C: class C:
"This is a class docstring." "This is a class docstring."
class CS(C):
"This is a subclass docstring."
class CSS(CS):
pass
cdef class T: cdef class T:
"This is an extension type docstring." "This is an extension type docstring."
cdef class TS(T):
"This is an extension subtype docstring."
cdef class TSS(TS):
pass
__doc__ = """
>>> f()
12.5
>>> nan1()
nan
>>> nan2()
nan
>>> nan3()
nan
>>> float_nan
nan
>>> infp1()
inf
>>> infp1() == float('inf')
True
>>> infp2()
inf
>>> infp2() == float('inf')
True
>>> infp3()
inf
>>> infp3() == float('inf')
True
>>> float_infp
inf
>>> float_infp == float('inf')
True
>>> infn1()
-inf
>>> infn1() == float('-inf')
True
>>> infn2()
-inf
>>> infn2() == float('-inf')
True
>>> infn3()
-inf
>>> infn3() == float('-inf')
True
>>> float_infn
-inf
>>> float_infn == float('-inf')
True
"""
DEF FLOAT = 12.5
DEF FLOAT_NAN = float('nan')
DEF FLOAT_INFP = float('+inf')
DEF FLOAT_INFN = float('-inf')
float_nan = FLOAT_NAN
float_infp = FLOAT_INFP
float_infn = FLOAT_INFN
def f():
cdef float f
f = FLOAT
return f
def nan1():
cdef double f
f = FLOAT_NAN
return f
def nan2():
cdef double f
f = float('nan')
return f
def nan3():
cdef float f
f = FLOAT_NAN
return f
def infp1():
cdef double f
f = FLOAT_INFP
return f
def infp2():
cdef double f
f = float('+inf')
return f
def infp3():
cdef float f
f = FLOAT_INFP
return f
def infn1():
cdef double f
f = FLOAT_INFN
return f
def infn2():
cdef double f
f = float('-inf')
return f
def infn3():
cdef float f
f = FLOAT_INFN
return f
...@@ -49,13 +49,17 @@ __doc__ = r""" ...@@ -49,13 +49,17 @@ __doc__ = r"""
True True
>>> d == u'üÖä' >>> d == u'üÖä'
True True
>>> e == u'\x03\x67\xf8\uf8d2Søk ik' >>> e == u'\x03\x67\xf8\uf8d2Søk ik' # unescaped by Cython
True True
>>> f == u'\xf8' >>> e == u'\\x03\\x67\\xf8\\uf8d2Søk ik' # unescaped by Python
True
>>> f == u'\xf8' # unescaped by Cython
True
>>> f == u'\\xf8' # unescaped by Python
True True
>>> add == u'Søk ik' + u'üÖä' + 'abc' >>> add == u'Søk ik' + u'üÖä' + 'abc'
True True
>>> null == u'\\x00' # doctest needs a double slash here >>> null == u'\\x00' # unescaped by Python (required by doctest)
True True
""" """
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment