Commit 305e355c authored by Mark Florisson's avatar Mark Florisson

Merge fixes from fused_cdef

parents e2bd21ab ca393626
...@@ -8,6 +8,7 @@ Cython/Runtime/refnanny.c ...@@ -8,6 +8,7 @@ Cython/Runtime/refnanny.c
BUILD/ BUILD/
build/ build/
!tests/build/
dist/ dist/
.gitrev .gitrev
.coverage .coverage
......
...@@ -11,6 +11,7 @@ Cython/Runtime/refnanny.c ...@@ -11,6 +11,7 @@ Cython/Runtime/refnanny.c
BUILD/ BUILD/
build/ build/
dist/ dist/
.git/
.gitrev .gitrev
.coverage .coverage
*.orig *.orig
......
...@@ -30,7 +30,7 @@ class TestInline(CythonTest): ...@@ -30,7 +30,7 @@ class TestInline(CythonTest):
self.assertEquals(inline(""" self.assertEquals(inline("""
cimport cython cimport cython
return cython.typeof(a), cython.typeof(b) return cython.typeof(a), cython.typeof(b)
""", a=1.0, b=[], **self.test_kwds), ('double', 'list object')) """, a=1.0, b=[], **self.test_kwds), ('double', 'list'))
def test_locals(self): def test_locals(self):
a = 1 a = 1
......
...@@ -85,12 +85,10 @@ def parse_command_line(args): ...@@ -85,12 +85,10 @@ def parse_command_line(args):
options.use_listing_file = 1 options.use_listing_file = 1
elif option in ("-+", "--cplus"): elif option in ("-+", "--cplus"):
options.cplus = 1 options.cplus = 1
elif option.startswith("--embed"): elif option == "--embed":
ix = option.find('=') Options.embed = "main"
if ix == -1: elif option.startswith("--embed="):
Options.embed = "main" Options.embed = options[8:]
else:
Options.embed = option[ix+1:]
elif option.startswith("-I"): elif option.startswith("-I"):
options.include_path.append(get_param(option)) options.include_path.append(get_param(option))
elif option == "--include-dir": elif option == "--include-dir":
......
...@@ -35,6 +35,7 @@ cdef class FunctionState: ...@@ -35,6 +35,7 @@ cdef class FunctionState:
cdef public size_t temp_counter cdef public size_t temp_counter
cdef public object closure_temps cdef public object closure_temps
cdef public bint should_declare_error_indicator
@cython.locals(n=size_t) @cython.locals(n=size_t)
cpdef new_label(self, name=*) cpdef new_label(self, name=*)
......
...@@ -134,6 +134,12 @@ class FunctionState(object): ...@@ -134,6 +134,12 @@ class FunctionState(object):
self.temp_counter = 0 self.temp_counter = 0
self.closure_temps = None self.closure_temps = None
# This is used for the error indicator, which needs to be local to the
# function. It used to be global, which relies on the GIL being held.
# However, exceptions may need to be propagated through 'nogil'
# sections, in which case we introduce a race condition.
self.should_declare_error_indicator = False
# labels # labels
def new_label(self, name=None): def new_label(self, name=None):
...@@ -1166,39 +1172,19 @@ class CCodeWriter(object): ...@@ -1166,39 +1172,19 @@ class CCodeWriter(object):
self.funcstate.use_label(lbl) self.funcstate.use_label(lbl)
self.putln("goto %s;" % lbl) self.putln("goto %s;" % lbl)
def put_var_declarations(self, entries, static = 0, dll_linkage = None, def put_var_declaration(self, entry, storage_class="",
definition = True): dll_linkage = None, definition = True):
for entry in entries:
if not entry.in_cinclude:
self.put_var_declaration(entry, static, dll_linkage, definition)
def put_var_declaration(self, entry, static = 0, dll_linkage = None,
definition = True):
#print "Code.put_var_declaration:", entry.name, "definition =", definition ### #print "Code.put_var_declaration:", entry.name, "definition =", definition ###
if entry.in_closure: if entry.visibility == 'private' and not (definition or entry.defined_in_pxd):
#print "...private and not definition, skipping", entry.cname ###
return return
visibility = entry.visibility if entry.visibility == "private" and not entry.used:
if visibility == 'private' and not definition: #print "...private and not used, skipping", entry.cname ###
#print "...private and not definition, skipping" ###
return return
if not entry.used and visibility == "private":
#print "not used and private, skipping", entry.cname ###
return
storage_class = ""
if visibility == 'extern':
storage_class = Naming.extern_c_macro
elif visibility == 'public':
if not definition:
storage_class = Naming.extern_c_macro
elif visibility == 'private':
if static:
storage_class = "static"
if storage_class: if storage_class:
self.put("%s " % storage_class) self.put("%s " % storage_class)
if visibility != 'public': self.put(entry.type.declaration_code(
dll_linkage = None entry.cname, dll_linkage = dll_linkage))
self.put(entry.type.declaration_code(entry.cname,
dll_linkage = dll_linkage))
if entry.init is not None: if entry.init is not None:
self.put_safe(" = %s" % entry.type.literal_code(entry.init)) self.put_safe(" = %s" % entry.type.literal_code(entry.init))
self.putln(";") self.putln(";")
...@@ -1382,6 +1368,52 @@ class CCodeWriter(object): ...@@ -1382,6 +1368,52 @@ class CCodeWriter(object):
doc_code, doc_code,
term)) term))
# GIL methods
def put_ensure_gil(self, declare_gilstate=True):
    """
    Emit C code that takes the GIL through PyGILState_Ensure().

    The handle returned by PyGILState_Ensure() is stored in a C variable
    named `_save`. When `declare_gilstate` is true, the emitted statement
    also declares that variable (`PyGILState_STATE _save = ...`);
    otherwise only the assignment is emitted.

    The generated code is safe even for threads that have no PyThreadState
    yet (threads not initialized through the Python API), and it may be
    called recursively. Everything is guarded by `#ifdef WITH_THREAD`.
    """
    from Cython.Compiler import Nodes as _nodes

    utility_code = _nodes.force_init_threads_utility_code
    self.globalstate.use_utility_code(utility_code)

    emit_line = self.putln
    emit_line("#ifdef WITH_THREAD")
    if declare_gilstate:
        # No newline yet: the assignment below completes the declaration.
        self.put("PyGILState_STATE ")
    emit_line("_save = PyGILState_Ensure();")
    emit_line("#endif")
def put_release_ensured_gil(self):
    """
    Emit the PyGILState_Release() call that undoes `put_ensure_gil`.

    The call is passed the `_save` variable and is guarded by
    `#ifdef WITH_THREAD`.
    """
    for c_line in ("#ifdef WITH_THREAD",
                   "PyGILState_Release(_save);",
                   "#endif"):
        self.putln(c_line)
def put_acquire_gil(self):
    """
    Emit `Py_BLOCK_THREADS` to take the GIL back.

    This is the counterpart of `put_release_gil`, which must have been
    emitted earlier so that the thread's thread state has been initialized.
    """
    block_threads_macro = "Py_BLOCK_THREADS"
    self.putln(block_threads_macro)
def put_release_gil(self):
    """
    Emit code that releases the GIL; `put_acquire_gil` is its counterpart.

    A `PyThreadState *_save` variable initialized to NULL is declared under
    `#ifdef WITH_THREAD`, followed by `Py_UNBLOCK_THREADS`.
    """
    for c_line in ("#ifdef WITH_THREAD",
                   "PyThreadState *_save = NULL;",
                   "#endif",
                   "Py_UNBLOCK_THREADS"):
        self.putln(c_line)
def declare_gilstate(self):
    """
    Emit a bare declaration of the `PyGILState_STATE _save` variable,
    guarded by `#ifdef WITH_THREAD`.
    """
    for c_line in ("#ifdef WITH_THREAD",
                   "PyGILState_STATE _save;",
                   "#endif"):
        self.putln(c_line)
# error handling # error handling
def put_error_if_neg(self, pos, value): def put_error_if_neg(self, pos, value):
...@@ -1389,10 +1421,12 @@ class CCodeWriter(object): ...@@ -1389,10 +1421,12 @@ class CCodeWriter(object):
return self.putln("if (%s < 0) %s" % (value, self.error_goto(pos))) return self.putln("if (%s < 0) %s" % (value, self.error_goto(pos)))
def set_error_info(self, pos): def set_error_info(self, pos):
self.funcstate.should_declare_error_indicator = True
if self.c_line_in_traceback: if self.c_line_in_traceback:
cinfo = " %s = %s;" % (Naming.clineno_cname, Naming.line_c_macro) cinfo = " %s = %s;" % (Naming.clineno_cname, Naming.line_c_macro)
else: else:
cinfo = "" cinfo = ""
return "%s = %s[%s]; %s = %s;%s" % ( return "%s = %s[%s]; %s = %s;%s" % (
Naming.filename_cname, Naming.filename_cname,
Naming.filetable_cname, Naming.filetable_cname,
...@@ -1432,6 +1466,20 @@ class CCodeWriter(object): ...@@ -1432,6 +1466,20 @@ class CCodeWriter(object):
def put_finish_refcount_context(self): def put_finish_refcount_context(self):
self.putln("__Pyx_RefNannyFinishContext();") self.putln("__Pyx_RefNannyFinishContext();")
def put_add_traceback(self, qualified_name):
    """
    Emit a `__Pyx_AddTraceback(...)` call, which builds a Python traceback
    for propagating exceptions.

    qualified_name should be the qualified name of the function; it is
    emitted as a C string literal, followed by the C-line, line and
    filename cnames taken from Naming.
    """
    template = '__Pyx_AddTraceback("%s", %s, %s, %s);'
    self.putln(template % (qualified_name,
                           Naming.clineno_cname,
                           Naming.lineno_cname,
                           Naming.filename_cname))
def put_trace_declarations(self): def put_trace_declarations(self):
self.putln('__Pyx_TraceDeclarations'); self.putln('__Pyx_TraceDeclarations');
...@@ -1444,6 +1492,10 @@ class CCodeWriter(object): ...@@ -1444,6 +1492,10 @@ class CCodeWriter(object):
def put_trace_return(self, retvalue_cname): def put_trace_return(self, retvalue_cname):
self.putln("__Pyx_TraceReturn(%s);" % retvalue_cname) self.putln("__Pyx_TraceReturn(%s);" % retvalue_cname)
def putln_openmp(self, string):
    """
    Emit `string` on its own line between `#ifdef _OPENMP` and the
    matching `#endif`.
    """
    for c_line in ("#ifdef _OPENMP", string, "#endif /* _OPENMP */"):
        self.putln(c_line)
class PyrexCodeWriter(object): class PyrexCodeWriter(object):
# f file output file # f file output file
......
This diff is collapsed.
...@@ -106,9 +106,10 @@ class Context(object): ...@@ -106,9 +106,10 @@ class Context(object):
from ParseTreeTransforms import AnalyseDeclarationsTransform, AnalyseExpressionsTransform from ParseTreeTransforms import AnalyseDeclarationsTransform, AnalyseExpressionsTransform
from ParseTreeTransforms import CreateClosureClasses, MarkClosureVisitor, DecoratorTransform from ParseTreeTransforms import CreateClosureClasses, MarkClosureVisitor, DecoratorTransform
from ParseTreeTransforms import InterpretCompilerDirectives, TransformBuiltinMethods from ParseTreeTransforms import InterpretCompilerDirectives, TransformBuiltinMethods
from ParseTreeTransforms import ExpandInplaceOperators from ParseTreeTransforms import ExpandInplaceOperators, ParallelRangeTransform
from TypeInference import MarkAssignments, MarkOverflowingArithmetic from TypeInference import MarkAssignments, MarkOverflowingArithmetic
from ParseTreeTransforms import AlignFunctionDefinitions, GilCheck from ParseTreeTransforms import AdjustDefByDirectives, AlignFunctionDefinitions
from ParseTreeTransforms import RemoveUnreachableCode, GilCheck
from AnalysedTreeTransforms import AutoTestDictTransform from AnalysedTreeTransforms import AutoTestDictTransform
from AutoDocTransforms import EmbedSignature from AutoDocTransforms import EmbedSignature
from Optimize import FlattenInListTransform, SwitchTransform, IterationTransform from Optimize import FlattenInListTransform, SwitchTransform, IterationTransform
...@@ -135,8 +136,11 @@ class Context(object): ...@@ -135,8 +136,11 @@ class Context(object):
PostParse(self), PostParse(self),
_specific_post_parse, _specific_post_parse,
InterpretCompilerDirectives(self, self.compiler_directives), InterpretCompilerDirectives(self, self.compiler_directives),
ParallelRangeTransform(self),
AdjustDefByDirectives(self),
MarkClosureVisitor(self), MarkClosureVisitor(self),
_align_function_definitions, _align_function_definitions,
RemoveUnreachableCode(self),
ConstantFolding(), ConstantFolding(),
FlattenInListTransform(), FlattenInListTransform(),
WithTransform(self), WithTransform(self),
......
This diff is collapsed.
...@@ -36,6 +36,7 @@ prop_set_prefix = pyrex_prefix + "setprop_" ...@@ -36,6 +36,7 @@ prop_set_prefix = pyrex_prefix + "setprop_"
type_prefix = pyrex_prefix + "t_" type_prefix = pyrex_prefix + "t_"
typeobj_prefix = pyrex_prefix + "type_" typeobj_prefix = pyrex_prefix + "type_"
var_prefix = pyrex_prefix + "v_" var_prefix = pyrex_prefix + "v_"
varptr_prefix = pyrex_prefix + "vp_"
wrapperbase_prefix= pyrex_prefix + "wrapperbase_" wrapperbase_prefix= pyrex_prefix + "wrapperbase_"
bufstruct_prefix = pyrex_prefix + "bstruct_" bufstruct_prefix = pyrex_prefix + "bstruct_"
bufstride_prefix = pyrex_prefix + "bstride_" bufstride_prefix = pyrex_prefix + "bstride_"
......
This diff is collapsed.
This diff is collapsed.
...@@ -94,6 +94,10 @@ directive_defaults = { ...@@ -94,6 +94,10 @@ directive_defaults = {
'warn': None, 'warn': None,
'warn.undeclared': False, 'warn.undeclared': False,
'warn.unreachable': True,
# remove unreachable code
'remove_unreachable': True,
# test support # test support
'test_assert_path_exists' : [], 'test_assert_path_exists' : [],
...@@ -108,6 +112,9 @@ directive_types = { ...@@ -108,6 +112,9 @@ directive_types = {
'final' : bool, # final cdef classes and methods 'final' : bool, # final cdef classes and methods
'internal' : bool, # cdef class visibility in the module dict 'internal' : bool, # cdef class visibility in the module dict
'infer_types' : bool, # values can be True/None/False 'infer_types' : bool, # values can be True/None/False
'cfunc' : None, # decorators do not take directive value
'ccall' : None,
'cclass' : None,
} }
for key, val in directive_defaults.items(): for key, val in directive_defaults.items():
......
This diff is collapsed.
...@@ -1580,7 +1580,7 @@ def p_with_statement(s): ...@@ -1580,7 +1580,7 @@ def p_with_statement(s):
def p_with_items(s): def p_with_items(s):
pos = s.position() pos = s.position()
if not s.in_python_file and s.sy == 'IDENT' and s.systring == 'nogil': if not s.in_python_file and s.sy == 'IDENT' and s.systring in ('nogil', 'gil'):
state = s.systring state = s.systring
s.next() s.next()
if s.sy == ',': if s.sy == ',':
...@@ -1750,7 +1750,7 @@ def p_statement(s, ctx, first_statement = 0): ...@@ -1750,7 +1750,7 @@ def p_statement(s, ctx, first_statement = 0):
elif s.sy == 'IF': elif s.sy == 'IF':
return p_IF_statement(s, ctx) return p_IF_statement(s, ctx)
elif s.sy == 'DECORATOR': elif s.sy == 'DECORATOR':
if ctx.level not in ('module', 'class', 'c_class', 'function', 'property', 'module_pxd', 'c_class_pxd'): if ctx.level not in ('module', 'class', 'c_class', 'function', 'property', 'module_pxd', 'c_class_pxd', 'other'):
s.error('decorator not allowed here') s.error('decorator not allowed here')
s.level = ctx.level s.level = ctx.level
decorators = p_decorators(s) decorators = p_decorators(s)
......
...@@ -2163,7 +2163,10 @@ def get_all_specific_permutations(fused_types, id="", f2s=()): ...@@ -2163,7 +2163,10 @@ def get_all_specific_permutations(fused_types, id="", f2s=()):
result = [] result = []
for newid, specific_type in enumerate(fused_type.types): for newid, specific_type in enumerate(fused_type.types):
f2s = dict(f2s, **{ fused_type: specific_type }) # f2s = dict(f2s, **{ fused_type: specific_type })
f2s = dict(f2s)
f2s.update({ fused_type: specific_type })
if id: if id:
cname = '%s_%s' % (id, newid) cname = '%s_%s' % (id, newid)
else: else:
......
This diff is collapsed.
...@@ -4,6 +4,7 @@ from Cython.Compiler import CmdLine ...@@ -4,6 +4,7 @@ from Cython.Compiler import CmdLine
from Cython.TestUtils import TransformTest from Cython.TestUtils import TransformTest
from Cython.Compiler.ParseTreeTransforms import * from Cython.Compiler.ParseTreeTransforms import *
from Cython.Compiler.Nodes import * from Cython.Compiler.Nodes import *
from Cython.Compiler import Main
class TestNormalizeTree(TransformTest): class TestNormalizeTree(TransformTest):
...@@ -144,6 +145,62 @@ class TestWithTransform(object): # (TransformTest): # Disabled! ...@@ -144,6 +145,62 @@ class TestWithTransform(object): # (TransformTest): # Disabled!
""", t) """, t)
class TestInterpretCompilerDirectives(TransformTest):
"""
This class tests the parallel directives AST-rewriting and importing.
"""
# Test the parallel directives (c)importing
import_code = u"""
cimport cython.parallel
cimport cython.parallel as par
from cython cimport parallel as par2
from cython cimport parallel
from cython.parallel cimport threadid as tid
from cython.parallel cimport threadavailable as tavail
from cython.parallel cimport prange
"""
# Expected contents of the transform's `parallel_directives` after
# import_code has been run through it: each name bound by the statements
# above maps to the cython.parallel directive it refers to.
expected_directives_dict = {
u'cython.parallel': u'cython.parallel',
u'par': u'cython.parallel',
u'par2': u'cython.parallel',
u'parallel': u'cython.parallel',
u"tid": u"cython.parallel.threadid",
u"tavail": u"cython.parallel.threadavailable",
u"prange": u"cython.parallel.prange",
}
# Build a single-stage pipeline (InterpretCompilerDirectives only) around a
# context created from the default compilation options, and remember the
# current debug_exception_on_error flag so tearDown can put it back.
def setUp(self):
super(TestInterpretCompilerDirectives, self).setUp()
compilation_options = Main.CompilationOptions(Main.default_options)
ctx = compilation_options.create_context()
self.pipeline = [
InterpretCompilerDirectives(ctx, ctx.compiler_directives),
]
self.debug_exception_on_error = DebugFlags.debug_exception_on_error
# Restore the flag saved in setUp.
def tearDown(self):
DebugFlags.debug_exception_on_error = self.debug_exception_on_error
# The cimport forms must produce exactly the expected mapping.
def test_parallel_directives_cimports(self):
self.run_pipeline(self.pipeline, self.import_code)
parallel_directives = self.pipeline[0].parallel_directives
self.assertEqual(parallel_directives, self.expected_directives_dict)
# Same statements with every 'cimport' replaced by 'import'; the expected
# mapping is unchanged.
def test_parallel_directives_imports(self):
self.run_pipeline(self.pipeline,
self.import_code.replace(u'cimport', u'import'))
parallel_directives = self.pipeline[0].parallel_directives
self.assertEqual(parallel_directives, self.expected_directives_dict)
# TODO: Re-enable once they're more robust. # TODO: Re-enable once they're more robust.
if sys.version_info[:2] >= (2, 5) and False: if sys.version_info[:2] >= (2, 5) and False:
from Cython.Debugger import DebugWriter from Cython.Debugger import DebugWriter
......
...@@ -23,12 +23,24 @@ object_expr = TypedExprNode(py_object_type) ...@@ -23,12 +23,24 @@ object_expr = TypedExprNode(py_object_type)
class MarkAssignments(CythonTransform): class MarkAssignments(CythonTransform):
def mark_assignment(self, lhs, rhs): def __init__(self, context):
super(CythonTransform, self).__init__()
self.context = context
# Track the parallel block scopes (with parallel, for i in prange())
self.parallel_block_stack = []
def mark_assignment(self, lhs, rhs, inplace_op=None):
if isinstance(lhs, (ExprNodes.NameNode, Nodes.PyArgDeclNode)): if isinstance(lhs, (ExprNodes.NameNode, Nodes.PyArgDeclNode)):
if lhs.entry is None: if lhs.entry is None:
# TODO: This shouldn't happen... # TODO: This shouldn't happen...
return return
lhs.entry.assignments.append(rhs) lhs.entry.assignments.append(rhs)
if self.parallel_block_stack:
parallel_node = self.parallel_block_stack[-1]
parallel_node.assignments[lhs.entry] = (lhs.pos, inplace_op)
elif isinstance(lhs, ExprNodes.SequenceNode): elif isinstance(lhs, ExprNodes.SequenceNode):
for arg in lhs.args: for arg in lhs.args:
self.mark_assignment(arg, object_expr) self.mark_assignment(arg, object_expr)
...@@ -48,7 +60,7 @@ class MarkAssignments(CythonTransform): ...@@ -48,7 +60,7 @@ class MarkAssignments(CythonTransform):
return node return node
def visit_InPlaceAssignmentNode(self, node): def visit_InPlaceAssignmentNode(self, node):
self.mark_assignment(node.lhs, node.create_binop_node()) self.mark_assignment(node.lhs, node.create_binop_node(), node.operator)
self.visitchildren(node) self.visitchildren(node)
return node return node
...@@ -56,6 +68,11 @@ class MarkAssignments(CythonTransform): ...@@ -56,6 +68,11 @@ class MarkAssignments(CythonTransform):
# TODO: Remove redundancy with range optimization... # TODO: Remove redundancy with range optimization...
is_special = False is_special = False
sequence = node.iterator.sequence sequence = node.iterator.sequence
if isinstance(sequence, ExprNodes.SimpleCallNode):
function = sequence.function
if sequence.self is None and function.is_name:
if function.name == 'reversed' and len(sequence.args) == 1:
sequence = sequence.args[0]
if isinstance(sequence, ExprNodes.SimpleCallNode): if isinstance(sequence, ExprNodes.SimpleCallNode):
function = sequence.function function = sequence.function
if sequence.self is None and function.is_name: if sequence.self is None and function.is_name:
...@@ -70,6 +87,7 @@ class MarkAssignments(CythonTransform): ...@@ -70,6 +87,7 @@ class MarkAssignments(CythonTransform):
'+', '+',
sequence.args[0], sequence.args[0],
sequence.args[2])) sequence.args[2]))
if not is_special: if not is_special:
# A for-loop basically translates to subsequent calls to # A for-loop basically translates to subsequent calls to
# __getitem__(), so using an IndexNode here allows us to # __getitem__(), so using an IndexNode here allows us to
...@@ -127,6 +145,27 @@ class MarkAssignments(CythonTransform): ...@@ -127,6 +145,27 @@ class MarkAssignments(CythonTransform):
self.visitchildren(node) self.visitchildren(node)
return node return node
def visit_ParallelStatNode(self, node):
    """
    Link a parallel node to its enclosing parallel block and decide
    whether it is itself parallel, then visit its children.

    node.parent becomes the innermost node on parallel_block_stack (or
    None when the stack is empty). A non-prange node, or a prange without
    a parent, is always marked parallel; a nested prange is parallel when
    its parent is a prange or when the parent is not parallel. The node
    is on the stack only while its children are being visited.
    """
    stack = self.parallel_block_stack
    node.parent = stack[-1] if stack else None

    enclosing = node.parent
    if not node.is_prange or not enclosing:
        node.is_parallel = True
    else:
        node.is_parallel = (enclosing.is_prange or
                            not enclosing.is_parallel)

    stack.append(node)
    self.visitchildren(node)
    stack.pop()
    return node
class MarkOverflowingArithmetic(CythonTransform): class MarkOverflowingArithmetic(CythonTransform):
# It may be possible to integrate this with the above for # It may be possible to integrate this with the above for
......
...@@ -14,9 +14,12 @@ class TempHandle(object): ...@@ -14,9 +14,12 @@ class TempHandle(object):
# THIS IS DEPRECATED, USE LetRefNode instead # THIS IS DEPRECATED, USE LetRefNode instead
temp = None temp = None
needs_xdecref = False needs_xdecref = False
def __init__(self, type): def __init__(self, type, needs_cleanup=None):
self.type = type self.type = type
self.needs_cleanup = type.is_pyobject if needs_cleanup is None:
self.needs_cleanup = type.is_pyobject
else:
self.needs_cleanup = needs_cleanup
def ref(self, pos): def ref(self, pos):
return TempRefNode(pos, handle=self, type=self.type) return TempRefNode(pos, handle=self, type=self.type)
......
...@@ -64,7 +64,7 @@ class TreeVisitor(object): ...@@ -64,7 +64,7 @@ class TreeVisitor(object):
u'gil_message', u'cpp_message', u'gil_message', u'cpp_message',
u'subexprs'] u'subexprs']
values = [] values = []
pos = node.pos pos = getattr(node, 'pos', None)
if pos: if pos:
source = pos[0] source = pos[0]
if source: if source:
...@@ -131,7 +131,7 @@ class TreeVisitor(object): ...@@ -131,7 +131,7 @@ class TreeVisitor(object):
trace.append(u"File '%s', line %d, in %s: %s" % ( trace.append(u"File '%s', line %d, in %s: %s" % (
pos[0], pos[1], method_name, self.dump_node(node))) pos[0], pos[1], method_name, self.dump_node(node)))
raise Errors.CompilerCrash( raise Errors.CompilerCrash(
last_node.pos, self.__class__.__name__, getattr(last_node, 'pos', None), self.__class__.__name__,
u'\n'.join(trace), e, stacktrace) u'\n'.join(trace), e, stacktrace)
def find_handler(self, obj): def find_handler(self, obj):
......
...@@ -19,10 +19,6 @@ from distutils.dir_util import mkpath ...@@ -19,10 +19,6 @@ from distutils.dir_util import mkpath
from distutils.command import build_ext as _build_ext from distutils.command import build_ext as _build_ext
from distutils import sysconfig from distutils import sysconfig
if sys.version_info < (3, 0):
from Cython.Utils import any
extension_name_re = _build_ext.extension_name_re extension_name_re = _build_ext.extension_name_re
show_compilers = _build_ext.show_compilers show_compilers = _build_ext.show_compilers
...@@ -122,8 +118,8 @@ class build_ext(_build_ext.build_ext): ...@@ -122,8 +118,8 @@ class build_ext(_build_ext.build_ext):
# If --pyrex-gdb is in effect as a command line option or as option # If --pyrex-gdb is in effect as a command line option or as option
# of any Extension module, disable optimization for the C or C++ # of any Extension module, disable optimization for the C or C++
# compiler. # compiler.
if (self.pyrex_gdb or any([getattr(ext, 'pyrex_gdb', False) if self.pyrex_gdb or [1 for ext in self.extensions
for ext in self.extensions])): if getattr(ext, 'pyrex_gdb', False)]:
optimization.disable_optimization() optimization.disable_optimization()
_build_ext.build_ext.run(self) _build_ext.build_ext.run(self)
......
...@@ -146,6 +146,7 @@ from cpython.method cimport * ...@@ -146,6 +146,7 @@ from cpython.method cimport *
from cpython.weakref cimport * from cpython.weakref cimport *
from cpython.getargs cimport * from cpython.getargs cimport *
from cpython.pythread cimport * from cpython.pythread cimport *
from cpython.pystate cimport *
# Python <= 2.x # Python <= 2.x
from cpython.cobject cimport * from cpython.cobject cimport *
......
# Thread and interpreter state structures and their interfaces
from cpython.ref cimport PyObject
cdef extern from "Python.h":
# We make these opaque types. If the user wants specific attributes,
# they can be declared manually.
ctypedef struct PyInterpreterState:
pass
ctypedef struct PyThreadState:
pass
ctypedef struct PyFrameObject:
pass
# This is not actually a struct, but make sure it can never be coerced to
# an int or used in arithmetic expressions
ctypedef struct PyGILState_STATE
# The type of the trace function registered using PyEval_SetProfile() and
# PyEval_SetTrace().
# Py_tracefunc returns -1 when raising an exception, or 0 for success.
ctypedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *)
# The following values are used for 'what' for tracefunc functions
enum:
PyTrace_CALL
PyTrace_EXCEPTION
PyTrace_LINE
PyTrace_RETURN
PyTrace_C_CALL
PyTrace_C_EXCEPTION
PyTrace_C_RETURN
PyInterpreterState * PyInterpreterState_New()
void PyInterpreterState_Clear(PyInterpreterState *)
void PyInterpreterState_Delete(PyInterpreterState *)
PyThreadState * PyThreadState_New(PyInterpreterState *)
void PyThreadState_Clear(PyThreadState *)
void PyThreadState_Delete(PyThreadState *)
PyThreadState * PyThreadState_Get()
PyThreadState * PyThreadState_Swap(PyThreadState *)
PyObject * PyThreadState_GetDict()
int PyThreadState_SetAsyncExc(long, PyObject *)
# Ensure that the current thread is ready to call the Python
# C API, regardless of the current state of Python, or of its
# thread lock. This may be called as many times as desired
# by a thread so long as each call is matched with a call to
# PyGILState_Release(). In general, other thread-state APIs may
# be used between _Ensure() and _Release() calls, so long as the
# thread-state is restored to its previous state before the Release().
# For example, normal use of the Py_BEGIN_ALLOW_THREADS/
# Py_END_ALLOW_THREADS macros are acceptable.
# The return value is an opaque "handle" to the thread state when
# PyGILState_Ensure() was called, and must be passed to
# PyGILState_Release() to ensure Python is left in the same state. Even
# though recursive calls are allowed, these handles can *not* be shared -
# each unique call to PyGILState_Ensure must save the handle for its
# call to PyGILState_Release.
# When the function returns, the current thread will hold the GIL.
# Failure is a fatal error.
PyGILState_STATE PyGILState_Ensure()
# Release any resources previously acquired. After this call, Python's
# state will be the same as it was prior to the corresponding
# PyGILState_Ensure() call (but generally this state will be unknown to
# the caller, hence the use of the GILState API.)
# Every call to PyGILState_Ensure must be matched by a call to
# PyGILState_Release on the same thread.
void PyGILState_Release(PyGILState_STATE)
# Routines for advanced debuggers, requested by David Beazley.
# Don't use unless you know what you are doing!
PyInterpreterState * PyInterpreterState_Head()
PyInterpreterState * PyInterpreterState_Next(PyInterpreterState *)
PyThreadState * PyInterpreterState_ThreadHead(PyInterpreterState *)
PyThreadState * PyThreadState_Next(PyThreadState *)
...@@ -81,17 +81,17 @@ cdef extern from "numpy/arrayobject.h": ...@@ -81,17 +81,17 @@ cdef extern from "numpy/arrayobject.h":
NPY_COMPLEX256 NPY_COMPLEX256
NPY_COMPLEX512 NPY_COMPLEX512
enum NPY_ORDER: ctypedef enum NPY_ORDER:
NPY_ANYORDER NPY_ANYORDER
NPY_CORDER NPY_CORDER
NPY_FORTRANORDER NPY_FORTRANORDER
enum NPY_CLIPMODE: ctypedef enum NPY_CLIPMODE:
NPY_CLIP NPY_CLIP
NPY_WRAP NPY_WRAP
NPY_RAISE NPY_RAISE
enum NPY_SCALARKIND: ctypedef enum NPY_SCALARKIND:
NPY_NOSCALAR, NPY_NOSCALAR,
NPY_BOOL_SCALAR, NPY_BOOL_SCALAR,
NPY_INTPOS_SCALAR, NPY_INTPOS_SCALAR,
...@@ -101,12 +101,12 @@ cdef extern from "numpy/arrayobject.h": ...@@ -101,12 +101,12 @@ cdef extern from "numpy/arrayobject.h":
NPY_OBJECT_SCALAR NPY_OBJECT_SCALAR
enum NPY_SORTKIND: ctypedef enum NPY_SORTKIND:
NPY_QUICKSORT NPY_QUICKSORT
NPY_HEAPSORT NPY_HEAPSORT
NPY_MERGESORT NPY_MERGESORT
cdef enum requirements: enum:
NPY_C_CONTIGUOUS NPY_C_CONTIGUOUS
NPY_F_CONTIGUOUS NPY_F_CONTIGUOUS
NPY_CONTIGUOUS NPY_CONTIGUOUS
......
cdef extern from "omp.h":
ctypedef struct omp_lock_t
ctypedef struct omp_nest_lock_t
ctypedef enum omp_sched_t:
omp_sched_static = 1,
omp_sched_dynamic = 2,
omp_sched_guided = 3,
omp_sched_auto = 4
extern void omp_set_num_threads(int)
extern int omp_get_num_threads()
extern int omp_get_max_threads()
extern int omp_get_thread_num()
extern int omp_get_num_procs()
extern int omp_in_parallel()
extern void omp_set_dynamic(int)
extern int omp_get_dynamic()
extern void omp_set_nested(int)
extern int omp_get_nested()
extern void omp_init_lock(omp_lock_t *)
extern void omp_destroy_lock(omp_lock_t *)
extern void omp_set_lock(omp_lock_t *)
extern void omp_unset_lock(omp_lock_t *)
extern int omp_test_lock(omp_lock_t *)
extern void omp_init_nest_lock(omp_nest_lock_t *)
extern void omp_destroy_nest_lock(omp_nest_lock_t *)
extern void omp_set_nest_lock(omp_nest_lock_t *)
extern void omp_unset_nest_lock(omp_nest_lock_t *)
extern int omp_test_nest_lock(omp_nest_lock_t *)
extern double omp_get_wtime()
extern double omp_get_wtick()
void omp_set_schedule(omp_sched_t, int)
void omp_get_schedule(omp_sched_t *, int *)
int omp_get_thread_limit()
void omp_set_max_active_levels(int)
int omp_get_max_active_levels()
int omp_get_level()
int omp_get_ancestor_thread_num(int)
int omp_get_team_size(int)
int omp_get_active_level()
from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF, Py_XDECREF from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF, Py_XDECREF
from cpython.exc cimport PyErr_Fetch, PyErr_Restore from cpython.exc cimport PyErr_Fetch, PyErr_Restore
from cpython.pystate cimport PyThreadState_Get
loglevel = 0 loglevel = 0
...@@ -80,6 +81,7 @@ cdef PyObject* SetupContext(char* funcname, int lineno, char* filename) except N ...@@ -80,6 +81,7 @@ cdef PyObject* SetupContext(char* funcname, int lineno, char* filename) except N
return NULL return NULL
cdef PyObject* type = NULL, *value = NULL, *tb = NULL cdef PyObject* type = NULL, *value = NULL, *tb = NULL
cdef PyObject* result = NULL cdef PyObject* result = NULL
PyThreadState_Get()
PyErr_Fetch(&type, &value, &tb) PyErr_Fetch(&type, &value, &tb)
try: try:
ctx = Context(funcname, lineno, filename) ctx = Context(funcname, lineno, filename)
...@@ -131,16 +133,19 @@ cdef void GIVEREF(PyObject* ctx, PyObject* p_obj, int lineno): ...@@ -131,16 +133,19 @@ cdef void GIVEREF(PyObject* ctx, PyObject* p_obj, int lineno):
cdef void INCREF(PyObject* ctx, PyObject* obj, int lineno): cdef void INCREF(PyObject* ctx, PyObject* obj, int lineno):
if obj is not NULL: Py_INCREF(<object>obj) if obj is not NULL: Py_INCREF(<object>obj)
PyThreadState_Get()
GOTREF(ctx, obj, lineno) GOTREF(ctx, obj, lineno)
cdef void DECREF(PyObject* ctx, PyObject* obj, int lineno): cdef void DECREF(PyObject* ctx, PyObject* obj, int lineno):
if GIVEREF_and_report(ctx, obj, lineno): if GIVEREF_and_report(ctx, obj, lineno):
if obj is not NULL: Py_DECREF(<object>obj) if obj is not NULL: Py_DECREF(<object>obj)
PyThreadState_Get()
cdef void FinishContext(PyObject** ctx): cdef void FinishContext(PyObject** ctx):
if ctx == NULL or ctx[0] == NULL: return if ctx == NULL or ctx[0] == NULL: return
cdef PyObject* type = NULL, *value = NULL, *tb = NULL cdef PyObject* type = NULL, *value = NULL, *tb = NULL
cdef object errors = None cdef object errors = None
PyThreadState_Get()
PyErr_Fetch(&type, &value, &tb) PyErr_Fetch(&type, &value, &tb)
try: try:
try: try:
......
...@@ -26,6 +26,8 @@ class _EmptyDecoratorAndManager(object): ...@@ -26,6 +26,8 @@ class _EmptyDecoratorAndManager(object):
def __exit__(self, exc_type, exc_value, traceback): def __exit__(self, exc_type, exc_value, traceback):
pass pass
cclass = ccall = cfunc = _EmptyDecoratorAndManager()
def inline(f, *args, **kwds): def inline(f, *args, **kwds):
if isinstance(f, basestring): if isinstance(f, basestring):
from Cython.Build.Inline import cython_inline from Cython.Build.Inline import cython_inline
...@@ -88,6 +90,7 @@ class _nogil(object): ...@@ -88,6 +90,7 @@ class _nogil(object):
return exc_class is None return exc_class is None
nogil = _nogil() nogil = _nogil()
gil = _nogil()
del _nogil del _nogil
# Emulated types # Emulated types
...@@ -326,3 +329,28 @@ NULL = p_void(0) ...@@ -326,3 +329,28 @@ NULL = p_void(0)
integral = floating = numeric = _FusedType() integral = floating = numeric = _FusedType()
type_ordering = [py_int, py_long, py_float, py_complex] type_ordering = [py_int, py_long, py_float, py_complex]
class CythonDotParallel(object):
    """
    Pure-Python stand-in for the ``cython.parallel`` module.

    Nothing runs in parallel here: ``prange`` degrades to a plain ``range``
    and ``threadid`` always reports thread 0.
    """

    __all__ = ['parallel', 'prange', 'threadid']

    # ``with parallel:`` reuses the module-level ``nogil`` object.
    parallel = nogil

    def prange(self, start=0, stop=None, step=1, schedule=None, nogil=False):
        """Return ``range(start, stop, step)``.

        With a single positional argument that argument is the stop value
        and counting starts at 0.  ``schedule`` and ``nogil`` are accepted
        but not used.
        """
        if stop is not None:
            return range(start, stop, step)
        # Only one bound given: it is the upper bound.
        return range(0, start, step)

    def threadid(self):
        """Return 0, the only thread id reported by this emulation."""
        return 0
# def threadsavailable(self):
# return 1
import sys
sys.modules['cython.parallel'] = CythonDotParallel()
del sys
cimport cython
# Typed declarations for the n-body benchmark functions.  Each
# ``cython.locals`` decorator assigns C types to local variables of the
# function declared right after it; ``=*`` marks an argument whose default
# value is supplied by the implementation.
# combinations: the index ``x`` is a Py_ssize_t.
@cython.locals(x=Py_ssize_t)
cdef combinations(list l)
# advance: coordinates, masses and velocity components are C doubles, the
# step counter ``i`` is a C long.
@cython.locals(x1=double, x2=double, y1=double, y2=double, z1=double, z2=double,
m1=double, m2=double, vx=double, vy=double, vz=double, i=long)
cdef advance(double dt, long n, list bodies=*, list pairs=*)
# report_energy: same double-typed locals as advance, plus the mass ``m``.
@cython.locals(x1=double, x2=double, y1=double, y2=double, z1=double, z2=double,
m=double, m1=double, m2=double, vx=double, vy=double, vz=double)
cdef report_energy(list bodies=*, list pairs=*, double e=*)
# offset_momentum: velocity components and mass are C doubles.
@cython.locals(vx=double, vy=double, vz=double, m=double)
cdef offset_momentum(tuple ref, list bodies=*, double px=*, double py=*, double pz=*)
# test_nbody is the only function declared cpdef; the others are cdef.
cpdef test_nbody(long iterations)
#!/usr/bin/env python
"""N-body benchmark from the Computer Language Benchmarks Game.
This is intended to support Unladen Swallow's perf.py. Accordingly, it has been
modified from the Shootout version:
- Accept standard Unladen Swallow benchmark options.
- Run report_energy()/advance() in a loop.
- Reimplement itertools.combinations() to work with older Python versions.
"""
# Pulled from http://shootout.alioth.debian.org/u64q/benchmark.php?test=nbody&lang=python&id=4
# Contributed by Kevin Carson.
# Modified by Tupteq, Fredrik Johansson, and Daniel Nanz.
__contact__ = "collinwinter@google.com (Collin Winter)"
# Python imports
import optparse
import sys
from time import time
# Local imports
import util
def combinations(l):
    """Return all 2-element combinations of ``l`` as a list of tuples.

    Pure-Python replacement for ``itertools.combinations(l, 2)``: every
    element is paired, in order, with each element that follows it.
    """
    result = []
    for x in range(len(l) - 1):
        first = l[x]
        # Pair the current element with everything to its right.
        result.extend([(first, y) for y in l[x+1:]])
    return result
# Numerical constants used to build the initial state below.
PI = 3.14159265358979323
SOLAR_MASS = 4 * PI * PI
DAYS_PER_YEAR = 365.24
# Initial state of every body as a (position, velocity, mass) tuple.
# Position and velocity are 3-element lists that the simulation functions
# update in place.  Planet velocities are multiplied by DAYS_PER_YEAR and
# planet masses by SOLAR_MASS.
BODIES = {
'sun': ([0.0, 0.0, 0.0], [0.0, 0.0, 0.0], SOLAR_MASS),
'jupiter': ([4.84143144246472090e+00,
-1.16032004402742839e+00,
-1.03622044471123109e-01],
[1.66007664274403694e-03 * DAYS_PER_YEAR,
7.69901118419740425e-03 * DAYS_PER_YEAR,
-6.90460016972063023e-05 * DAYS_PER_YEAR],
9.54791938424326609e-04 * SOLAR_MASS),
'saturn': ([8.34336671824457987e+00,
4.12479856412430479e+00,
-4.03523417114321381e-01],
[-2.76742510726862411e-03 * DAYS_PER_YEAR,
4.99852801234917238e-03 * DAYS_PER_YEAR,
2.30417297573763929e-05 * DAYS_PER_YEAR],
2.85885980666130812e-04 * SOLAR_MASS),
'uranus': ([1.28943695621391310e+01,
-1.51111514016986312e+01,
-2.23307578892655734e-01],
[2.96460137564761618e-03 * DAYS_PER_YEAR,
2.37847173959480950e-03 * DAYS_PER_YEAR,
-2.96589568540237556e-05 * DAYS_PER_YEAR],
4.36624404335156298e-05 * SOLAR_MASS),
'neptune': ([1.53796971148509165e+01,
-2.59193146099879641e+01,
1.79258772950371181e-01],
[2.68067772490389322e-03 * DAYS_PER_YEAR,
1.62824170038242295e-03 * DAYS_PER_YEAR,
-9.51592254519715870e-05 * DAYS_PER_YEAR],
5.15138902046611451e-05 * SOLAR_MASS) }
# All bodies as a list, and every unordered pair of them.  These are the
# default ``bodies`` / ``pairs`` arguments of the simulation functions, and
# they share the mutable position/velocity lists stored in BODIES.
SYSTEM = list(BODIES.values())
PAIRS = combinations(SYSTEM)
def advance(dt, n, bodies=SYSTEM, pairs=PAIRS):
    """Advance the simulation in place by ``n`` steps of size ``dt``.

    Every step first adjusts the velocities of both bodies of each pair in
    ``pairs`` and then moves each body in ``bodies`` along its velocity.
    The position and velocity lists are mutated; nothing is returned.
    """
    for i in range(n):
        # Velocity update from each pairwise interaction.
        for body1, body2 in pairs:
            (x1, y1, z1), v1, m1 = body1
            (x2, y2, z2), v2, m2 = body2
            dx = x1 - x2
            dy = y1 - y2
            dz = z1 - z2
            dist_sq = dx * dx + dy * dy + dz * dz
            mag = dt * (dist_sq ** (-1.5))
            b1m = m1 * mag
            b2m = m2 * mag
            v1[0] -= dx * b2m
            v1[1] -= dy * b2m
            v1[2] -= dz * b2m
            v2[0] += dx * b1m
            v2[1] += dy * b1m
            v2[2] += dz * b1m
        # Position update from the new velocities.
        for r, (vx, vy, vz), m in bodies:
            r[0] += dt * vx
            r[1] += dt * vy
            r[2] += dt * vz
def report_energy(bodies=SYSTEM, pairs=PAIRS, e=0.0):
    """Return the energy of the system, accumulated on top of ``e``.

    For each pair, ``m1 * m2 / distance`` is subtracted; for each body,
    ``m * |v|**2 / 2`` is added.  Nothing is printed or mutated.
    """
    for body1, body2 in pairs:
        (x1, y1, z1), v1, m1 = body1
        (x2, y2, z2), v2, m2 = body2
        dx = x1 - x2
        dy = y1 - y2
        dz = z1 - z2
        e -= (m1 * m2) / ((dx * dx + dy * dy + dz * dz) ** 0.5)
    for r, (vx, vy, vz), m in bodies:
        e += m * (vx * vx + vy * vy + vz * vz) / 2.
    return e
def offset_momentum(ref, bodies=SYSTEM, px=0.0, py=0.0, pz=0.0):
    """Set the velocity of ``ref`` from the momentum of ``bodies``.

    ``velocity * mass`` of every body is subtracted from ``(px, py, pz)``;
    the result divided by the mass of ``ref`` is written into the velocity
    list of ``ref`` in place.
    """
    for r, (vx, vy, vz), m in bodies:
        px = px - vx * m
        py = py - vy * m
        pz = pz - vz * m
    r, v, m = ref
    v[0] = px / m
    v[1] = py / m
    v[2] = pz / m
def test_nbody(iterations):
    """Time the n-body workload ``iterations`` times.

    One untimed warm-up round is executed first.  Returns a list with the
    wall-clock duration, in seconds, of each timed round.
    """
    # Warm-up round, not timed.
    report_energy()
    advance(0.01, 20000)
    report_energy()

    times = []
    remaining = iterations
    while remaining > 0:
        started = time()
        report_energy()
        advance(0.01, 20000)
        report_energy()
        finished = time()
        times.append(finished - started)
        remaining -= 1
    return times
# Script entry point: build the option parser, add the shared benchmark
# options, then hand test_nbody to util.run_benchmark.
if __name__ == '__main__':
parser = optparse.OptionParser(
usage="%prog [options]",
description=("Run the n-body benchmark."))
util.add_standard_options_to(parser)
options, args = parser.parse_args()
# Must run before the benchmark: it rewrites the sun's velocity in place.
offset_momentum(BODIES['sun']) # Set up global state
util.run_benchmark(options, options.num_runs, test_nbody)
#!/usr/bin/env python
"""Simple, brute-force N-Queens solver."""
__author__ = "collinwinter@google.com (Collin Winter)"
# Python imports
import optparse
import re
import string
from time import time
# Local imports
import util
import cython
try:
from builtins import range as _xrange
except ImportError:
from __builtin__ import xrange as _xrange
# Pure-Python implementation of itertools.permutations().
@cython.locals(n=int, i=int, j=int)
def permutations(iterable):
    """Yield every full-length permutation of ``iterable`` as a list.

    permutations(range(3)) --> [0,1,2] [0,2,1] [1,0,2] [1,2,0] [2,0,1] [2,1,0]
    """
    items = tuple(iterable)
    n = len(items)
    order = list(range(n))
    # remaining[i] counts how many more swaps position i has before it wraps.
    remaining = list(range(n, 0, -1))
    yield [ items[i] for i in order ]
    while n:
        for i in range(n - 1, -1, -1):
            j = remaining[i] - 1
            if j:
                remaining[i] = j
                order[i], order[-j] = order[-j], order[i]
                yield [ items[i] for i in order ]
                break
            # Position i is exhausted: rotate its tail left and reset it.
            order[i:] = order[i+1:] + order[i:i+1]
            remaining[i] = n - i
        else:
            # Every position wrapped around: all permutations were produced.
            return
# From http://code.activestate.com/recipes/576647/
@cython.locals(queen_count=int, i=int, vec=list)
def n_queens(queen_count):
    """N-Queens solver.

    Args:
        queen_count: the number of queens to solve for. This is also the
            board size.

    Yields:
        Solutions to the problem. Each yielded value is a list such as
        [3, 8, 2, 1, 4, ..., 6] where each number is the column position
        for the queen, and the index into the list indicates the row.
    """
    cols = list(range(queen_count))
    for vec in permutations(cols):
        # All ``column + row`` values must differ (one diagonal direction).
        if queen_count != len({ vec[i]+i for i in cols }):
            continue
        # All ``column - row`` values must differ (the other direction).
        if queen_count == len({ vec[i]-i for i in cols }):
            yield vec
def test_n_queens(iterations):
    """Time solving the 8-queens problem ``iterations`` times.

    Two untimed warm-up solves are done first.  Returns a list with the
    wall-clock duration, in seconds, of each timed solve.
    """
    # Warm-up runs.
    for _ in (0, 1):
        list(n_queens(8))

    times = []
    for _ in _xrange(iterations):
        started = time()
        list(n_queens(8))
        elapsed = time() - started
        times.append(elapsed)
    return times
# Script entry point: build the option parser, add the shared benchmark
# options, then hand test_n_queens to util.run_benchmark.
if __name__ == "__main__":
parser = optparse.OptionParser(
usage="%prog [options]",
description=("Test the performance of an N-Queens solvers."))
util.add_standard_options_to(parser)
options, args = parser.parse_args()
util.run_benchmark(options, options.num_runs, test_n_queens)
cimport cython
# Typed class declarations for the Richards benchmark: public attributes
# with their C or Python types, and the methods exposed as cpdef.
# NOTE(review): the trailing "# = ..." comments look like the initial values
# assigned by the implementation - confirm against the Python source.

# Packet: three generic object fields, an integer datum and a list of data.
cdef class Packet:
cdef public object link
cdef public object ident
cdef public object kind
cdef public Py_ssize_t datum
cdef public list data
cpdef append_to(self,lst)
# TaskRec: empty base class of the record types declared below.
cdef class TaskRec:
pass
cdef class DeviceTaskRec(TaskRec):
cdef public object pending
cdef class IdleTaskRec(TaskRec):
cdef public long control
cdef public Py_ssize_t count
cdef class HandlerTaskRec(TaskRec):
cdef public object work_in # = None
cdef public object device_in # = None
cpdef workInAdd(self,p)
cpdef deviceInAdd(self,p)
cdef class WorkerTaskRec(TaskRec):
cdef public object destination # = I_HANDLERA
cdef public Py_ssize_t count
# TaskState: three boolean flags; the is* queries are declared to return bint.
cdef class TaskState:
cdef public bint packet_pending # = True
cdef public bint task_waiting # = False
cdef public bint task_holding # = False
cpdef packetPending(self)
cpdef waiting(self)
cpdef running(self)
cpdef waitingWithPacket(self)
cpdef bint isPacketPending(self)
cpdef bint isTaskWaiting(self)
cpdef bint isTaskHolding(self)
cpdef bint isTaskHoldingOrWaiting(self)
cpdef bint isWaitingWithPacket(self)
# TaskWorkArea: a task table (list), a task list reference and two counters.
cdef class TaskWorkArea:
cdef public list taskTab # = [None] * TASKTABSIZE
cdef public object taskList # = None
cdef public Py_ssize_t holdCount # = 0
cdef public Py_ssize_t qpktCount # = 0
# Task: extends TaskState; ``link`` is typed as another Task.
cdef class Task(TaskState):
cdef public Task link # = taskWorkArea.taskList
cdef public object ident # = i
cdef public object priority # = p
cdef public object input # = w
cdef public object handle # = r
cpdef addPacket(self,Packet p,old)
cpdef runTask(self)
cpdef waitTask(self)
cpdef hold(self)
cpdef release(self,i)
cpdef qpkt(self,Packet pkt)
cpdef findtcb(self,id)
# The four Task subclasses each declare fn(pkt, r) and give one local
# variable of fn the type of the matching *TaskRec class.
cdef class DeviceTask(Task):
@cython.locals(d=DeviceTaskRec)
cpdef fn(self,Packet pkt,r)
cdef class HandlerTask(Task):
@cython.locals(h=HandlerTaskRec)
cpdef fn(self,Packet pkt,r)
cdef class IdleTask(Task):
@cython.locals(i=IdleTaskRec)
cpdef fn(self,Packet pkt,r)
cdef class WorkTask(Task):
@cython.locals(w=WorkerTaskRec)
cpdef fn(self,Packet pkt,r)
# Module-level schedule(): its local ``t`` is typed as Task.
@cython.locals(t=Task)
cpdef schedule()
cdef class Richards:
cpdef run(self, iterations)
This diff is collapsed.
cimport cython
# Typed signatures for the spectral-norm functions.  ``cython.locals``
# gives C types to the loop indices and running sums of the function
# declared right after each decorator.
# eval_A takes and returns C doubles and is declared inline.
cdef inline double eval_A(double i, double j)
# The three *_times_u functions take a list and return a list.
@cython.locals(i=long)
cdef list eval_A_times_u(list u)
@cython.locals(i=long)
cdef list eval_At_times_u(list u)
cdef list eval_AtA_times_u(list u)
# The part_* functions return a C double; ``j`` is a C long, ``u_j`` and
# ``partial_sum`` are C doubles.
@cython.locals(j=long, u_j=double, partial_sum=double)
cdef double part_A_times_u(double i, list u)
@cython.locals(j=long, u_j=double, partial_sum=double)
cdef double part_At_times_u(double i, list u)
# -*- coding: utf-8 -*-
# The Computer Language Benchmarks Game
# http://shootout.alioth.debian.org/
# Contributed by Sebastien Loisel
# Fixed by Isaac Gouy
# Sped up by Josh Goldfoot
# Dirtily sped up by Simon Descarpentries
# Concurrency by Jason Stitt
from time import time
import util
import optparse
def eval_A(i, j):
    """Return entry (i, j) of A: ``1 / ((i+j)*(i+j+1)/2 + i + 1)``."""
    ij = i + j
    return 1.0 / (ij * (ij + 1) / 2 + i + 1)


def eval_A_times_u(u):
    """Return the product ``A * u`` as a list with ``len(u)`` entries."""
    product = []
    for i in range(len(u)):
        product.append(part_A_times_u(i, u))
    return product


def eval_At_times_u(u):
    """Return the product ``transpose(A) * u`` as a list."""
    product = []
    for i in range(len(u)):
        product.append(part_At_times_u(i, u))
    return product


def eval_AtA_times_u(u):
    """Return ``transpose(A) * (A * u)`` as a list."""
    a_times_u = eval_A_times_u(u)
    return eval_At_times_u(a_times_u)


def part_A_times_u(i, u):
    """Return row ``i`` of A dotted with ``u`` (0 when ``u`` is empty)."""
    partial_sum = 0
    for j, u_j in enumerate(u):
        partial_sum = partial_sum + eval_A(i, j) * u_j
    return partial_sum


def part_At_times_u(i, u):
    """Return column ``i`` of A dotted with ``u`` (0 when ``u`` is empty)."""
    partial_sum = 0
    for j, u_j in enumerate(u):
        partial_sum = partial_sum + eval_A(j, i) * u_j
    return partial_sum
DEFAULT_N = 130


def main(n):
    """Run the spectral-norm workload ``n`` times.

    Each run starts from a vector of ``DEFAULT_N`` ones, applies
    ``eval_AtA_times_u`` twenty times (ten rounds of two) and accumulates
    two dot products of the final vectors.  Returns a list with the
    wall-clock duration, in seconds, of each run.
    """
    times = []
    for i in range(n):
        started = time()
        u = [1] * DEFAULT_N
        for _round in range(10):
            v = eval_AtA_times_u(u)
            u = eval_AtA_times_u(v)
        # The dot products are part of the timed work; their values are
        # not used afterwards.
        vBv = 0
        vv = 0
        for ue, ve in zip(u, v):
            vBv = vBv + ue * ve
            vv = vv + ve * ve
        finished = time()
        times.append(finished - started)
    return times
# Script entry point: build the option parser, add the shared benchmark
# options, then hand main to util.run_benchmark.
if __name__ == "__main__":
parser = optparse.OptionParser(
usage="%prog [options]",
description="Test the performance of the spectralnorm benchmark")
util.add_standard_options_to(parser)
options, args = parser.parse_args()
util.run_benchmark(options, options.num_runs, main)
#!/usr/bin/env python
"""Utility code for benchmark scripts."""
__author__ = "collinwinter@google.com (Collin Winter)"
import math
import operator
try:
reduce
except NameError:
from functools import reduce
def run_benchmark(options, num_runs, bench_func, *args):
    """Run the given benchmark, print results to stdout.

    Args:
        options: optparse.Values instance. Reads ``profile``,
            ``profile_sort`` (only when profiling) and ``take_geo_mean``.
        num_runs: number of times to run the benchmark
        bench_func: benchmark function. `num_runs, *args` will be passed to
            this function. This should return a list of floats (benchmark
            execution times).

    Raises:
        ZeroDivisionError: if the geometric mean is requested and
            ``bench_func`` returned no data.
    """
    if options.profile:
        import cProfile
        prof = cProfile.Profile()
        prof.runcall(bench_func, num_runs, *args)
        prof.print_stats(sort=options.profile_sort)
        return

    data = bench_func(num_runs, *args)
    if not options.take_geo_mean:
        for x in data:
            print(x)
        return

    if all(x > 0 for x in data):
        # Average the logarithms instead of multiplying the raw values:
        # the product of many small timings underflows to 0.0 (and of many
        # large ones overflows), which would corrupt the reported mean.
        log_mean = sum(math.log(x) for x in data) / len(data)
        print(math.exp(log_mean))
    else:
        # A zero or negative entry has no logarithm; keep the direct
        # product formula so such data behaves exactly as before.
        product = reduce(operator.mul, data, 1)
        print(math.pow(product, 1.0 / len(data)))
def add_standard_options_to(parser):
    """Register the command-line flags shared by all benchmark scripts.

    The parser is modified in place; nothing is returned.

    Args:
        parser: optparse.OptionParser instance.
    """
    # (flag, keyword arguments) pairs, registered in this order.
    standard_options = (
        ("-n", dict(action="store", type="int", default=100,
                    dest="num_runs",
                    help="Number of times to run the test.")),
        ("--profile", dict(action="store_true",
                           help="Run the benchmark through cProfile.")),
        ("--profile_sort", dict(action="store", type="str",
                                default="time",
                                help="Column to sort cProfile output by.")),
        ("--take_geo_mean", dict(action="store_true",
                                 help="Return the geo mean, rather than individual data.")),
    )
    for flag, settings in standard_options:
        parser.add_option(flag, **settings)
...@@ -12,6 +12,7 @@ include Doc/* ...@@ -12,6 +12,7 @@ include Doc/*
include Demos/*.pyx include Demos/*.pyx
include Demos/*.py include Demos/*.py
include Demos/callback/* include Demos/callback/*
include Demos/benchmarks/*
include Demos/embed/* include Demos/embed/*
include Demos/freeze/* include Demos/freeze/*
include Demos/libraries/* include Demos/libraries/*
......
...@@ -18,6 +18,7 @@ Contents: ...@@ -18,6 +18,7 @@ Contents:
limitations limitations
pyrex_differences pyrex_differences
early_binding_for_speed early_binding_for_speed
parallelism
debugging debugging
Indices and tables Indices and tables
......
.. highlight:: cython
.. py:module:: cython.parallel
**********************************
Using Parallelism
**********************************
Cython supports native parallelism through the :py:mod:`cython.parallel`
module. To use this kind of parallelism, the GIL must be released. It
currently supports OpenMP, but later on more backends might be supported.
.. function:: prange([start,] stop[, step], nogil=False, schedule=None)
This function can be used for parallel loops. OpenMP automatically
starts a thread pool and distributes the work according to the schedule
used. ``step`` must not be 0. This function can only be used with the
GIL released. If ``nogil`` is true, the loop will be wrapped in a nogil
section.
Thread-locality and reductions are automatically inferred for variables.
If you assign to a variable, it becomes lastprivate, meaning that the
variable will contain the value from the last iteration. If you use an
inplace operator on a variable, it becomes a reduction, meaning that the
values from the thread-local copies of the variable will be reduced with
the operator and assigned to the original variable after the loop. The
index variable is always lastprivate.
The ``schedule`` is passed to OpenMP and can be one of the following:
+-----------------+------------------------------------------------------+
| Schedule | Description |
+=================+======================================================+
|static | The iteration space is divided into chunks that are |
| | approximately equal in size, and at most one chunk |
| | is distributed to each thread. |
+-----------------+------------------------------------------------------+
|dynamic | The iterations are distributed to threads in the team|
| | as the threads request them, with a chunk size of 1. |
+-----------------+------------------------------------------------------+
|guided | The iterations are distributed to threads in the team|
| | as the threads request them. The size of each chunk |
| | is proportional to the number of unassigned |
| | iterations divided by the number of threads in the |
| | team, decreasing to 1. |
+-----------------+------------------------------------------------------+
|auto | The decision regarding scheduling is delegated to the|
| | compiler and/or runtime system. The programmer gives |
| | the implementation the freedom to choose any possible|
| | mapping of iterations to threads in the team. |
+-----------------+------------------------------------------------------+
|runtime | The schedule and chunk size are taken from the |
| | runtime-scheduling-variable, which can be set through|
| | the ``omp_set_schedule`` function call, or the |
| | ``OMP_SCHEDULE`` environment variable. |
+-----------------+------------------------------------------------------+
The default schedule is implementation defined. For more information consult
the OpenMP specification: [#]_.
Example with a reduction::
from cython.parallel import prange, parallel, threadid
cdef int i
cdef int sum = 0
for i in prange(n, nogil=True):
sum += i
print sum
Example with a shared numpy array::
from cython.parallel import *
def func(np.ndarray[double] x, double alpha):
cdef Py_ssize_t i
for i in prange(x.shape[0]):
x[i] = alpha * x[i]
.. function:: parallel
This directive can be used as part of a ``with`` statement to execute code
sequences in parallel. This is currently useful to set up thread-local
buffers used by a prange. A contained prange will be a worksharing loop
that is not parallel, so any variable assigned to in the parallel section
is also private to the prange. Variables that are private in the parallel
construct are undefined after the parallel block.
Example with thread-local buffers::
from cython.parallel import *
from libc.stdlib cimport abort, malloc, free
cdef Py_ssize_t i, n = 100
cdef int * local_buf
cdef size_t size = 10
with nogil, parallel:
local_buf = <int *> malloc(sizeof(int) * size)
if local_buf == NULL:
abort()
# populate our local buffer in a sequential loop
for i in range(size):
local_buf[i] = i * 2
# share the work using the thread-local buffer(s)
for i in prange(n, schedule='guided'):
func(local_buf)
free(local_buf)
Later on sections might be supported in parallel blocks, to distribute
code sections of work among threads.
.. function:: threadid()
Returns the id of the thread. For n threads, the ids will range from 0 to
n-1.
Compiling
=========
To actually use the OpenMP support, you need to tell the C or C++ compiler to
enable OpenMP. For gcc this can be done as follows in a setup.py::
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
ext_module = Extension(
"hello",
["hello.pyx"],
extra_compile_args=['-fopenmp'],
libraries=['gomp'],
)
setup(
name = 'Hello world app',
cmdclass = {'build_ext': build_ext},
ext_modules = [ext_module],
)
.. rubric:: References
.. [#] http://www.openmp.org/mp-documents/spec30.pdf
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment