Commit 09c833f0 authored by Dag Sverre Seljebotn

Rebasing merge of pull request #30

parents 4d4bb4aa 42a187c5
......@@ -699,10 +699,12 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("")
self.generate_extern_c_macro_definition(code)
code.putln("")
code.putln("#if defined(WIN32) || defined(MS_WINDOWS)")
code.putln("#define _USE_MATH_DEFINES")
code.putln("#endif")
code.putln("#include <math.h>")
code.putln("#define %s" % Naming.h_guard_prefix + self.api_name(env))
code.putln("#define %s" % Naming.api_guard_prefix + self.api_name(env))
self.generate_includes(env, cimported_modules, code)
......@@ -711,6 +713,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("#define CYTHON_WITHOUT_ASSERTIONS")
code.putln("#endif")
code.putln("")
if env.directives['ccomplex']:
code.putln("")
code.putln("#if !defined(CYTHON_CCOMPLEX)")
......
......@@ -116,5 +116,7 @@ h_guard_prefix = "__PYX_HAVE__"
api_guard_prefix = "__PYX_HAVE_API__"
api_func_guard = "__PYX_HAVE_API_FUNC_"
PYX_NAN = "__PYX_NAN"
def py_version_hex(major, minor=0, micro=0, release_level=0, release_serial=0):
    """
    Pack the components of a version number into a single integer.

    Bit layout of the result, from most to least significant:
    ``major`` is shifted left by 24 bits, ``minor`` by 16, ``micro`` by 8,
    ``release_level`` by 4, and ``release_serial`` occupies the lowest bits.
    The fields are OR-ed together; no range checking is performed.
    """
    packed = major << 24
    packed |= minor << 16
    packed |= micro << 8
    packed |= release_level << 4
    packed |= release_serial
    return packed
......@@ -5864,6 +5864,10 @@ class ParallelStatNode(StatNode, ParallelNode):
privatization_insertion_point a code insertion point used to make temps
private (esp. the "nsteps" temp)
args tuple the arguments passed to the parallel construct
kwargs DictNode the keyword arguments passed to the parallel
construct (replaced by its compile time value)
"""
child_attrs = ['body']
......@@ -5888,8 +5892,21 @@ class ParallelStatNode(StatNode, ParallelNode):
def analyse_declarations(self, env):
    """
    Analyse the body's declarations and process the keyword arguments.

    self.kwargs is replaced by its compile time value (or by an empty
    dict when no keyword arguments were given). Every keyword listed in
    self.valid_keyword_arguments is then stored as an attribute of the
    same name on this node; any other keyword is reported as an error.
    """
    self.body.analyse_declarations(env)

    # Swap the keyword argument node for a plain dict
    if not self.kwargs:
        self.kwargs = {}
    else:
        self.kwargs = self.kwargs.compile_time_value(env)

    for kw, val in self.kwargs.iteritems():
        if kw in self.valid_keyword_arguments:
            setattr(self, kw, val)
        else:
            error(self.pos, "Invalid keyword argument: %s" % kw)
def analyse_expressions(self, env):
    """
    Analyse the expressions of the body, then work out the sharing
    attributes of the variables used in it and finally check for reads
    of thread-private variables that precede their assignment.
    """
    body = self.body
    body.analyse_expressions(env)

    # The iteration check below looks at self.privates and
    # self.assignments, so it runs after the sharing analysis
    self.analyse_sharing_attributes(env)
    self.check_independent_iterations()
def analyse_sharing_attributes(self, env):
"""
......@@ -5995,6 +6012,64 @@ class ParallelStatNode(StatNode, ParallelNode):
code.putln("%s = %s;" % (cname, entry.cname))
entry.cname = cname
def check_independent_iterations(self):
    """
    Report reads of thread-private variables that happen before the
    variable is assigned.

    The check is deliberately simple: it compares source positions only,
    so it does not take control flow into account, nor an assignment
    that uses the variable on both the lhs and the rhs. It catches
    cases like this:

        for i in prange(10, nogil=True):
            var = x # error, x is private and read before assigned
            x = i

    It does not need to be perfect, as private variables are still
    initialized to "invalid" values, such as NULL or NaN, whenever
    possible.
    """
    from Cython.Compiler import ParseTreeTransforms

    finder = ParseTreeTransforms.FindUninitializedParallelVars()
    finder(self.body)

    for entry, pos in finder.used_vars:
        if entry not in self.privates:
            continue

        assignment_pos, op = self.assignments[entry]

        # Reading reduction variables is valid (in fact necessary)
        # before assignment, so entries with an operator are skipped
        if op or not (pos < assignment_pos):
            continue

        if self.is_prange:
            error(pos, "Expression value depends on previous loop "
                       "iteration, cannot execute in parallel")
        else:
            error(pos, "Expression depends on an uninitialized "
                       "thread-private variable")
def initialize_privates_to_nan(self, code, exclude=None):
    """
    Emit C assignments that set private variables to the invalid value
    of their type.

    Entries of self.privates that carry an operator are left alone, as
    is the entry given by 'exclude' and any entry whose type reports no
    invalid value. The utility code providing the invalid values is
    requested whenever an assignment is written.
    """
    code.putln("/* Initialize private variables to invalid values */")

    for entry, op in self.privates.iteritems():
        if op:
            continue
        if exclude and not (entry != exclude):
            continue

        invalid_value = entry.type.invalid_value()
        if not invalid_value:
            continue

        code.globalstate.use_utility_code(invalid_values_utility_code)
        initializer = entry.type.cast_code(invalid_value)
        code.putln("%s = %s;" % (entry.cname, initializer))
def put_num_threads(self, code):
    """
    Write self.num_threads if set as the num_threads OpenMP directive

    Nothing is written when self.num_threads is None. A value that is
    not an integer, or an integer that is not greater than zero, is
    reported through error() and no clause is written: OpenMP requires
    the num_threads expression to evaluate to a positive integer, so
    such a value must not end up in the generated pragma.
    """
    num_threads = self.num_threads
    if num_threads is None:
        return

    try:
        integer_types = (int, long)
    except NameError:
        # Python 3 has no separate 'long' type
        integer_types = (int,)

    if not isinstance(num_threads, integer_types):
        error(self.pos, "Invalid value for num_threads argument, "
                        "expected an int")
    elif num_threads <= 0:
        error(self.pos, "Invalid value for num_threads argument, "
                        "expected an int greater than 0")
    else:
        code.put(" num_threads(%d)" % (num_threads,))
def declare_closure_privates(self, code):
"""
Set self.privates to a dict mapping C variable names that are to be
......@@ -6032,9 +6107,15 @@ class ParallelWithBlockNode(ParallelStatNode):
nogil_check = None
def analyse_expressions(self, env):
super(ParallelWithBlockNode, self).analyse_expressions(env)
self.analyse_sharing_attributes(env)
valid_keyword_arguments = ['num_threads']
num_threads = None
def analyse_declarations(self, env):
    """
    Run the inherited declaration analysis, then reject positional
    arguments: the parallel() construct only takes keyword arguments.
    """
    super(ParallelWithBlockNode, self).analyse_declarations(env)

    if not self.args:
        return

    error(self.pos, "cython.parallel.parallel() does not take "
                    "positional arguments")
def generate_execution_code(self, code):
self.declare_closure_privates(code)
......@@ -6047,11 +6128,13 @@ class ParallelWithBlockNode(ParallelStatNode):
'private(%s)' % ', '.join([e.cname for e in self.privates]))
self.privatization_insertion_point = code.insertion_point()
self.put_num_threads(code)
code.putln("")
code.putln("#endif /* _OPENMP */")
code.begin_block()
self.initialize_privates_to_nan(code)
self.body.generate_execution_code(code)
code.end_block()
......@@ -6064,11 +6147,6 @@ class ParallelRangeNode(ParallelStatNode):
target NameNode the target iteration variable
else_clause Node or None the else clause of this loop
args tuple the arguments passed to prange()
kwargs DictNode the keyword arguments passed to prange()
(replaced by its compile time value)
is_nogil bool indicates whether this is a nogil prange() node
"""
child_attrs = ['body', 'target', 'else_clause', 'args']
......@@ -6078,7 +6156,12 @@ class ParallelRangeNode(ParallelStatNode):
start = stop = step = None
is_prange = True
is_nogil = False
nogil = False
schedule = None
num_threads = None
valid_keyword_arguments = ['schedule', 'nogil', 'num_threads']
def analyse_declarations(self, env):
super(ParallelRangeNode, self).analyse_declarations(env)
......@@ -6097,14 +6180,6 @@ class ParallelRangeNode(ParallelStatNode):
else:
self.start, self.stop, self.step = self.args
if self.kwargs:
self.kwargs = self.kwargs.compile_time_value(env)
else:
self.kwargs = {}
self.is_nogil = self.kwargs.pop('nogil', False)
self.schedule = self.kwargs.pop('schedule', None)
if hasattr(self.schedule, 'decode'):
self.schedule = self.schedule.decode('ascii')
......@@ -6113,9 +6188,6 @@ class ParallelRangeNode(ParallelStatNode):
error(self.pos, "Invalid schedule argument to prange: %s" %
(self.schedule,))
for kw in self.kwargs:
error(self.pos, "Invalid keyword argument to prange: %s" % kw)
def analyse_expressions(self, env):
if self.target is None:
error(self.pos, "prange() can only be used as part of a for loop")
......@@ -6156,7 +6228,6 @@ class ParallelRangeNode(ParallelStatNode):
self.index_type = PyrexTypes.widest_numeric_type(
self.index_type, node.type)
super(ParallelRangeNode, self).analyse_expressions(env)
if self.else_clause is not None:
self.else_clause.analyse_expressions(env)
......@@ -6169,6 +6240,7 @@ class ParallelRangeNode(ParallelStatNode):
self.assignments[self.target.entry] = self.target.pos, None
self.analyse_sharing_attributes(env)
super(ParallelRangeNode, self).analyse_expressions(env)
def nogil_check(self, env):
names = 'start', 'stop', 'step', 'target'
......@@ -6303,6 +6375,8 @@ class ParallelRangeNode(ParallelStatNode):
c = self.parent.privatization_insertion_point
c.put(" private(%(nsteps)s)" % fmt_dict)
self.put_num_threads(code)
self.privatization_insertion_point = code.insertion_point()
code.putln("")
......@@ -6311,6 +6385,9 @@ class ParallelRangeNode(ParallelStatNode):
code.put("for (%(i)s = 0; %(i)s < %(nsteps)s; %(i)s++)" % fmt_dict)
code.begin_block()
code.putln("%(target)s = %(start)s + %(step)s * %(i)s;" % fmt_dict)
self.initialize_privates_to_nan(code, exclude=self.target.entry)
self.body.generate_execution_code(code)
code.end_block()
......@@ -7512,3 +7589,22 @@ bad:
'EMPTY_BYTES' : Naming.empty_bytes,
"MODULE": Naming.module_cname,
})
################ Utility code for cython.parallel stuff ################
# Utility code that provides the NaN value used to initialize private
# floating point variables. The proto declares a static float named by
# Naming.PYX_NAN; the init section fills every byte of it with 0xFF.
# NOTE(review): the proto also declares __pyx_init_nan(), but no definition
# of it is visible in this chunk -- confirm whether the prototype is needed.
invalid_values_utility_code = UtilityCode(
proto="""\
#include <string.h>
void __pyx_init_nan(void);
static float %(PYX_NAN)s;
""" % vars(Naming),
init="""
/* Initialize NaN. The sign is irrelevant, an exponent with all bits 1 and
a nonzero mantissa means NaN. If the first bit in the mantissa is 1, it is
a quiet NaN. */
memset(&%(PYX_NAN)s, 0xFF, sizeof(%(PYX_NAN)s));
""" % vars(Naming))
......@@ -2094,8 +2094,8 @@ class GilCheck(VisitorTransform):
return node
def visit_ParallelRangeNode(self, node):
if node.is_nogil:
node.is_nogil = False
if node.nogil:
node.nogil = False
node = Nodes.GILStatNode(node.pos, state='nogil', body=node)
return self.visit_GILStatNode(node)
......@@ -2273,6 +2273,24 @@ class TransformBuiltinMethods(EnvTransform):
return node
class FindUninitializedParallelVars(CythonTransform, SkipDeclarations):
    """
    Helper transform that is not part of the pipeline. It is run on the
    body of a parallel block and records every name reference it visits.

    used_vars    list of (entry, pos) tuples, one per visited NameNode
    """

    def __init__(self):
        CythonTransform.__init__(self, None)
        self.used_vars = []

    def visit_ParallelStatNode(self, node):
        # Handed back unchanged; this handler does not visit the children
        # of a parallel statement itself
        return node

    def visit_NameNode(self, node):
        reference = (node.entry, node.pos)
        self.used_vars.append(reference)
        return node
class DebugTransform(CythonTransform):
"""
Write debug information for this Cython module.
......
......@@ -27,6 +27,12 @@ class BaseType(object):
else:
return base_code
def invalid_value(self):
    """
    Give the most invalid value an object of this type can assume, as a
    C expression string, or None if no such value exists.

    This base implementation knows of no such value.
    """
    return None
class PyrexType(BaseType):
#
# Base class for all Pyrex types.
......@@ -204,6 +210,9 @@ class CTypedefType(BaseType):
self.typedef_base_type = base_type
self.typedef_is_external = is_external
def invalid_value(self):
    """
    A typedef has the same invalid value as the type it is based on.
    """
    underlying = self.typedef_base_type
    return underlying.invalid_value()
def resolve(self):
return self.typedef_base_type.resolve()
......@@ -378,6 +387,10 @@ class PyObjectType(PyrexType):
else:
return cname
def invalid_value(self):
    """
    Invalid value for an object reference, as a C expression string:
    the literal 1.
    """
    c_literal = "1"
    return c_literal
class BuiltinObjectType(PyObjectType):
# objstruct_cname string Name of PyObject struct
......@@ -902,6 +915,14 @@ class CIntType(CNumericType):
def assignable_from_resolved_type(self, src_type):
return src_type.is_int or src_type.is_enum or src_type is error_type
def invalid_value(self):
    """
    Invalid value for an integer type, as a C expression string.

    The 'char' rank gets a character literal; every other rank gets a
    32-bit hexadecimal literal.
    """
    if rank_to_type_name[self.rank] != 'char':
        # The size of the type is not really known here, so a 32-bit
        # literal is returned and the cast to the final type is relied
        # upon. It comes out negative for signed ints, which is good.
        return "0xbad0bad0"
    return "'?'"
class CAnonEnumType(CIntType):
......@@ -1109,6 +1130,8 @@ class CFloatType(CNumericType):
def assignable_from_resolved_type(self, src_type):
return (src_type.is_numeric and not src_type.is_complex) or src_type is error_type
def invalid_value(self):
    """
    Invalid value for a floating point type: the C name stored in
    Naming.PYX_NAN.
    """
    nan_cname = Naming.PYX_NAN
    return nan_cname
class CComplexType(CNumericType):
......@@ -1622,6 +1645,8 @@ class CPtrType(CType):
else:
return CPtrType(base_type)
def invalid_value(self):
    """
    Invalid value for a pointer, as a C expression string: the literal 1.
    """
    c_literal = "1"
    return c_literal
class CNullPtrType(CPtrType):
......
......@@ -39,7 +39,24 @@ class MarkAssignments(CythonTransform):
if self.parallel_block_stack:
parallel_node = self.parallel_block_stack[-1]
parallel_node.assignments[lhs.entry] = (lhs.pos, inplace_op)
previous_assignment = parallel_node.assignments.get(lhs.entry)
# If there was a previous assignment to the variable, keep the
# previous assignment position
if previous_assignment:
pos, previous_inplace_op = previous_assignment
if (inplace_op and previous_inplace_op and
inplace_op != previous_inplace_op):
# x += y; x *= y
t = (inplace_op, previous_inplace_op)
error(lhs.pos,
"Reduction operator '%s' is inconsistent "
"with previous reduction operator '%s'" % t)
else:
pos = lhs.pos
parallel_node.assignments[lhs.entry] = (pos, inplace_op)
elif isinstance(lhs, ExprNodes.SequenceNode):
for arg in lhs.args:
......
......@@ -8,42 +8,42 @@ cdef extern from "omp.h":
omp_sched_guided = 3,
omp_sched_auto = 4
extern void omp_set_num_threads(int)
extern int omp_get_num_threads()
extern int omp_get_max_threads()
extern int omp_get_thread_num()
extern int omp_get_num_procs()
extern int omp_in_parallel()
extern void omp_set_dynamic(int)
extern int omp_get_dynamic()
extern void omp_set_nested(int)
extern int omp_get_nested()
extern void omp_init_lock(omp_lock_t *)
extern void omp_destroy_lock(omp_lock_t *)
extern void omp_set_lock(omp_lock_t *)
extern void omp_unset_lock(omp_lock_t *)
extern int omp_test_lock(omp_lock_t *)
extern void omp_init_nest_lock(omp_nest_lock_t *)
extern void omp_destroy_nest_lock(omp_nest_lock_t *)
extern void omp_set_nest_lock(omp_nest_lock_t *)
extern void omp_unset_nest_lock(omp_nest_lock_t *)
extern int omp_test_nest_lock(omp_nest_lock_t *)
extern double omp_get_wtime()
extern double omp_get_wtick()
void omp_set_schedule(omp_sched_t, int)
void omp_get_schedule(omp_sched_t *, int *)
int omp_get_thread_limit()
void omp_set_max_active_levels(int)
int omp_get_max_active_levels()
int omp_get_level()
int omp_get_ancestor_thread_num(int)
int omp_get_team_size(int)
int omp_get_active_level()
extern void omp_set_num_threads(int) nogil
extern int omp_get_num_threads() nogil
extern int omp_get_max_threads() nogil
extern int omp_get_thread_num() nogil
extern int omp_get_num_procs() nogil
extern int omp_in_parallel() nogil
extern void omp_set_dynamic(int) nogil
extern int omp_get_dynamic() nogil
extern void omp_set_nested(int) nogil
extern int omp_get_nested() nogil
extern void omp_init_lock(omp_lock_t *) nogil
extern void omp_destroy_lock(omp_lock_t *) nogil
extern void omp_set_lock(omp_lock_t *) nogil
extern void omp_unset_lock(omp_lock_t *) nogil
extern int omp_test_lock(omp_lock_t *) nogil
extern void omp_init_nest_lock(omp_nest_lock_t *) nogil
extern void omp_destroy_nest_lock(omp_nest_lock_t *) nogil
extern void omp_set_nest_lock(omp_nest_lock_t *) nogil
extern void omp_unset_nest_lock(omp_nest_lock_t *) nogil
extern int omp_test_nest_lock(omp_nest_lock_t *) nogil
extern double omp_get_wtime() nogil
extern double omp_get_wtick() nogil
void omp_set_schedule(omp_sched_t, int) nogil
void omp_get_schedule(omp_sched_t *, int *) nogil
int omp_get_thread_limit() nogil
void omp_set_max_active_levels(int) nogil
int omp_get_max_active_levels() nogil
int omp_get_level() nogil
int omp_get_ancestor_thread_num(int) nogil
int omp_get_team_size(int) nogil
int omp_get_active_level() nogil
......@@ -288,7 +288,8 @@ class CythonDotParallel(object):
__all__ = ['parallel', 'prange', 'threadid']
parallel = nogil
def parallel(self, num_threads=None):
    """
    Pure-mode stand-in for cython.parallel.parallel().

    The num_threads argument is accepted and not used; the module-level
    'nogil' object is returned in every case.
    """
    context = nogil
    return context
def prange(self, start=0, stop=None, step=1, schedule=None, nogil=False):
if stop is None:
......
......@@ -39,6 +39,32 @@ with nogil, cython.parallel.parallel():
with nogil, cython.parallel.parallel:
pass
cdef int y
# this is not valid
for i in prange(10, nogil=True):
i = y * 4
y = i
# this is valid
for i in prange(10, nogil=True):
y = i
i = y * 4
y = i
with nogil, cython.parallel.parallel():
i = y
y = i
for i in prange(10, nogil=True):
y += i
y *= i
with nogil, cython.parallel.parallel("invalid"):
pass
with nogil, cython.parallel.parallel(invalid=True):
pass
_ERRORS = u"""
e_cython_parallel.pyx:3:8: cython.parallel.parallel is not a module
e_cython_parallel.pyx:4:0: No such directive: cython.parallel.something
......@@ -53,4 +79,9 @@ e_cython_parallel.pyx:30:9: Can only iterate over an iteration variable
e_cython_parallel.pyx:33:10: Must be of numeric type, not int *
e_cython_parallel.pyx:36:33: Closely nested 'with parallel:' blocks are disallowed
e_cython_parallel.pyx:39:12: The parallel directive must be called
e_cython_parallel.pyx:45:10: Expression value depends on previous loop iteration, cannot execute in parallel
e_cython_parallel.pyx:55:9: Expression depends on an uninitialized thread-private variable
e_cython_parallel.pyx:60:6: Reduction operator '*' is inconsistent with previous reduction operator '+'
e_cython_parallel.pyx:62:36: cython.parallel.parallel() does not take positional arguments
e_cython_parallel.pyx:65:36: Invalid keyword argument: invalid
"""
......@@ -24,4 +24,22 @@ def test_parallel():
free(buf)
# Runs a parallel block with num_threads=1 and returns the team size that
# OpenMP reports from inside the block (the doctest expects 1).
def test_num_threads():
"""
>>> test_num_threads()
1
"""
# Remember the current dynamic setting so it can be restored at the end
cdef int dyn = openmp.omp_get_dynamic()
cdef int num_threads
# The result is written through a pointer from inside the parallel block
cdef int *p = &num_threads
# presumably turns off dynamic adjustment of the team size -- TODO confirm
openmp.omp_set_dynamic(0)
with nogil, cython.parallel.parallel(num_threads=1):
p[0] = openmp.omp_get_num_threads()
# Put the saved dynamic setting back
openmp.omp_set_dynamic(dyn)
return num_threads
include "sequential_parallel.pyx"
......@@ -171,6 +171,64 @@ def test_pure_mode():
for i in pure_parallel.prange(4, -1, -1, schedule='dynamic', nogil=True):
print i
with pure_parallel.parallel:
with pure_parallel.parallel():
print pure_parallel.threadid()
cdef extern from "types.h":
ctypedef short actually_long_t
ctypedef long actually_short_t
ctypedef int myint_t
# Checks that variables of many C types no longer hold their pre-set value
# (10) when read inside a parallel block; raises an Exception otherwise.
def test_nan_init():
"""
>>> test_nan_init()
"""
# Never set to a true value; only guards the dummy assignments below
cdef int mybool = 0
# Error flag, written through errp from inside the parallel block
cdef int err = 0
cdef int *errp = &err
# One variable per type under test, each pre-set to 10
cdef signed char a1 = 10
cdef unsigned char a2 = 10
cdef short b1 = 10
cdef unsigned short b2 = 10
cdef int c1 = 10
cdef unsigned int c2 = 10
cdef long d1 = 10
cdef unsigned long d2 = 10
cdef long long e1 = 10
cdef unsigned long long e2 = 10
# Typedefs whose Cython-side declaration differs from the C header
cdef actually_long_t miss1 = 10
cdef actually_short_t miss2 = 10
cdef myint_t typedef1 = 10
cdef float f = 10.0
cdef double g = 10.0
cdef long double h = 10.0
cdef void *p = <void *> 10
with nogil, cython.parallel.parallel():
# First, trick the error checking to make it believe these variables
# are initialized after this if
if mybool: # mybool is always false!
a1 = a2 = b1 = b2 = c1 = c2 = d1 = d2 = e1 = e2 = 0
f = g = h = 0.0
p = NULL
miss1 = miss2 = typedef1 = 0
# Any variable still equal to its pre-set value counts as a failure
if (a1 == 10 or a2 == 10 or
b1 == 10 or b2 == 10 or
c1 == 10 or c2 == 10 or
d1 == 10 or d2 == 10 or
e1 == 10 or e2 == 10 or
f == 10.0 or g == 10.0 or h == 10.0 or
p == <void *> 10 or miss1 == 10 or miss2 == 10
or typedef1 == 10):
errp[0] = 1
# The exception is raised here, after the nogil parallel block has ended
if err:
raise Exception("One of the values was not initialized to a maximum "
"or NaN value")
/*
This header is present to test effects of misdeclaring
types Cython-side.
*/
typedef long actually_long_t;
typedef short actually_short_t;
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment