Commit 9e1e1ffe authored by Mark Florisson

Disable OpenMP nested parallelism

parent 60a42461
...@@ -5910,6 +5910,22 @@ class ParallelStatNode(StatNode, ParallelNode): ...@@ -5910,6 +5910,22 @@ class ParallelStatNode(StatNode, ParallelNode):
analyse_expressions phase analyse_expressions phase
""" """
for entry, (pos, op) in self.assignments.iteritems(): for entry, (pos, op) in self.assignments.iteritems():
if self.is_prange and not self.is_parallel:
# closely nested prange in a with parallel block, disallow
# assigning to privates in the with parallel block (we
# consider it too implicit and magicky for users)
if entry in self.parent.assignments:
error(pos,
"Cannot assign to private of outer parallel block, "
"as we cannot retain its value after the loop")
continue
if not self.is_prange and op:
# Again possible, but considered too magicky
error(pos, "Reductions not allowed for parallel blocks")
continue
if self.is_private(entry): if self.is_private(entry):
# lastprivate = self.is_prange and entry == self.target.entry # lastprivate = self.is_prange and entry == self.target.entry
# By default all variables should have the same values as if # By default all variables should have the same values as if
...@@ -6590,7 +6606,6 @@ class ParallelRangeNode(ParallelStatNode): ...@@ -6590,7 +6606,6 @@ class ParallelRangeNode(ParallelStatNode):
if hasattr(self.target, 'entry'): if hasattr(self.target, 'entry'):
self.assignments[self.target.entry] = self.target.pos, None self.assignments[self.target.entry] = self.target.pos, None
self.analyse_sharing_attributes(env)
super(ParallelRangeNode, self).analyse_expressions(env) super(ParallelRangeNode, self).analyse_expressions(env)
def nogil_check(self, env): def nogil_check(self, env):
...@@ -6723,10 +6738,12 @@ class ParallelRangeNode(ParallelStatNode): ...@@ -6723,10 +6738,12 @@ class ParallelRangeNode(ParallelStatNode):
if not self.is_parallel: if not self.is_parallel:
code.put("#pragma omp for") code.put("#pragma omp for")
self.privatization_insertion_point = code.insertion_point()
reduction_codepoint = self.parent.privatization_insertion_point
else: else:
code.put("#pragma omp parallel") code.put("#pragma omp parallel")
self.put_num_threads(code)
self.privatization_insertion_point = code.insertion_point() self.privatization_insertion_point = code.insertion_point()
reduction_codepoint = self.privatization_insertion_point
code.putln("") code.putln("")
code.putln("#endif /* _OPENMP */") code.putln("#endif /* _OPENMP */")
...@@ -6744,7 +6761,10 @@ class ParallelRangeNode(ParallelStatNode): ...@@ -6744,7 +6761,10 @@ class ParallelRangeNode(ParallelStatNode):
if entry.type.is_pyobject: if entry.type.is_pyobject:
error(self.pos, "Python objects cannot be reductions") error(self.pos, "Python objects cannot be reductions")
else: else:
code.put(" reduction(%s:%s)" % (op, entry.cname)) #code.put(" reduction(%s:%s)" % (op, entry.cname))
# This is the only way reductions + nesting works in gcc4.5
reduction_codepoint.put(
" reduction(%s:%s)" % (op, entry.cname))
else: else:
if entry == self.target.entry: if entry == self.target.entry:
code.put(" firstprivate(%s)" % entry.cname) code.put(" firstprivate(%s)" % entry.cname)
...@@ -6762,9 +6782,7 @@ class ParallelRangeNode(ParallelStatNode): ...@@ -6762,9 +6782,7 @@ class ParallelRangeNode(ParallelStatNode):
if self.schedule: if self.schedule:
code.put(" schedule(%s)" % self.schedule) code.put(" schedule(%s)" % self.schedule)
if not self.is_parallel: self.put_num_threads(reduction_codepoint)
self.put_num_threads(self.parent.privatization_insertion_point)
self.privatization_insertion_point = code.insertion_point()
code.putln("") code.putln("")
code.putln("#endif /* _OPENMP */") code.putln("#endif /* _OPENMP */")
......
...@@ -26,6 +26,8 @@ class MarkAssignments(CythonTransform): ...@@ -26,6 +26,8 @@ class MarkAssignments(CythonTransform):
# tells us whether we're in a normal loop # tells us whether we're in a normal loop
in_loop = False in_loop = False
parallel_errors = False
def __init__(self, context): def __init__(self, context):
super(CythonTransform, self).__init__() super(CythonTransform, self).__init__()
self.context = context self.context = context
...@@ -176,17 +178,30 @@ class MarkAssignments(CythonTransform): ...@@ -176,17 +178,30 @@ class MarkAssignments(CythonTransform):
else: else:
node.parent = None node.parent = None
nested = False
if node.is_prange: if node.is_prange:
if not node.parent: if not node.parent:
node.is_parallel = True node.is_parallel = True
else: else:
node.is_parallel = (node.parent.is_prange or not node.is_parallel = (node.parent.is_prange or not
node.parent.is_parallel) node.parent.is_parallel)
nested = node.parent.is_prange
else: else:
node.is_parallel = True node.is_parallel = True
# Note: nested with parallel() blocks are handled by
# ParallelRangeTransform!
# nested = node.parent
nested = node.parent and node.parent.is_prange
self.parallel_block_stack.append(node) self.parallel_block_stack.append(node)
nested = nested or len(self.parallel_block_stack) > 2
if not self.parallel_errors and nested:
error(node.pos,
"Parallel nesting not supported due to bugs in gcc 4.5")
self.parallel_errors = True
if node.is_prange: if node.is_prange:
child_attrs = node.child_attrs child_attrs = node.child_attrs
node.child_attrs = ['body', 'target', 'args'] node.child_attrs = ['body', 'target', 'args']
...@@ -200,6 +215,7 @@ class MarkAssignments(CythonTransform): ...@@ -200,6 +215,7 @@ class MarkAssignments(CythonTransform):
self.visitchildren(node) self.visitchildren(node)
self.parallel_block_stack.pop() self.parallel_block_stack.pop()
self.parallel_errors = False
return node return node
def visit_YieldExprNode(self, node): def visit_YieldExprNode(self, node):
......
...@@ -76,6 +76,33 @@ def f(x): ...@@ -76,6 +76,33 @@ def f(x):
with gil: with gil:
yield x yield x
# Disabled nesting:
for i in prange(10, nogil=True):
for y in prange(10):
pass
with nogil, cython.parallel.parallel():
for i in prange(10):
for i in prange(10):
pass
# Assign to private from parallel block in prange:
cdef int myprivate1, myprivate2
with nogil, cython.parallel.parallel():
myprivate1 = 1
for i in prange(10):
myprivate1 = 3
myprivate2 = 4
myprivate2 = 2
# Disallow parallel with block reductions:
i = 0
with nogil, cython.parallel.parallel():
i += 1
_ERRORS = u""" _ERRORS = u"""
e_cython_parallel.pyx:3:8: cython.parallel.parallel is not a module e_cython_parallel.pyx:3:8: cython.parallel.parallel is not a module
e_cython_parallel.pyx:4:0: No such directive: cython.parallel.something e_cython_parallel.pyx:4:0: No such directive: cython.parallel.something
...@@ -97,4 +124,9 @@ e_cython_parallel.pyx:62:36: cython.parallel.parallel() does not take positional ...@@ -97,4 +124,9 @@ e_cython_parallel.pyx:62:36: cython.parallel.parallel() does not take positional
e_cython_parallel.pyx:65:36: Invalid keyword argument: invalid e_cython_parallel.pyx:65:36: Invalid keyword argument: invalid
e_cython_parallel.pyx:73:12: Yield not allowed in parallel sections e_cython_parallel.pyx:73:12: Yield not allowed in parallel sections
e_cython_parallel.pyx:77:16: Yield not allowed in parallel sections e_cython_parallel.pyx:77:16: Yield not allowed in parallel sections
e_cython_parallel.pyx:82:19: Parallel nesting not supported due to bugs in gcc 4.5
e_cython_parallel.pyx:87:23: Parallel nesting not supported due to bugs in gcc 4.5
e_cython_parallel.pyx:97:19: Cannot assign to private of outer parallel block, as we cannot retain its value after the loop
e_cython_parallel.pyx:98:19: Cannot assign to private of outer parallel block, as we cannot retain its value after the loop
e_cython_parallel.pyx:104:6: Reductions not allowed for parallel blocks
""" """
...@@ -44,6 +44,7 @@ def test_num_threads(): ...@@ -44,6 +44,7 @@ def test_num_threads():
return num_threads return num_threads
'''
def test_parallel_catch(): def test_parallel_catch():
""" """
>>> test_parallel_catch() >>> test_parallel_catch()
...@@ -67,6 +68,7 @@ def test_parallel_catch(): ...@@ -67,6 +68,7 @@ def test_parallel_catch():
print len(exceptions) == num_threads print len(exceptions) == num_threads
assert len(exceptions) == num_threads, (len(exceptions), num_threads) assert len(exceptions) == num_threads, (len(exceptions), num_threads)
'''
OPENMP_PARALLEL = True OPENMP_PARALLEL = True
......
...@@ -46,6 +46,7 @@ def test_descending_prange(): ...@@ -46,6 +46,7 @@ def test_descending_prange():
return sum return sum
'''
def test_propagation(): def test_propagation():
""" """
>>> test_propagation() >>> test_propagation()
...@@ -65,6 +66,7 @@ def test_propagation(): ...@@ -65,6 +66,7 @@ def test_propagation():
sum2 += y sum2 += y
return i, j, x, y, sum1, sum2 return i, j, x, y, sum1, sum2
'''
def test_unsigned_operands(): def test_unsigned_operands():
""" """
...@@ -224,7 +226,7 @@ def test_nan_init(): ...@@ -224,7 +226,7 @@ def test_nan_init():
cdef unsigned long d2 = 10 cdef unsigned long d2 = 10
cdef long long e1 = 10 cdef long long e1 = 10
cdef unsigned long long e2 = 10 cdef unsigned long long e2 = 10
cdef actually_long_t miss1 = 10 cdef actually_long_t miss1 = 10
cdef actually_short_t miss2 = 10 cdef actually_short_t miss2 = 10
cdef myint_t typedef1 = 10 cdef myint_t typedef1 = 10
...@@ -255,6 +257,27 @@ def test_nan_init(): ...@@ -255,6 +257,27 @@ def test_nan_init():
or typedef1 == 10): or typedef1 == 10):
errp[0] = 1 errp[0] = 1
cdef int i
for i in prange(10, nogil=True):
# First, trick the error checking to make it believe these variables
# are initialized after this if
if mybool: # mybool is always false!
a1 = a2 = b1 = b2 = c1 = c2 = d1 = d2 = e1 = e2 = 0
f = g = h = 0.0
p = NULL
miss1 = miss2 = typedef1 = 0
if (a1 == 10 or a2 == 10 or
b1 == 10 or b2 == 10 or
c1 == 10 or c2 == 10 or
d1 == 10 or d2 == 10 or
e1 == 10 or e2 == 10 or
f == 10.0 or g == 10.0 or h == 10.0 or
p == <void *> 10 or miss1 == 10 or miss2 == 10
or typedef1 == 10):
errp[0] = 1
if err: if err:
raise Exception("One of the values was not initialized to a maximum " raise Exception("One of the values was not initialized to a maximum "
"or NaN value") "or NaN value")
...@@ -263,9 +286,6 @@ def test_nan_init(): ...@@ -263,9 +286,6 @@ def test_nan_init():
with nogil, cython.parallel.parallel(): with nogil, cython.parallel.parallel():
c1 = 16 c1 = 16
if globals().get('OPENMP_PARALLEL'):
# We only reach here when we are included by parallel.pyx
assert c1 == 20, c1
cdef void nogil_print(char *s) with gil: cdef void nogil_print(char *s) with gil:
print s.decode('ascii') print s.decode('ascii')
...@@ -328,6 +348,7 @@ def test_prange_continue(): ...@@ -328,6 +348,7 @@ def test_prange_continue():
free(p) free(p)
'''
def test_nested_break_continue(): def test_nested_break_continue():
""" """
>>> test_nested_break_continue() >>> test_nested_break_continue()
...@@ -358,6 +379,7 @@ def test_nested_break_continue(): ...@@ -358,6 +379,7 @@ def test_nested_break_continue():
continue continue
print i print i
'''
cdef int parallel_return() nogil: cdef int parallel_return() nogil:
cdef int i cdef int i
...@@ -377,6 +399,7 @@ def test_return(): ...@@ -377,6 +399,7 @@ def test_return():
""" """
print parallel_return() print parallel_return()
'''
def test_parallel_exceptions(): def test_parallel_exceptions():
""" """
>>> test_parallel_exceptions() >>> test_parallel_exceptions()
...@@ -402,8 +425,34 @@ def test_parallel_exceptions(): ...@@ -402,8 +425,34 @@ def test_parallel_exceptions():
except Exception, e: except Exception, e:
print mylist[0] print mylist[0]
print e.args, sum print e.args, sum
'''
# Doctest for exception propagation out of a single prange placed inside a
# `with nogil, cython.parallel.parallel()` block (the function contains only
# one prange, so no prange-in-prange nesting is involved).
# NOTE(review): indentation is not preserved in this view, so the exact
# nesting of the statements below is inferred -- confirm against the .pyx file.
def test_parallel_exceptions_unnested():
"""
>>> test_parallel_exceptions_unnested()
('I am executed first', 0)
('propagate me',) 0
"""
cdef int i, sum = 0
# Appended to from the `finally` clause below; the doctest expects its first
# entry to be printed before the exception arguments.
mylist = []
try:
with nogil, cython.parallel.parallel():
try:
for i in prange(10):
with gil:
raise Exception("propagate me")
sum += i
finally:
with gil:
mylist.append(("I am executed first", sum))
except Exception, e:
# The doctest expects `sum` to still be 0 at this point.
print mylist[0]
print e.args, sum
'''
cdef int parallel_exc_cdef() except -3: cdef int parallel_exc_cdef() except -3:
cdef int i, j cdef int i, j
for i in prange(10, nogil=True): for i in prange(10, nogil=True):
...@@ -412,6 +461,15 @@ cdef int parallel_exc_cdef() except -3: ...@@ -412,6 +461,15 @@ cdef int parallel_exc_cdef() except -3:
raise Exception("propagate me") raise Exception("propagate me")
return 0 return 0
'''
# Raises Exception("propagate me") under `with gil` from a single
# `prange(10, nogil=True)` loop; declared `except -3`.
# test_parallel_exc_cdef calls this function and its doctest expects the
# exception to reach the caller.
cdef int parallel_exc_cdef_unnested() except -3:
cdef int i
for i in prange(10, nogil=True):
with gil:
raise Exception("propagate me")
return 0
def test_parallel_exc_cdef(): def test_parallel_exc_cdef():
""" """
...@@ -420,8 +478,10 @@ def test_parallel_exc_cdef(): ...@@ -420,8 +478,10 @@ def test_parallel_exc_cdef():
... ...
Exception: propagate me Exception: propagate me
""" """
parallel_exc_cdef() parallel_exc_cdef_unnested()
#parallel_exc_cdef()
'''
cpdef int parallel_exc_cpdef() except -3: cpdef int parallel_exc_cpdef() except -3:
cdef int i, j cdef int i, j
for i in prange(10, nogil=True): for i in prange(10, nogil=True):
...@@ -430,6 +490,16 @@ cpdef int parallel_exc_cpdef() except -3: ...@@ -430,6 +490,16 @@ cpdef int parallel_exc_cpdef() except -3:
raise Exception("propagate me") raise Exception("propagate me")
return 0 return 0
'''
# cpdef counterpart of the single-prange exception test: raises
# Exception("propagate me") under `with gil` from one
# `prange(10, nogil=True)` loop; declared `except -3`.
# test_parallel_exc_cpdef calls this function and its doctest expects the
# exception to reach the caller.
cpdef int parallel_exc_cpdef_unnested() except -3:
# NOTE(review): `j` is declared but not used anywhere in this function.
cdef int i, j
for i in prange(10, nogil=True):
with gil:
raise Exception("propagate me")
return 0
def test_parallel_exc_cpdef(): def test_parallel_exc_cpdef():
""" """
...@@ -438,8 +508,10 @@ def test_parallel_exc_cpdef(): ...@@ -438,8 +508,10 @@ def test_parallel_exc_cpdef():
... ...
Exception: propagate me Exception: propagate me
""" """
parallel_exc_cpdef() parallel_exc_cpdef_unnested()
#parallel_exc_cpdef()
'''
cdef int parallel_exc_nogil_swallow() except -1: cdef int parallel_exc_nogil_swallow() except -1:
cdef int i, j cdef int i, j
for i in prange(10, nogil=True): for i in prange(10, nogil=True):
...@@ -451,15 +523,31 @@ cdef int parallel_exc_nogil_swallow() except -1: ...@@ -451,15 +523,31 @@ cdef int parallel_exc_nogil_swallow() except -1:
return i return i
return 0 return 0
'''
# Raises Exception("propagate me") under `with gil` from a single prange
# inside `with nogil:` / `try:`, with a `return i` in the `finally:` clause.
# The calling doctest (test_parallel_exc_nogil_swallow) expects only
# "execute me" to be printed, i.e. no exception reaches the caller.
# NOTE(review): indentation is not preserved in this view; the trailing
# `return 0` is presumably at function level -- confirm against the .pyx file.
cdef int parallel_exc_nogil_swallow_unnested() except -1:
cdef int i
with nogil:
try:
for i in prange(10):
with gil:
raise Exception("propagate me")
finally:
return i
return 0
def test_parallel_exc_nogil_swallow(): def test_parallel_exc_nogil_swallow():
""" """
>>> test_parallel_exc_nogil_swallow() >>> test_parallel_exc_nogil_swallow()
execute me execute me
""" """
parallel_exc_nogil_swallow() parallel_exc_nogil_swallow_unnested()
print 'execute me' print 'execute me'
#parallel_exc_nogil_swallow()
#print 'execute me'
'''
def parallel_exc_replace(): def parallel_exc_replace():
""" """
>>> parallel_exc_replace() >>> parallel_exc_replace()
...@@ -480,7 +568,6 @@ def parallel_exc_replace(): ...@@ -480,7 +568,6 @@ def parallel_exc_replace():
return 0 return 0
def _parallel_exceptions2(): def _parallel_exceptions2():
cdef int i, j, k cdef int i, j, k
...@@ -545,6 +632,7 @@ def test_parallel_exceptions2(): ...@@ -545,6 +632,7 @@ def test_parallel_exceptions2():
print 'Got signal', os.WTERMSIG(status) print 'Got signal', os.WTERMSIG(status)
print 'Exit status:', os.WEXITSTATUS(status) print 'Exit status:', os.WEXITSTATUS(status)
'''
def test_parallel_with_gil_return(): def test_parallel_with_gil_return():
""" """
...@@ -565,6 +653,7 @@ def test_parallel_with_gil_return(): ...@@ -565,6 +653,7 @@ def test_parallel_with_gil_return():
with gil: with gil:
return sum return sum
'''
def test_parallel_with_gil_continue(): def test_parallel_with_gil_continue():
""" """
>>> test_parallel_with_gil_continue() >>> test_parallel_with_gil_continue()
...@@ -581,3 +670,20 @@ def test_parallel_with_gil_continue(): ...@@ -581,3 +670,20 @@ def test_parallel_with_gil_continue():
sum += i sum += i
print sum print sum
'''
# Doctest for `continue` issued under `with gil` inside a single
# `prange(10, nogil=True)` loop: odd `i` values are skipped, and the expected
# printed total is 20 (0 + 2 + 4 + 6 + 8).
# NOTE(review): indentation is not preserved in this view, so whether
# `sum += i` sits inside the `with gil` block is not visible here -- confirm
# against the .pyx file.
def test_parallel_with_gil_continue_unnested():
"""
>>> test_parallel_with_gil_continue_unnested()
20
"""
cdef int i, sum = 0
for i in prange(10, nogil=True):
with gil:
if i % 2:
continue
sum += i
print sum
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment