Commit ffbdf766 authored by Mark Florisson

OpenMP Control Flow

parent eb64d91c
...@@ -700,7 +700,9 @@ class CreateControlFlowGraph(CythonTransform): ...@@ -700,7 +700,9 @@ class CreateControlFlowGraph(CythonTransform):
return node return node
def visit_InPlaceAssignmentNode(self, node): def visit_InPlaceAssignmentNode(self, node):
self.in_inplace_assignment = True
self.visitchildren(node) self.visitchildren(node)
self.in_inplace_assignment = False
self.mark_assignment(node.lhs, node.create_binop_node()) self.mark_assignment(node.lhs, node.create_binop_node())
return node return node
...@@ -726,6 +728,11 @@ class CreateControlFlowGraph(CythonTransform): ...@@ -726,6 +728,11 @@ class CreateControlFlowGraph(CythonTransform):
entry = node.entry or self.env.lookup(node.name) entry = node.entry or self.env.lookup(node.name)
if entry: if entry:
self.flow.mark_reference(node, entry) self.flow.mark_reference(node, entry)
if entry in self.reductions and not self.in_inplace_assignment:
error(node.pos,
"Cannot read reduction variable in loop body")
return node return node
def visit_StatListNode(self, node): def visit_StatListNode(self, node):
...@@ -806,10 +813,16 @@ class CreateControlFlowGraph(CythonTransform): ...@@ -806,10 +813,16 @@ class CreateControlFlowGraph(CythonTransform):
# Target assignment # Target assignment
self.flow.nextblock() self.flow.nextblock()
self.mark_assignment(node.target) self.mark_assignment(node.target)
# Body block # Body block
if isinstance(node, Nodes.ParallelRangeNode):
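# In case of an invalid [sentence left unfinished in the commit]; the call below marks every private assigned in the loop, except the loop target, as deleted before the body is visited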
self._delete_privates(node, exclude=node.target.entry)
self.flow.nextblock() self.flow.nextblock()
self.visit(node.body) self.visit(node.body)
self.flow.loops.pop() self.flow.loops.pop()
# Loop it # Loop it
if self.flow.block: if self.flow.block:
self.flow.block.add_child(condition_block) self.flow.block.add_child(condition_block)
...@@ -828,11 +841,38 @@ class CreateControlFlowGraph(CythonTransform): ...@@ -828,11 +841,38 @@ class CreateControlFlowGraph(CythonTransform):
self.flow.block = None self.flow.block = None
return node return node
def _delete_privates(self, node, exclude=None):
for private_node in node.assigned_nodes:
if not exclude or private_node.entry is not exclude:
self.flow.mark_deletion(private_node, private_node.entry)
def visit_ParallelRangeNode(self, node): def visit_ParallelRangeNode(self, node):
# if node.target is None an error will have been previously issued reductions = self.reductions
if node.target is not None:
# if node.target is None or not a NameNode, an error will have
# been previously issued
if hasattr(node.target, 'entry'):
self.reductions = cython.set(reductions)
for private_node in node.assigned_nodes:
private_node.entry.error_on_uninitialized = True
pos, reduction = node.assignments[private_node.entry]
if reduction:
self.reductions.add(private_node.entry)
node = self.visit_ForInStatNode(node) node = self.visit_ForInStatNode(node)
self.reductions = reductions
return node
def visit_ParallelWithBlockNode(self, node):
for private_node in node.assigned_nodes:
private_node.entry.error_on_uninitialized = True
self._delete_privates(node)
self.visitchildren(node)
self._delete_privates(node)
return node return node
def visit_ForFromStatNode(self, node): def visit_ForFromStatNode(self, node):
......
...@@ -5884,6 +5884,9 @@ class ParallelStatNode(StatNode, ParallelNode): ...@@ -5884,6 +5884,9 @@ class ParallelStatNode(StatNode, ParallelNode):
# If op is not None, it's a reduction. # If op is not None, it's a reduction.
self.privates = {} self.privates = {}
# [NameNode]
self.assigned_nodes = []
def analyse_declarations(self, env): def analyse_declarations(self, env):
self.body.analyse_declarations(env) self.body.analyse_declarations(env)
...@@ -5901,7 +5904,6 @@ class ParallelStatNode(StatNode, ParallelNode): ...@@ -5901,7 +5904,6 @@ class ParallelStatNode(StatNode, ParallelNode):
def analyse_expressions(self, env): def analyse_expressions(self, env):
self.body.analyse_expressions(env) self.body.analyse_expressions(env)
self.analyse_sharing_attributes(env) self.analyse_sharing_attributes(env)
self.check_independent_iterations()
def analyse_sharing_attributes(self, env): def analyse_sharing_attributes(self, env):
""" """
...@@ -5917,8 +5919,7 @@ class ParallelStatNode(StatNode, ParallelNode): ...@@ -5917,8 +5919,7 @@ class ParallelStatNode(StatNode, ParallelNode):
# consider it too implicit and magicky for users) # consider it too implicit and magicky for users)
if entry in self.parent.assignments: if entry in self.parent.assignments:
error(pos, error(pos,
"Cannot assign to private of outer parallel block, " "Cannot assign to private of outer parallel block")
"as we cannot retain its value after the loop")
continue continue
if not self.is_prange and op: if not self.is_prange and op:
...@@ -5926,21 +5927,11 @@ class ParallelStatNode(StatNode, ParallelNode): ...@@ -5926,21 +5927,11 @@ class ParallelStatNode(StatNode, ParallelNode):
error(pos, "Reductions not allowed for parallel blocks") error(pos, "Reductions not allowed for parallel blocks")
continue continue
if self.is_private(entry):
# lastprivate = self.is_prange and entry == self.target.entry
# By default all variables should have the same values as if # By default all variables should have the same values as if
# executed sequentially # executed sequentially
lastprivate = True lastprivate = True
self.propagate_var_privatization(entry, op, lastprivate) self.propagate_var_privatization(entry, op, lastprivate)
def is_private(self, entry):
"""
True if this scope should declare the variable private, lastprivate
or reduction.
"""
return (self.is_parallel or
(self.parent and entry not in self.parent.privates))
def propagate_var_privatization(self, entry, op, lastprivate): def propagate_var_privatization(self, entry, op, lastprivate):
""" """
Propagate the sharing attributes of a variable. If the privatization is Propagate the sharing attributes of a variable. If the privatization is
...@@ -6029,40 +6020,6 @@ class ParallelStatNode(StatNode, ParallelNode): ...@@ -6029,40 +6020,6 @@ class ParallelStatNode(StatNode, ParallelNode):
code.putln("%s = %s;" % (cname, entry.cname)) code.putln("%s = %s;" % (cname, entry.cname))
entry.cname = cname entry.cname = cname
def check_independent_iterations(self):
"""
This checks for uninitialized thread-private variables, it's far from
fool-proof as it does not take control flow into account, nor
assignment to a variable as both the lhs and rhs. So it detects only
cases like this:
for i in prange(10, nogil=True):
var = x # error, x is private and read before assigned
x = i
Fortunately, it doesn't need to be perfect, as we still initialize
private variables to "invalid" values, such as NULL or NaN whenever
possible.
"""
from Cython.Compiler import ParseTreeTransforms
transform = ParseTreeTransforms.FindUninitializedParallelVars()
transform(self.body)
for entry, pos in transform.used_vars:
if entry in self.privates:
assignment_pos, op = self.assignments[entry]
# Reading reduction variables is valid (in fact necessary)
# before assignment
if not op and pos < assignment_pos:
if self.is_prange:
error(pos, "Expression value depends on previous loop "
"iteration, cannot execute in parallel")
else:
error(pos, "Expression depends on an uninitialized "
"thread-private variable")
def initialize_privates_to_nan(self, code, exclude=None): def initialize_privates_to_nan(self, code, exclude=None):
first = True first = True
...@@ -6739,11 +6696,11 @@ class ParallelRangeNode(ParallelStatNode): ...@@ -6739,11 +6696,11 @@ class ParallelRangeNode(ParallelStatNode):
if not self.is_parallel: if not self.is_parallel:
code.put("#pragma omp for") code.put("#pragma omp for")
self.privatization_insertion_point = code.insertion_point() self.privatization_insertion_point = code.insertion_point()
reduction_codepoint = self.parent.privatization_insertion_point # reduction_codepoint = self.parent.privatization_insertion_point
else: else:
code.put("#pragma omp parallel") code.put("#pragma omp parallel")
self.privatization_insertion_point = code.insertion_point() self.privatization_insertion_point = code.insertion_point()
reduction_codepoint = self.privatization_insertion_point # reduction_codepoint = self.privatization_insertion_point
code.putln("") code.putln("")
code.putln("#endif /* _OPENMP */") code.putln("#endif /* _OPENMP */")
...@@ -6755,6 +6712,11 @@ class ParallelRangeNode(ParallelStatNode): ...@@ -6755,6 +6712,11 @@ class ParallelRangeNode(ParallelStatNode):
code.putln("#ifdef _OPENMP") code.putln("#ifdef _OPENMP")
code.put("#pragma omp for") code.put("#pragma omp for")
# Nested parallelism is not supported, so we can put reductions on the
# for and not on the parallel (but would be valid, but gcc45 bugs on
# the former)
reduction_codepoint = code
for entry, (op, lastprivate) in self.privates.iteritems(): for entry, (op, lastprivate) in self.privates.iteritems():
# Don't declare the index variable as a reduction # Don't declare the index variable as a reduction
if op and op in "+*-&^|" and entry != self.target.entry: if op and op in "+*-&^|" and entry != self.target.entry:
......
...@@ -2338,24 +2338,6 @@ class TransformBuiltinMethods(EnvTransform): ...@@ -2338,24 +2338,6 @@ class TransformBuiltinMethods(EnvTransform):
return node return node
class FindUninitializedParallelVars(CythonTransform, SkipDeclarations):
"""
This transform isn't part of the pipeline, it simply finds all references
to variables in parallel blocks.
"""
def __init__(self):
CythonTransform.__init__(self, None)
self.used_vars = []
def visit_ParallelStatNode(self, node):
return node
def visit_NameNode(self, node):
self.used_vars.append((node.entry, node.pos))
return node
class DebugTransform(CythonTransform): class DebugTransform(CythonTransform):
""" """
Write debug information for this Cython module. Write debug information for this Cython module.
......
...@@ -148,9 +148,10 @@ def create_pipeline(context, mode, exclude_classes=()): ...@@ -148,9 +148,10 @@ def create_pipeline(context, mode, exclude_classes=()):
EmbedSignature(context), EmbedSignature(context),
EarlyReplaceBuiltinCalls(context), ## Necessary? EarlyReplaceBuiltinCalls(context), ## Necessary?
TransformBuiltinMethods(context), ## Necessary? TransformBuiltinMethods(context), ## Necessary?
MarkAssignments(context),
CreateControlFlowGraph(context), CreateControlFlowGraph(context),
RemoveUnreachableCode(context), RemoveUnreachableCode(context),
MarkAssignments(context), # MarkAssignments(context),
MarkOverflowingArithmetic(context), MarkOverflowingArithmetic(context),
IntroduceBufferAuxiliaryVars(context), IntroduceBufferAuxiliaryVars(context),
_check_c_declarations, _check_c_declarations,
......
...@@ -62,6 +62,7 @@ class MarkAssignments(CythonTransform): ...@@ -62,6 +62,7 @@ class MarkAssignments(CythonTransform):
pos = lhs.pos pos = lhs.pos
parallel_node.assignments[lhs.entry] = (pos, inplace_op) parallel_node.assignments[lhs.entry] = (pos, inplace_op)
parallel_node.assigned_nodes.append(lhs)
elif isinstance(lhs, ExprNodes.SequenceNode): elif isinstance(lhs, ExprNodes.SequenceNode):
for arg in lhs.args: for arg in lhs.args:
......
...@@ -103,6 +103,22 @@ i = 0 ...@@ -103,6 +103,22 @@ i = 0
with nogil, cython.parallel.parallel(): with nogil, cython.parallel.parallel():
i += 1 i += 1
# Use of privates after the parallel with block
with nogil, cython.parallel.parallel():
i = 1
print i
i = 2
print i
# Reading of reduction variables in the prange block
cdef int sum = 0
for i in prange(10, nogil=True):
sum += i
with gil:
print sum
_ERRORS = u""" _ERRORS = u"""
e_cython_parallel.pyx:3:8: cython.parallel.parallel is not a module e_cython_parallel.pyx:3:8: cython.parallel.parallel is not a module
e_cython_parallel.pyx:4:0: No such directive: cython.parallel.something e_cython_parallel.pyx:4:0: No such directive: cython.parallel.something
...@@ -117,8 +133,8 @@ e_cython_parallel.pyx:30:9: Can only iterate over an iteration variable ...@@ -117,8 +133,8 @@ e_cython_parallel.pyx:30:9: Can only iterate over an iteration variable
e_cython_parallel.pyx:33:10: Must be of numeric type, not int * e_cython_parallel.pyx:33:10: Must be of numeric type, not int *
e_cython_parallel.pyx:36:33: Closely nested parallel with blocks are disallowed e_cython_parallel.pyx:36:33: Closely nested parallel with blocks are disallowed
e_cython_parallel.pyx:39:12: The parallel directive must be called e_cython_parallel.pyx:39:12: The parallel directive must be called
e_cython_parallel.pyx:45:10: Expression value depends on previous loop iteration, cannot execute in parallel e_cython_parallel.pyx:45:10: local variable 'y' referenced before assignment
e_cython_parallel.pyx:55:9: Expression depends on an uninitialized thread-private variable e_cython_parallel.pyx:55:9: local variable 'y' referenced before assignment
e_cython_parallel.pyx:60:6: Reduction operator '*' is inconsistent with previous reduction operator '+' e_cython_parallel.pyx:60:6: Reduction operator '*' is inconsistent with previous reduction operator '+'
e_cython_parallel.pyx:62:36: cython.parallel.parallel() does not take positional arguments e_cython_parallel.pyx:62:36: cython.parallel.parallel() does not take positional arguments
e_cython_parallel.pyx:65:36: Invalid keyword argument: invalid e_cython_parallel.pyx:65:36: Invalid keyword argument: invalid
...@@ -126,7 +142,9 @@ e_cython_parallel.pyx:73:12: Yield not allowed in parallel sections ...@@ -126,7 +142,9 @@ e_cython_parallel.pyx:73:12: Yield not allowed in parallel sections
e_cython_parallel.pyx:77:16: Yield not allowed in parallel sections e_cython_parallel.pyx:77:16: Yield not allowed in parallel sections
e_cython_parallel.pyx:82:19: Parallel nesting not supported due to bugs in gcc 4.5 e_cython_parallel.pyx:82:19: Parallel nesting not supported due to bugs in gcc 4.5
e_cython_parallel.pyx:87:23: Parallel nesting not supported due to bugs in gcc 4.5 e_cython_parallel.pyx:87:23: Parallel nesting not supported due to bugs in gcc 4.5
e_cython_parallel.pyx:97:19: Cannot assign to private of outer parallel block, as we cannot retain its value after the loop e_cython_parallel.pyx:97:19: Cannot assign to private of outer parallel block
e_cython_parallel.pyx:98:19: Cannot assign to private of outer parallel block, as we cannot retain its value after the loop e_cython_parallel.pyx:98:19: Cannot assign to private of outer parallel block
e_cython_parallel.pyx:104:6: Reductions not allowed for parallel blocks e_cython_parallel.pyx:104:6: Reductions not allowed for parallel blocks
e_cython_parallel.pyx:110:7: local variable 'i' referenced before assignment
e_cython_parallel.pyx:119:17: Cannot read reduction variable in loop body
""" """
...@@ -79,10 +79,11 @@ def test_unsigned_operands(): ...@@ -79,10 +79,11 @@ def test_unsigned_operands():
cdef int step = 1 cdef int step = 1
cdef int steps_taken = 0 cdef int steps_taken = 0
cdef int *steps_takenp = &steps_taken
for i in prange(start, stop, step, nogil=True): for i in prange(start, stop, step, nogil=True):
steps_taken += 1 steps_taken += 1
if steps_taken > 10: if steps_takenp[0] > 10:
abort() abort()
return steps_taken return steps_taken
...@@ -168,7 +169,7 @@ def test_closure_parallel_with_gil(): ...@@ -168,7 +169,7 @@ def test_closure_parallel_with_gil():
for i in prange(10, nogil=True): for i in prange(10, nogil=True):
with gil: with gil:
sum += temp1 + temp2 + i sum += temp1 + temp2 + i
assert abs(sum - sum) == 0 # assert abs(sum - sum) == 0
return sum return sum
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment