[mlir][openmp] Added omp.taskloop

This patch adds the omp.taskloop operation to the OpenMP dialect, along
with tests.

Reviewed By: peixin

Differential Revision: https://reviews.llvm.org/D127380
This commit is contained in:
Shraiysh Vaishay 2022-07-04 10:38:58 +05:30
parent d36e09cfe5
commit 1063dfc028
5 changed files with 486 additions and 18 deletions

View File

@ -132,7 +132,10 @@ def ParallelOp : OpenMP_Op<"parallel", [
// TODO: remove this once emitAccessorPrefix is set to
// kEmitAccessorPrefix_Prefixed for the dialect.
/// Returns the reduction variables
operand_range getReductionVars() { return reduction_vars(); }
SmallVector<Value> getReductionVars() {
return SmallVector<Value>(reduction_vars().begin(),
reduction_vars().end());
}
}];
}
@ -237,7 +240,10 @@ def SectionsOp : OpenMP_Op<"sections", [AttrSizedOperandSegments,
// TODO: remove this once emitAccessorPrefix is set to
// kEmitAccessorPrefix_Prefixed for the dialect.
/// Returns the reduction variables
operand_range getReductionVars() { return reduction_vars(); }
SmallVector<Value> getReductionVars() {
return SmallVector<Value>(reduction_vars().begin(),
reduction_vars().end());
}
}];
}
@ -375,7 +381,10 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
// TODO: remove this once emitAccessorPrefix is set to
// kEmitAccessorPrefix_Prefixed for the dialect.
/// Returns the reduction variables
operand_range getReductionVars() { return reduction_vars(); }
SmallVector<Value> getReductionVars() {
return SmallVector<Value>(reduction_vars().begin(),
reduction_vars().end());
}
}];
let hasCustomAssemblyFormat = 1;
let assemblyFormat = [{
@ -393,7 +402,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
custom<ReductionVarList>(
$reduction_vars, type($reduction_vars), $reductions
) `)`
) `for` custom<WsLoopControl>($region, $lowerBound, $upperBound, $step,
) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
type($step), $inclusive) attr-dict
}];
let hasVerifier = 1;
@ -542,11 +551,172 @@ def TaskOp : OpenMP_Op<"task", [AttrSizedOperandSegments,
}];
let extraClassDeclaration = [{
/// Returns the reduction variables
operand_range getReductionVars() { return in_reduction_vars(); }
SmallVector<Value> getReductionVars() {
return SmallVector<Value>(in_reduction_vars().begin(),
in_reduction_vars().end());
}
}];
let hasVerifier = 1;
}
// Definition of the `omp.taskloop` operation.
//
// The op carries one `lowerBound`/`upperBound`/`step` triple per loop
// dimension (all three must have the same type), a set of optional clause
// operands and attributes, and a single region holding the loop body. It
// implements ReductionClauseInterface and has a C++ verifier.
def TaskLoopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
AutomaticAllocationScope, RecursiveSideEffects,
AllTypesMatch<["lowerBound", "upperBound", "step"]>,
ReductionClauseInterface]> {
let summary = "taskloop construct";
let description = [{
The taskloop construct specifies that the iterations of one or more
associated loops will be executed in parallel using explicit tasks. The
iterations are distributed across tasks generated by the construct and
scheduled to be executed.
The `lowerBound` and `upperBound` specify a half-open range: the range
includes the lower bound but does not include the upper bound. If the
`inclusive` attribute is specified then the upper bound is also included.
The `step` specifies the loop step.
The body region can contain any number of blocks.
```
omp.taskloop <clauses>
for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.terminator
}
```
For definitions of "undeferred task", "included task", "final task" and
"mergeable task", please check OpenMP Specification.
When an `if` clause is present on a taskloop construct, and if the `if`
clause expression evaluates to `false`, undeferred tasks are generated. The
use of a variable in an `if` clause expression of a taskloop construct
causes an implicit reference to the variable in all enclosing constructs.
When a `final` clause is present on a taskloop construct and the `final`
clause expression evaluates to `true`, the generated tasks will be final
tasks. The use of a variable in a `final` clause expression of a taskloop
construct causes an implicit reference to the variable in all enclosing
constructs.
If the `untied` clause is specified, all tasks generated by the taskloop
construct are untied tasks.
When the `mergeable` clause is present on a taskloop construct, each
generated task is a mergeable task.
Reductions can be performed in a loop by specifying reduction accumulator
variables in `reduction_vars` or `in_reduction_vars` and symbols referring
to reduction declarations in the `reductions` or `in_reductions` attribute.
Each reduction is identified by the accumulator it uses and accumulators
must not be repeated in the same reduction. The `omp.reduction` operation
accepts the accumulator and a partial value which is considered to be
produced by the current loop iteration for the given reduction. If multiple
values are produced for the same accumulator, i.e. there are multiple
`omp.reduction`s, the last value is taken. The reduction declaration
specifies how to combine the values from each iteration into the final
value, which is available in the accumulator after the loop completes.
If an `in_reduction` clause is present on the taskloop construct, the
behavior is as if each generated task was defined by a task construct on
which an `in_reduction` clause with the same reduction operator and list
items is present. Thus, the generated tasks are participants of a reduction
previously defined by a reduction scoping clause.
If a `reduction` clause is present on the taskloop construct, the behavior
is as if a `task_reduction` clause with the same reduction operator and list
items was applied to the implicit taskgroup construct enclosing the taskloop
construct. The taskloop construct executes as if each generated task was
defined by a task construct on which an `in_reduction` clause with the same
reduction operator and list items is present. Thus, the generated tasks are
participants of the reduction defined by the `task_reduction` clause that
was applied to the implicit taskgroup construct.
When a `priority` clause is present on a taskloop construct, the generated
tasks use the `priority-value` as if it was specified for each individual
task. If the `priority` clause is not specified, tasks generated by the
taskloop construct have the default task priority (zero).
The `allocators_vars` and `allocate_vars` arguments are a variadic list of
values that specify the memory allocator to be used to obtain storage for
private values.
If a `grainsize` clause is present on the taskloop construct, the number of
logical loop iterations assigned to each generated task is greater than or
equal to the minimum of the value of the grain-size expression and the
number of logical loop iterations, but less than two times the value of the
grain-size expression.
If `num_tasks` is specified, the taskloop construct creates as many tasks as
the minimum of the num-tasks expression and the number of logical loop
iterations. Each task must have at least one logical loop iteration.
By default, the taskloop construct executes as if it was enclosed in a
taskgroup construct with no statements or directives outside of the taskloop
construct. Thus, the taskloop construct creates an implicit taskgroup
region. If the `nogroup` clause is present, no implicit taskgroup region is
created.
}];
// Operands and attributes. The op uses AttrSizedOperandSegments, so the
// order of the operand groups below is the order of the entries in the
// `operand_segment_sizes` attribute (12 operand groups in total); the
// UnitAttr/OptionalAttr entries are attributes and take no segment slot.
let arguments = (ins Variadic<IntLikeType>:$lowerBound,
Variadic<IntLikeType>:$upperBound,
Variadic<IntLikeType>:$step,
UnitAttr:$inclusive,
Optional<I1>:$if_expr,
Optional<I1>:$final_expr,
UnitAttr:$untied,
UnitAttr:$mergeable,
Variadic<OpenMP_PointerLikeType>:$in_reduction_vars,
OptionalAttr<SymbolRefArrayAttr>:$in_reductions,
Variadic<OpenMP_PointerLikeType>:$reduction_vars,
OptionalAttr<SymbolRefArrayAttr>:$reductions,
Optional<IntLikeType>:$priority,
Variadic<AnyType>:$allocate_vars,
Variadic<AnyType>:$allocators_vars,
Optional<IntLikeType>: $grain_size,
Optional<IntLikeType>: $num_tasks,
UnitAttr: $nogroup);
// Single region holding the loop body.
let regions = (region AnyRegion:$region);
// Custom syntax: an `oilist` of the optional clauses, then the `for` keyword
// and the `LoopControl` custom directive, which handles the region together
// with the bounds, steps, their type and the `inclusive` flag.
let assemblyFormat = [{
oilist(`if` `(` $if_expr `)`
|`final` `(` $final_expr `)`
|`untied` $untied
|`mergeable` $mergeable
|`in_reduction` `(`
custom<ReductionVarList>(
$in_reduction_vars, type($in_reduction_vars), $in_reductions
) `)`
|`reduction` `(`
custom<ReductionVarList>(
$reduction_vars, type($reduction_vars), $reductions
) `)`
|`priority` `(` $priority `:` type($priority) `)`
|`allocate` `(`
custom<AllocateAndAllocator>(
$allocate_vars, type($allocate_vars),
$allocators_vars, type($allocators_vars)
) `)`
|`grain_size` `(` $grain_size `:` type($grain_size) `)`
|`num_tasks` `(` $num_tasks `:` type($num_tasks) `)`
|`nogroup` $nogroup
) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
type($step), $inclusive) attr-dict
}];
// Extra C++ members injected into the generated op class.
// `getReductionVars` is defined out of line and returns the `in_reduction`
// variables followed by the `reduction` variables.
// NOTE(review): no definition of `getEffects` is visible in this change --
// confirm it is defined elsewhere or drop the declaration.
let extraClassDeclaration = [{
/// Returns the reduction variables
SmallVector<Value> getReductionVars();
void getEffects(SmallVectorImpl<MemoryEffects::EffectInstance> &effects);
}];
// Enables TaskLoopOp::verify(), which checks the allocate/allocator operand
// counts, both reduction lists, and the clause combinations it rejects
// (reduction with nogroup, an item in both reduction lists, grain_size with
// num_tasks).
let hasVerifier = 1;
}
def TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments,
ReductionClauseInterface,
AutomaticAllocationScope]> {

View File

@ -40,7 +40,7 @@ def ReductionClauseInterface : OpInterface<"ReductionClauseInterface"> {
let methods = [
InterfaceMethod<
"Get reduction vars", "::mlir::Operation::operand_range",
"Get reduction vars", "::mlir::SmallVector<::mlir::Value>",
"getReductionVars">,
];
}

View File

@ -523,11 +523,11 @@ LogicalResult SingleOp::verify() {
/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive? steps
/// steps := `step` `(`ssa-id-list`)`
ParseResult
parseWsLoopControl(OpAsmParser &parser, Region &region,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
parseLoopControl(OpAsmParser &parser, Region &region,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
// Parse an opening `(` followed by induction variables followed by `)`
SmallVector<OpAsmParser::Argument> ivs;
Type loopVarType;
@ -557,10 +557,10 @@ parseWsLoopControl(OpAsmParser &parser, Region &region,
return parser.parseRegion(region, ivs);
}
void printWsLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
ValueRange lowerBound, ValueRange upperBound,
ValueRange steps, TypeRange loopVarTypes,
UnitAttr inclusive) {
void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
ValueRange lowerBound, ValueRange upperBound,
ValueRange steps, TypeRange loopVarTypes,
UnitAttr inclusive) {
auto args = region.front().getArguments();
p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound
<< ") to (" << upperBound << ") ";
@ -736,6 +736,43 @@ LogicalResult TaskGroupOp::verify() {
task_reduction_vars());
}
//===----------------------------------------------------------------------===//
// TaskLoopOp
//===----------------------------------------------------------------------===//
/// Returns every reduction accumulator attached to this op: the
/// `in_reduction` variables first, followed by the `reduction` variables.
SmallVector<Value> TaskLoopOp::getReductionVars() {
  auto inReductionVars = in_reduction_vars();
  auto reductionVars = reduction_vars();

  SmallVector<Value> allReductionVars;
  // Reserve once so appending the second range never reallocates.
  allReductionVars.reserve(inReductionVars.size() + reductionVars.size());
  allReductionVars.append(inReductionVars.begin(), inReductionVars.end());
  allReductionVars.append(reductionVars.begin(), reductionVars.end());
  return allReductionVars;
}
/// Verifies the clause constraints of `omp.taskloop`:
///  - `allocate_vars` and `allocators_vars` have the same number of operands;
///  - both reduction lists pass `verifyReductionVarList`;
///  - `nogroup` is not combined with a `reduction` clause;
///  - no value appears in both the `reduction` and `in_reduction` lists;
///  - `grain_size` and `num_tasks` are not both present.
LogicalResult TaskLoopOp::verify() {
  const bool allocateSizesMatch =
      allocate_vars().size() == allocators_vars().size();
  if (!allocateSizesMatch)
    return emitError(
        "expected equal sizes for allocate and allocator variables");

  // Check `reduction` first, then `in_reduction`; stop at the first failure.
  if (failed(verifyReductionVarList(*this, reductions(), reduction_vars())))
    return failure();
  if (failed(
          verifyReductionVarList(*this, in_reductions(), in_reduction_vars())))
    return failure();

  if (!reduction_vars().empty() && nogroup())
    return emitError("if a reduction clause is present on the taskloop "
                     "directive, the nogroup clause must not be specified");

  // A list item may be named by at most one of the two reduction clauses.
  for (Value reductionVar : reduction_vars()) {
    const bool alsoInReduction =
        llvm::is_contained(in_reduction_vars(), reductionVar);
    if (alsoInReduction)
      return emitError("the same list item cannot appear in both a reduction "
                       "and an in_reduction clause");
  }

  if (grain_size() && num_tasks())
    return emitError(
        "the grainsize clause and num_tasks clause are mutually exclusive and "
        "may not appear on the same taskloop directive");

  return success();
}
//===----------------------------------------------------------------------===//
// WsLoopOp
//===----------------------------------------------------------------------===//

View File

@ -1294,3 +1294,128 @@ func.func @omp_cancellationpoint2() {
}
return
}
// -----
// Negative test: one `allocate_vars` operand and no `allocators_vars` operand
// (segment sizes 1 and 0) must be rejected by the verifier.
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testmemref = "test.memref"() : () -> (memref<i32>)
// expected-error @below {{expected equal sizes for allocate and allocator variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testmemref) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, memref<i32>) -> ()
return
}
// -----
// Negative test: two `reduction_vars` operands but only one symbol in the
// `reductions` attribute.
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>, !llvm.ptr<f32>) -> ()
return
}
// -----
// Negative test: one `reduction_vars` operand but two symbols in the
// `reductions` attribute.
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>) -> ()
return
}
// -----
// Negative test: two `in_reduction_vars` operands but only one symbol in the
// `in_reductions` attribute.
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {in_reductions = [@add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>, !llvm.ptr<f32>) -> ()
return
}
// -----
// Negative test: one `in_reduction_vars` operand but two symbols in the
// `in_reductions` attribute.
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {in_reductions = [@add_f32, @add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>) -> ()
return
}
// -----
// Negative test: a `reduction` clause combined with `nogroup` must be
// rejected. `@add_f32` is the reduction declaration referenced by the clause.
omp.reduction.declare @add_f32 : f32
init {
^bb0(%arg: f32):
%0 = arith.constant 0.0 : f32
omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
%1 = arith.addf %arg0, %arg1 : f32
omp.yield (%1 : f32)
}
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
// expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>) nogroup
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.terminator
}
return
}
// -----
// Negative test: the same value (%testf32) is listed in both the `reduction`
// and the `in_reduction` clause, which the verifier must reject.
omp.reduction.declare @add_f32 : f32
init {
^bb0(%arg: f32):
%0 = arith.constant 0.0 : f32
omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
%1 = arith.addf %arg0, %arg1 : f32
omp.yield (%1 : f32)
}
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
// expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>) in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.terminator
}
return
}
// -----
// Negative test: `grain_size` and `num_tasks` given on the same op must be
// rejected by the verifier.
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testi64 = "test.i64"() : () -> (i64)
// expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.terminator
}
return
}

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s
// RUN: mlir-opt %s | mlir-opt | FileCheck %s
func.func @omp_barrier() -> () {
// CHECK: omp.barrier
@ -1394,8 +1394,6 @@ func.func @omp_task(%bool_var: i1, %i64_var: i64, %i32_var: i32, %data_var: memr
return
}
// -----
func.func @omp_threadprivate() {
%0 = arith.constant 1 : i32
%1 = arith.constant 2 : i32
@ -1528,3 +1526,141 @@ func.func @omp_taskgroup_multiple_tasks() -> () {
}
return
}
// Round-trip test for omp.taskloop: the bare loop forms (one and two
// induction variables, with and without `inclusive`) followed by one case per
// clause. Each case checks the printed clause and the printed loop header.
// CHECK-LABEL: @omp_taskloop
func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
// CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) {
// CHECK: test.op1
"test.op1"(%lb) : (i32) -> ()
// CHECK: test.op2
"test.op2"() : () -> ()
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) inclusive step (%{{.+}}, %{{.+}}) {
omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) inclusive step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
%testbool = "test.bool"() : () -> (i1)
// CHECK: omp.taskloop if(%{{[^)]+}})
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop if(%testbool)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop final(%{{[^)]+}})
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop final(%testbool)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop untied
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop untied
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop mergeable
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop mergeable
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
// CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>, @add_f32 -> %{{.+}} : !llvm.ptr<f32>)
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>, @add_f32 -> %{{.+}} : !llvm.ptr<f32>)
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>) reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>)
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>) reduction(@add_f32 -> %testf32_2 : !llvm.ptr<f32>)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
%testi32 = "test.i32"() : () -> (i32)
// CHECK: omp.taskloop priority(%{{[^:]+}}: i32)
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop priority(%testi32: i32)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
%testmemref = "test.memref"() : () -> (memref<i32>)
// CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>)
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
%testi64 = "test.i64"() : () -> (i64)
// CHECK: omp.taskloop grain_size(%{{[^:]+}}: i64)
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop grain_size(%testi64: i64)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64)
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop num_tasks(%testi64: i64)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: omp.taskloop nogroup
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
omp.taskloop nogroup
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
// CHECK: omp.terminator
omp.terminator
}
// CHECK: return
return
}