forked from OSchip/llvm-project
[mlir][openmp] Added omp.taskloop
This patch adds omp.taskloop operation to OpenMP Dialect along with tests. Reviewed By: peixin Differential Revision: https://reviews.llvm.org/D127380
This commit is contained in:
parent
d36e09cfe5
commit
1063dfc028
|
@ -132,7 +132,10 @@ def ParallelOp : OpenMP_Op<"parallel", [
|
|||
// TODO: remove this once emitAccessorPrefix is set to
|
||||
// kEmitAccessorPrefix_Prefixed for the dialect.
|
||||
/// Returns the reduction variables
|
||||
operand_range getReductionVars() { return reduction_vars(); }
|
||||
SmallVector<Value> getReductionVars() {
|
||||
return SmallVector<Value>(reduction_vars().begin(),
|
||||
reduction_vars().end());
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
|
@ -237,7 +240,10 @@ def SectionsOp : OpenMP_Op<"sections", [AttrSizedOperandSegments,
|
|||
// TODO: remove this once emitAccessorPrefix is set to
|
||||
// kEmitAccessorPrefix_Prefixed for the dialect.
|
||||
/// Returns the reduction variables
|
||||
operand_range getReductionVars() { return reduction_vars(); }
|
||||
SmallVector<Value> getReductionVars() {
|
||||
return SmallVector<Value>(reduction_vars().begin(),
|
||||
reduction_vars().end());
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
|
@ -375,7 +381,10 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|
|||
// TODO: remove this once emitAccessorPrefix is set to
|
||||
// kEmitAccessorPrefix_Prefixed for the dialect.
|
||||
/// Returns the reduction variables
|
||||
operand_range getReductionVars() { return reduction_vars(); }
|
||||
SmallVector<Value> getReductionVars() {
|
||||
return SmallVector<Value>(reduction_vars().begin(),
|
||||
reduction_vars().end());
|
||||
}
|
||||
}];
|
||||
let hasCustomAssemblyFormat = 1;
|
||||
let assemblyFormat = [{
|
||||
|
@ -393,7 +402,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|
|||
custom<ReductionVarList>(
|
||||
$reduction_vars, type($reduction_vars), $reductions
|
||||
) `)`
|
||||
) `for` custom<WsLoopControl>($region, $lowerBound, $upperBound, $step,
|
||||
) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
|
||||
type($step), $inclusive) attr-dict
|
||||
}];
|
||||
let hasVerifier = 1;
|
||||
|
@ -542,11 +551,172 @@ def TaskOp : OpenMP_Op<"task", [AttrSizedOperandSegments,
|
|||
}];
|
||||
let extraClassDeclaration = [{
|
||||
/// Returns the reduction variables
|
||||
operand_range getReductionVars() { return in_reduction_vars(); }
|
||||
SmallVector<Value> getReductionVars() {
|
||||
return SmallVector<Value>(in_reduction_vars().begin(),
|
||||
in_reduction_vars().end());
|
||||
}
|
||||
}];
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def TaskLoopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
|
||||
AutomaticAllocationScope, RecursiveSideEffects,
|
||||
AllTypesMatch<["lowerBound", "upperBound", "step"]>,
|
||||
ReductionClauseInterface]> {
|
||||
let summary = "taskloop construct";
|
||||
let description = [{
|
||||
The taskloop construct specifies that the iterations of one or more
|
||||
associated loops will be executed in parallel using explicit tasks. The
|
||||
iterations are distributed across tasks generated by the construct and
|
||||
scheduled to be executed.
|
||||
|
||||
The `lowerBound` and `upperBound` specify a half-open range: the range
|
||||
includes the lower bound but does not include the upper bound. If the
|
||||
`inclusive` attribute is specified then the upper bound is also included.
|
||||
The `step` specifies the loop step.
|
||||
|
||||
The body region can contain any number of blocks.
|
||||
|
||||
```
|
||||
omp.taskloop <clauses>
|
||||
for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
|
||||
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
|
||||
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
|
||||
%sum = arith.addf %a, %b : f32
|
||||
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
|
||||
omp.terminator
|
||||
}
|
||||
```
|
||||
|
||||
For definitions of "undeferred task", "included task", "final task" and
|
||||
"mergeable task", please check OpenMP Specification.
|
||||
|
||||
When an `if` clause is present on a taskloop construct, and if the `if`
|
||||
clause expression evaluates to `false`, undeferred tasks are generated. The
|
||||
use of a variable in an `if` clause expression of a taskloop construct
|
||||
causes an implicit reference to the variable in all enclosing constructs.
|
||||
|
||||
When a `final` clause is present on a taskloop construct and the `final`
|
||||
clause expression evaluates to `true`, the generated tasks will be final
|
||||
tasks. The use of a variable in a `final` clause expression of a taskloop
|
||||
construct causes an implicit reference to the variable in all enclosing
|
||||
constructs.
|
||||
|
||||
If the `untied` clause is specified, all tasks generated by the taskloop
|
||||
construct are untied tasks.
|
||||
|
||||
When the `mergeable` clause is present on a taskloop construct, each
|
||||
generated task is a mergeable task.
|
||||
|
||||
Reductions can be performed in a loop by specifying reduction accumulator
|
||||
variables in `reduction_vars` or `in_reduction_vars` and symbols referring
|
||||
to reduction declarations in the `reductions` or `in_reductions` attribute.
|
||||
Each reduction is identified by the accumulator it uses and accumulators
|
||||
must not be repeated in the same reduction. The `omp.reduction` operation
|
||||
accepts the accumulator and a partial value which is considered to be
|
||||
produced by the current loop iteration for the given reduction. If multiple
|
||||
values are produced for the same accumulator, i.e. there are multiple
|
||||
`omp.reduction`s, the last value is taken. The reduction declaration
|
||||
specifies how to combine the values from each iteration into the final
|
||||
value, which is available in the accumulator after the loop completes.
|
||||
|
||||
If an `in_reduction` clause is present on the taskloop construct, the
|
||||
behavior is as if each generated task was defined by a task construct on
|
||||
which an `in_reduction` clause with the same reduction operator and list
|
||||
items is present. Thus, the generated tasks are participants of a reduction
|
||||
previously defined by a reduction scoping clause.
|
||||
|
||||
If a `reduction` clause is present on the taskloop construct, the behavior
|
||||
is as if a `task_reduction` clause with the same reduction operator and list
|
||||
items was applied to the implicit taskgroup construct enclosing the taskloop
|
||||
construct. The taskloop construct executes as if each generated task was
|
||||
defined by a task construct on which an `in_reduction` clause with the same
|
||||
reduction operator and list items is present. Thus, the generated tasks are
|
||||
participants of the reduction defined by the `task_reduction` clause that
|
||||
was applied to the implicit taskgroup construct.
|
||||
|
||||
When a `priority` clause is present on a taskloop construct, the generated
|
||||
tasks use the `priority-value` as if it was specified for each individual
|
||||
task. If the `priority` clause is not specified, tasks generated by the
|
||||
taskloop construct have the default task priority (zero).
|
||||
|
||||
The `allocators_vars` and `allocate_vars` arguments are a variadic list of
|
||||
values that specify the memory allocator to be used to obtain storage for
|
||||
private values.
|
||||
|
||||
If a `grainsize` clause is present on the taskloop construct, the number of
|
||||
logical loop iterations assigned to each generated task is greater than or
|
||||
equal to the minimum of the value of the grain-size expression and the
|
||||
number of logical loop iterations, but less than two times the value of the
|
||||
grain-size expression.
|
||||
|
||||
If `num_tasks` is specified, the taskloop construct creates as many tasks as
|
||||
the minimum of the num-tasks expression and the number of logical loop
|
||||
iterations. Each task must have at least one logical loop iteration.
|
||||
|
||||
By default, the taskloop construct executes as if it was enclosed in a
|
||||
taskgroup construct with no statements or directives outside of the taskloop
|
||||
construct. Thus, the taskloop construct creates an implicit taskgroup
|
||||
region. If the `nogroup` clause is present, no implicit taskgroup region is
|
||||
created.
|
||||
}];
|
||||
|
||||
let arguments = (ins Variadic<IntLikeType>:$lowerBound,
|
||||
Variadic<IntLikeType>:$upperBound,
|
||||
Variadic<IntLikeType>:$step,
|
||||
UnitAttr:$inclusive,
|
||||
Optional<I1>:$if_expr,
|
||||
Optional<I1>:$final_expr,
|
||||
UnitAttr:$untied,
|
||||
UnitAttr:$mergeable,
|
||||
Variadic<OpenMP_PointerLikeType>:$in_reduction_vars,
|
||||
OptionalAttr<SymbolRefArrayAttr>:$in_reductions,
|
||||
Variadic<OpenMP_PointerLikeType>:$reduction_vars,
|
||||
OptionalAttr<SymbolRefArrayAttr>:$reductions,
|
||||
Optional<IntLikeType>:$priority,
|
||||
Variadic<AnyType>:$allocate_vars,
|
||||
Variadic<AnyType>:$allocators_vars,
|
||||
Optional<IntLikeType>: $grain_size,
|
||||
Optional<IntLikeType>: $num_tasks,
|
||||
UnitAttr: $nogroup);
|
||||
|
||||
let regions = (region AnyRegion:$region);
|
||||
|
||||
let assemblyFormat = [{
|
||||
oilist(`if` `(` $if_expr `)`
|
||||
|`final` `(` $final_expr `)`
|
||||
|`untied` $untied
|
||||
|`mergeable` $mergeable
|
||||
|`in_reduction` `(`
|
||||
custom<ReductionVarList>(
|
||||
$in_reduction_vars, type($in_reduction_vars), $in_reductions
|
||||
) `)`
|
||||
|`reduction` `(`
|
||||
custom<ReductionVarList>(
|
||||
$reduction_vars, type($reduction_vars), $reductions
|
||||
) `)`
|
||||
|`priority` `(` $priority `:` type($priority) `)`
|
||||
|`allocate` `(`
|
||||
custom<AllocateAndAllocator>(
|
||||
$allocate_vars, type($allocate_vars),
|
||||
$allocators_vars, type($allocators_vars)
|
||||
) `)`
|
||||
|`grain_size` `(` $grain_size `:` type($grain_size) `)`
|
||||
|`num_tasks` `(` $num_tasks `:` type($num_tasks) `)`
|
||||
|`nogroup` $nogroup
|
||||
) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
|
||||
type($step), $inclusive) attr-dict
|
||||
}];
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
/// Returns the reduction variables
|
||||
SmallVector<Value> getReductionVars();
|
||||
void getEffects(SmallVectorImpl<MemoryEffects::EffectInstance> &effects);
|
||||
}];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments,
|
||||
ReductionClauseInterface,
|
||||
AutomaticAllocationScope]> {
|
||||
|
|
|
@ -40,7 +40,7 @@ def ReductionClauseInterface : OpInterface<"ReductionClauseInterface"> {
|
|||
|
||||
let methods = [
|
||||
InterfaceMethod<
|
||||
"Get reduction vars", "::mlir::Operation::operand_range",
|
||||
"Get reduction vars", "::mlir::SmallVector<::mlir::Value>",
|
||||
"getReductionVars">,
|
||||
];
|
||||
}
|
||||
|
|
|
@ -523,11 +523,11 @@ LogicalResult SingleOp::verify() {
|
|||
/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive? steps
|
||||
/// steps := `step` `(`ssa-id-list`)`
|
||||
ParseResult
|
||||
parseWsLoopControl(OpAsmParser &parser, Region &region,
|
||||
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
|
||||
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
|
||||
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
|
||||
SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
|
||||
parseLoopControl(OpAsmParser &parser, Region &region,
|
||||
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
|
||||
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
|
||||
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
|
||||
SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
|
||||
// Parse an opening `(` followed by induction variables followed by `)`
|
||||
SmallVector<OpAsmParser::Argument> ivs;
|
||||
Type loopVarType;
|
||||
|
@ -557,10 +557,10 @@ parseWsLoopControl(OpAsmParser &parser, Region &region,
|
|||
return parser.parseRegion(region, ivs);
|
||||
}
|
||||
|
||||
void printWsLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
|
||||
ValueRange lowerBound, ValueRange upperBound,
|
||||
ValueRange steps, TypeRange loopVarTypes,
|
||||
UnitAttr inclusive) {
|
||||
void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
|
||||
ValueRange lowerBound, ValueRange upperBound,
|
||||
ValueRange steps, TypeRange loopVarTypes,
|
||||
UnitAttr inclusive) {
|
||||
auto args = region.front().getArguments();
|
||||
p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound
|
||||
<< ") to (" << upperBound << ") ";
|
||||
|
@ -736,6 +736,43 @@ LogicalResult TaskGroupOp::verify() {
|
|||
task_reduction_vars());
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TaskLoopOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
SmallVector<Value> TaskLoopOp::getReductionVars() {
|
||||
SmallVector<Value> all_reduction_nvars(in_reduction_vars().begin(),
|
||||
in_reduction_vars().end());
|
||||
all_reduction_nvars.insert(all_reduction_nvars.end(),
|
||||
reduction_vars().begin(), reduction_vars().end());
|
||||
return all_reduction_nvars;
|
||||
}
|
||||
|
||||
LogicalResult TaskLoopOp::verify() {
|
||||
if (allocate_vars().size() != allocators_vars().size())
|
||||
return emitError(
|
||||
"expected equal sizes for allocate and allocator variables");
|
||||
if (failed(verifyReductionVarList(*this, reductions(), reduction_vars())) ||
|
||||
failed(
|
||||
verifyReductionVarList(*this, in_reductions(), in_reduction_vars())))
|
||||
return failure();
|
||||
|
||||
if (reduction_vars().size() > 0 && nogroup())
|
||||
return emitError("if a reduction clause is present on the taskloop "
|
||||
"directive, the nogroup clause must not be specified");
|
||||
for (auto var : reduction_vars()) {
|
||||
if (llvm::is_contained(in_reduction_vars(), var))
|
||||
return emitError("the same list item cannot appear in both a reduction "
|
||||
"and an in_reduction clause");
|
||||
}
|
||||
|
||||
if (grain_size() && num_tasks()) {
|
||||
return emitError(
|
||||
"the grainsize clause and num_tasks clause are mutually exclusive and "
|
||||
"may not appear on the same taskloop directive");
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// WsLoopOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -1294,3 +1294,128 @@ func.func @omp_cancellationpoint2() {
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
|
||||
%testmemref = "test.memref"() : () -> (memref<i32>)
|
||||
// expected-error @below {{expected equal sizes for allocate and allocator variables}}
|
||||
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testmemref) ({
|
||||
^bb0(%arg3: i32, %arg4: i32):
|
||||
"omp.terminator"() : () -> ()
|
||||
}) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, memref<i32>) -> ()
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
|
||||
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
|
||||
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
|
||||
^bb0(%arg3: i32, %arg4: i32):
|
||||
"omp.terminator"() : () -> ()
|
||||
}) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>, !llvm.ptr<f32>) -> ()
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
|
||||
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
|
||||
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({
|
||||
^bb0(%arg3: i32, %arg4: i32):
|
||||
"omp.terminator"() : () -> ()
|
||||
}) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>) -> ()
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
|
||||
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
|
||||
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
|
||||
^bb0(%arg3: i32, %arg4: i32):
|
||||
"omp.terminator"() : () -> ()
|
||||
}) {in_reductions = [@add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>, !llvm.ptr<f32>) -> ()
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
|
||||
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
|
||||
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32_2) ({
|
||||
^bb0(%arg3: i32, %arg4: i32):
|
||||
"omp.terminator"() : () -> ()
|
||||
}) {in_reductions = [@add_f32, @add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>) -> ()
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
omp.reduction.declare @add_f32 : f32
|
||||
init {
|
||||
^bb0(%arg: f32):
|
||||
%0 = arith.constant 0.0 : f32
|
||||
omp.yield (%0 : f32)
|
||||
}
|
||||
combiner {
|
||||
^bb1(%arg0: f32, %arg1: f32):
|
||||
%1 = arith.addf %arg0, %arg1 : f32
|
||||
omp.yield (%1 : f32)
|
||||
}
|
||||
|
||||
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
|
||||
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
// expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
|
||||
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>) nogroup
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
omp.terminator
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
omp.reduction.declare @add_f32 : f32
|
||||
init {
|
||||
^bb0(%arg: f32):
|
||||
%0 = arith.constant 0.0 : f32
|
||||
omp.yield (%0 : f32)
|
||||
}
|
||||
combiner {
|
||||
^bb1(%arg0: f32, %arg1: f32):
|
||||
%1 = arith.addf %arg0, %arg1 : f32
|
||||
omp.yield (%1 : f32)
|
||||
}
|
||||
|
||||
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
|
||||
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
// expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
|
||||
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>) in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
omp.terminator
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
|
||||
%testi64 = "test.i64"() : () -> (i64)
|
||||
// expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
|
||||
omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
omp.terminator
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s
|
||||
// RUN: mlir-opt %s | mlir-opt | FileCheck %s
|
||||
|
||||
func.func @omp_barrier() -> () {
|
||||
// CHECK: omp.barrier
|
||||
|
@ -1394,8 +1394,6 @@ func.func @omp_task(%bool_var: i1, %i64_var: i64, %i32_var: i32, %data_var: memr
|
|||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @omp_threadprivate() {
|
||||
%0 = arith.constant 1 : i32
|
||||
%1 = arith.constant 2 : i32
|
||||
|
@ -1528,3 +1526,141 @@ func.func @omp_taskgroup_multiple_tasks() -> () {
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @omp_taskloop
|
||||
func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
|
||||
|
||||
// CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
|
||||
omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
|
||||
omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) {
|
||||
// CHECK: test.op1
|
||||
"test.op1"(%lb) : (i32) -> ()
|
||||
// CHECK: test.op2
|
||||
"test.op2"() : () -> ()
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) inclusive step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) inclusive step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
%testbool = "test.bool"() : () -> (i1)
|
||||
|
||||
// CHECK: omp.taskloop if(%{{[^)]+}})
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop if(%testbool)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop final(%{{[^)]+}})
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop final(%testbool)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop untied
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop untied
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop mergeable
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop mergeable
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
%testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
%testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
|
||||
// CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>, @add_f32 -> %{{.+}} : !llvm.ptr<f32>)
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>, @add_f32 -> %{{.+}} : !llvm.ptr<f32>)
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>) reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>)
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>) reduction(@add_f32 -> %testf32_2 : !llvm.ptr<f32>)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
%testi32 = "test.i32"() : () -> (i32)
|
||||
// CHECK: omp.taskloop priority(%{{[^:]+}}: i32)
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop priority(%testi32: i32)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
%testmemref = "test.memref"() : () -> (memref<i32>)
|
||||
// CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>)
|
||||
omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>)
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
%testi64 = "test.i64"() : () -> (i64)
|
||||
// CHECK: omp.taskloop grain_size(%{{[^:]+}}: i64)
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop grain_size(%testi64: i64)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64)
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop num_tasks(%testi64: i64)
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: omp.taskloop nogroup
|
||||
// CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
|
||||
omp.taskloop nogroup
|
||||
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
// CHECK: return
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue