forked from OSchip/llvm-project
[NFC][OpenMP] Fix worksharing-loop
1. Remove the redundant collapse clause in MLIR OpenMP worksharing-loop operation. 2. Fix several typos. 3. Refactor the chunk size type conversion since CreateSExtOrTrunc has both type check and type conversion. Reviewed By: kiranchandramohan Differential Revision: https://reviews.llvm.org/D128338
This commit is contained in:
parent
370127b7d5
commit
1795f8cd2e
|
@@ -746,8 +746,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
llvm::SmallVector<mlir::Value> lowerBound, upperBound, step, linearVars,
|
||||
linearStepVars, reductionVars;
|
||||
mlir::Value scheduleChunkClauseOperand;
|
||||
mlir::Attribute scheduleClauseOperand, collapseClauseOperand,
|
||||
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
|
||||
mlir::Attribute scheduleClauseOperand, noWaitClauseOperand,
|
||||
orderedClauseOperand, orderClauseOperand;
|
||||
const auto &loopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
|
||||
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
|
||||
|
||||
|
@@ -848,7 +848,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
scheduleClauseOperand.dyn_cast_or_null<omp::ClauseScheduleKindAttr>(),
|
||||
scheduleChunkClauseOperand, /*schedule_modifiers=*/nullptr,
|
||||
/*simd_modifier=*/nullptr,
|
||||
collapseClauseOperand.dyn_cast_or_null<IntegerAttr>(),
|
||||
noWaitClauseOperand.dyn_cast_or_null<UnitAttr>(),
|
||||
orderedClauseOperand.dyn_cast_or_null<IntegerAttr>(),
|
||||
orderClauseOperand.dyn_cast_or_null<omp::ClauseOrderKindAttr>(),
|
||||
|
@@ -867,13 +866,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
} else {
|
||||
wsLoopOp.ordered_valAttr(firOpBuilder.getI64IntegerAttr(0));
|
||||
}
|
||||
} else if (const auto &collapseClause =
|
||||
std::get_if<Fortran::parser::OmpClause::Collapse>(
|
||||
&clause.u)) {
|
||||
const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
|
||||
const std::optional<std::int64_t> collapseValue =
|
||||
Fortran::evaluate::ToInt64(*expr);
|
||||
wsLoopOp.collapse_valAttr(firOpBuilder.getI64IntegerAttr(*collapseValue));
|
||||
} else if (const auto &scheduleClause =
|
||||
std::get_if<Fortran::parser::OmpClause::Schedule>(
|
||||
&clause.u)) {
|
||||
|
|
|
@@ -39,7 +39,7 @@ program wsloop_collapse
|
|||
do i = 1, a
|
||||
do j= 1, b
|
||||
do k = 1, c
|
||||
! CHECK: omp.wsloop collapse(3) for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
|
||||
! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
|
||||
! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i32>
|
||||
! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
|
||||
! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
|
||||
|
|
|
@@ -22,7 +22,7 @@ program wsloop_variable
|
|||
!CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
|
||||
!CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
|
||||
!CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
|
||||
!CHECK: omp.wsloop collapse(2) for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
|
||||
!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
|
||||
!CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i64>
|
||||
!CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i64>
|
||||
!CHECK: %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i64>
|
||||
|
|
|
@@ -308,7 +308,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|
|||
`linear_step_vars` variadic lists should contain the same number of
|
||||
elements.
|
||||
|
||||
Reductions can be performed in a workshare loop by specifying reduction
|
||||
Reductions can be performed in a worksharing-loop by specifying reduction
|
||||
accumulator variables in `reduction_vars` and symbols referring to reduction
|
||||
declarations in the `reductions` attribute. Each reduction is identified
|
||||
by the accumulator it uses and accumulators must not be repeated in the same
|
||||
|
@@ -325,8 +325,9 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|
|||
The optional `schedule_chunk_var` associated with this determines further
|
||||
controls this distribution.
|
||||
|
||||
The optional `collapse_val` attribute specifies the number of loops which
|
||||
are collapsed to form the worksharing loop.
|
||||
Collapsed loops are represented by the worksharing-loop having a list of
|
||||
indices, bounds and steps where the size of the list is equal to the
|
||||
collapse value.
|
||||
|
||||
The `nowait` attribute, when present, signifies that there should be no
|
||||
implicit barrier at the end of the loop.
|
||||
|
@@ -351,7 +352,6 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|
|||
Optional<AnyType>:$schedule_chunk_var,
|
||||
OptionalAttr<ScheduleModifierAttr>:$schedule_modifier,
|
||||
UnitAttr:$simd_modifier,
|
||||
Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$collapse_val,
|
||||
UnitAttr:$nowait,
|
||||
Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
|
||||
OptionalAttr<OrderKindAttr>:$order_val,
|
||||
|
@@ -366,7 +366,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|
|||
let regions = (region AnyRegion:$region);
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
/// Returns the number of loops in the workshape loop nest.
|
||||
/// Returns the number of loops in the worksharing-loop nest.
|
||||
unsigned getNumLoops() { return lowerBound().size(); }
|
||||
|
||||
/// Returns the number of reduction variables.
|
||||
|
@@ -386,7 +386,6 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|
|||
custom<ScheduleClause>(
|
||||
$schedule_val, $schedule_modifier, $simd_modifier,
|
||||
$schedule_chunk_var, type($schedule_chunk_var)) `)`
|
||||
|`collapse` `(` $collapse_val `)`
|
||||
|`nowait` $nowait
|
||||
|`ordered` `(` $ordered_val `)`
|
||||
|`order` `(` custom<ClauseAttr>($order_val) `)`
|
||||
|
|
|
@@ -748,8 +748,8 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state,
|
|||
/*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
|
||||
/*reductions=*/nullptr, /*schedule_val=*/nullptr,
|
||||
/*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
|
||||
/*simd_modifier=*/false, /*collapse_val=*/nullptr, /*nowait=*/false,
|
||||
/*ordered_val=*/nullptr, /*order_val=*/nullptr, /*inclusive=*/false);
|
||||
/*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr,
|
||||
/*order_val=*/nullptr, /*inclusive=*/false);
|
||||
state.addAttributes(attributes);
|
||||
}
|
||||
|
||||
|
|
|
@@ -696,15 +696,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
|
|||
if (loop.schedule_chunk_var()) {
|
||||
llvm::Value *chunkVar =
|
||||
moduleTranslation.lookupValue(loop.schedule_chunk_var());
|
||||
llvm::Type *chunkVarType = chunkVar->getType();
|
||||
assert(chunkVarType->isIntegerTy() &&
|
||||
"chunk size must be one integer expression");
|
||||
if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth())
|
||||
chunk = builder.CreateSExt(chunkVar, ivType);
|
||||
else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth())
|
||||
chunk = builder.CreateTrunc(chunkVar, ivType);
|
||||
else
|
||||
chunk = chunkVar;
|
||||
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
|
||||
}
|
||||
|
||||
SmallVector<omp::ReductionDeclareOp> reductionDecls;
|
||||
|
|
|
@@ -136,12 +136,12 @@ func.func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_thre
|
|||
// CHECK-LABEL: omp_wsloop
|
||||
func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
|
||||
|
||||
// CHECK: omp.wsloop collapse(2) ordered(1)
|
||||
// CHECK: omp.wsloop ordered(1)
|
||||
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
|
||||
"omp.wsloop" (%lb, %ub, %step) ({
|
||||
^bb0(%iv: index):
|
||||
omp.yield
|
||||
}) {operand_segment_sizes = dense<[1,1,1,0,0,0,0]> : vector<7xi32>, collapse_val = 2, ordered_val = 1} :
|
||||
}) {operand_segment_sizes = dense<[1,1,1,0,0,0,0]> : vector<7xi32>, ordered_val = 1} :
|
||||
(index, index, index) -> ()
|
||||
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
|
||||
|
@@ -160,12 +160,12 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre
|
|||
}) {operand_segment_sizes = dense<[1,1,1,2,2,0,0]> : vector<7xi32>, schedule_val = #omp<"schedulekind static">} :
|
||||
(index, index, index, memref<i32>, memref<i32>, i32, i32) -> ()
|
||||
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) collapse(3) ordered(2)
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) ordered(2)
|
||||
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
|
||||
"omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({
|
||||
^bb0(%iv: index):
|
||||
omp.yield
|
||||
}) {operand_segment_sizes = dense<[1,1,1,1,1,0,1]> : vector<7xi32>, schedule_val = #omp<"schedulekind dynamic">, collapse_val = 3, ordered_val = 2} :
|
||||
}) {operand_segment_sizes = dense<[1,1,1,1,1,0,1]> : vector<7xi32>, schedule_val = #omp<"schedulekind dynamic">, ordered_val = 2} :
|
||||
(index, index, index, memref<i32>, i32, i32) -> ()
|
||||
|
||||
// CHECK: omp.wsloop schedule(auto) nowait
|
||||
|
@@ -182,9 +182,9 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre
|
|||
// CHECK-LABEL: omp_wsloop_pretty
|
||||
func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32, %chunk_var2 : i16) -> () {
|
||||
|
||||
// CHECK: omp.wsloop collapse(2) ordered(2)
|
||||
// CHECK: omp.wsloop ordered(2)
|
||||
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
|
||||
omp.wsloop collapse(2) ordered(2)
|
||||
omp.wsloop ordered(2)
|
||||
for (%iv) : index = (%lb) to (%ub) step (%step) {
|
||||
omp.yield
|
||||
}
|
||||
|
@@ -196,23 +196,23 @@ func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var
|
|||
omp.yield
|
||||
}
|
||||
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) collapse(3) ordered(2)
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) ordered(2)
|
||||
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
|
||||
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32) collapse(3)
|
||||
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32)
|
||||
for (%iv) : index = (%lb) to (%ub) step (%step) {
|
||||
omp.yield
|
||||
}
|
||||
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) collapse(3) ordered(2)
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) ordered(2)
|
||||
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
|
||||
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic) collapse(3)
|
||||
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic)
|
||||
for (%iv) : index = (%lb) to (%ub) step (%step) {
|
||||
omp.yield
|
||||
}
|
||||
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) collapse(3) ordered(2)
|
||||
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) ordered(2)
|
||||
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
|
||||
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic) collapse(3)
|
||||
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic)
|
||||
for (%iv) : index = (%lb) to (%ub) step (%step) {
|
||||
omp.yield
|
||||
}
|
||||
|
|
|
@@ -1052,7 +1052,7 @@ llvm.func @collapse_wsloop(
|
|||
// CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
|
||||
// CHECK: store i32 %[[TOTAL_SUB_1]], ptr
|
||||
// CHECK: call void @__kmpc_for_static_init_4u
|
||||
omp.wsloop collapse(3)
|
||||
omp.wsloop
|
||||
for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
|
||||
%31 = llvm.load %20 : !llvm.ptr<i32>
|
||||
%32 = llvm.add %31, %arg0 : i32
|
||||
|
@@ -1113,7 +1113,7 @@ llvm.func @collapse_wsloop_dynamic(
|
|||
// CHECK: store i32 1, ptr
|
||||
// CHECK: store i32 %[[TOTAL]], ptr
|
||||
// CHECK: call void @__kmpc_dispatch_init_4u
|
||||
omp.wsloop collapse(3) schedule(dynamic)
|
||||
omp.wsloop schedule(dynamic)
|
||||
for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
|
||||
%31 = llvm.load %20 : !llvm.ptr<i32>
|
||||
%32 = llvm.add %31, %arg0 : i32
|
||||
|
|
Loading…
Reference in New Issue