[NFC][OpenMP] Fix worksharing-loop

1. Remove the redundant collapse clause from the MLIR OpenMP worksharing-loop
   operation (an MLIR sketch of the resulting form follows the change summary
   below).
2. Fix several typos.
3. Refactor the chunk size type conversion, since CreateSExtOrTrunc already
   performs both the type check and the conversion (see the sketch below).
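
For reference, a minimal sketch of the simplification in item 3, assuming a
free-standing helper around the LLVM IRBuilder API (convertChunkToIVType is an
illustrative name, not part of the patch):

  #include "llvm/IR/IRBuilder.h"

  // Widen or narrow a schedule chunk value to the loop induction-variable
  // type. CreateSExtOrTrunc compares the integer widths itself: it emits a
  // sext when chunkVar is narrower than ivType, a trunc when it is wider, and
  // returns chunkVar unchanged when the widths already match, so the manual
  // CreateSExt / CreateTrunc / pass-through branching removed by this commit
  // is unnecessary.
  static llvm::Value *convertChunkToIVType(llvm::IRBuilderBase &builder,
                                           llvm::Value *chunkVar,
                                           llvm::Type *ivType) {
    return builder.CreateSExtOrTrunc(chunkVar, ivType);
  }

Like the removed assert, CreateSExtOrTrunc expects integer (or vector of
integer) operand and destination types.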

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D128338
Peixin-Qiao 2022-06-29 12:20:03 +08:00
parent 370127b7d5
commit 1795f8cd2e
8 changed files with 26 additions and 43 deletions
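
With the collapse clause gone, a collapsed loop nest is carried entirely by the
worksharing-loop's lists of induction variables, bounds and steps (as the
updated op documentation below states). A minimal sketch in the omp dialect,
with illustrative value names:

  // Two collapsed loops: the operation has two induction variables and
  // matching bound/step lists, so no collapse(2) clause is needed or printed.
  omp.wsloop for (%i, %j) : i32 = (%lb0, %lb1) to (%ub0, %ub1) step (%s0, %s1) {
    omp.yield
  }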

@@ -746,8 +746,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
llvm::SmallVector<mlir::Value> lowerBound, upperBound, step, linearVars,
linearStepVars, reductionVars;
mlir::Value scheduleChunkClauseOperand;
mlir::Attribute scheduleClauseOperand, collapseClauseOperand,
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
mlir::Attribute scheduleClauseOperand, noWaitClauseOperand,
orderedClauseOperand, orderClauseOperand;
const auto &loopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
@@ -848,7 +848,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
scheduleClauseOperand.dyn_cast_or_null<omp::ClauseScheduleKindAttr>(),
scheduleChunkClauseOperand, /*schedule_modifiers=*/nullptr,
/*simd_modifier=*/nullptr,
collapseClauseOperand.dyn_cast_or_null<IntegerAttr>(),
noWaitClauseOperand.dyn_cast_or_null<UnitAttr>(),
orderedClauseOperand.dyn_cast_or_null<IntegerAttr>(),
orderClauseOperand.dyn_cast_or_null<omp::ClauseOrderKindAttr>(),
@@ -867,13 +866,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
} else {
wsLoopOp.ordered_valAttr(firOpBuilder.getI64IntegerAttr(0));
}
} else if (const auto &collapseClause =
std::get_if<Fortran::parser::OmpClause::Collapse>(
&clause.u)) {
const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
const std::optional<std::int64_t> collapseValue =
Fortran::evaluate::ToInt64(*expr);
wsLoopOp.collapse_valAttr(firOpBuilder.getI64IntegerAttr(*collapseValue));
} else if (const auto &scheduleClause =
std::get_if<Fortran::parser::OmpClause::Schedule>(
&clause.u)) {

@@ -39,7 +39,7 @@ program wsloop_collapse
do i = 1, a
do j= 1, b
do k = 1, c
! CHECK: omp.wsloop collapse(3) for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i32>
! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>

@@ -22,7 +22,7 @@ program wsloop_variable
!CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
!CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
!CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
!CHECK: omp.wsloop collapse(2) for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
!CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i64>
!CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i64>
!CHECK: %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i64>

@@ -308,7 +308,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
`linear_step_vars` variadic lists should contain the same number of
elements.
Reductions can be performed in a workshare loop by specifying reduction
Reductions can be performed in a worksharing-loop by specifying reduction
accumulator variables in `reduction_vars` and symbols referring to reduction
declarations in the `reductions` attribute. Each reduction is identified
by the accumulator it uses and accumulators must not be repeated in the same
@@ -325,8 +325,9 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
The optional `schedule_chunk_var` associated with this determines further
controls this distribution.
The optional `collapse_val` attribute specifies the number of loops which
are collapsed to form the worksharing loop.
Collapsed loops are represented by the worksharing-loop having a list of
indices, bounds and steps where the size of the list is equal to the
collapse value.
The `nowait` attribute, when present, signifies that there should be no
implicit barrier at the end of the loop.
@@ -351,7 +352,6 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
Optional<AnyType>:$schedule_chunk_var,
OptionalAttr<ScheduleModifierAttr>:$schedule_modifier,
UnitAttr:$simd_modifier,
Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$collapse_val,
UnitAttr:$nowait,
Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
OptionalAttr<OrderKindAttr>:$order_val,
@@ -366,7 +366,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
let regions = (region AnyRegion:$region);
let extraClassDeclaration = [{
/// Returns the number of loops in the workshape loop nest.
/// Returns the number of loops in the worksharing-loop nest.
unsigned getNumLoops() { return lowerBound().size(); }
/// Returns the number of reduction variables.
@@ -386,7 +386,6 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
custom<ScheduleClause>(
$schedule_val, $schedule_modifier, $simd_modifier,
$schedule_chunk_var, type($schedule_chunk_var)) `)`
|`collapse` `(` $collapse_val `)`
|`nowait` $nowait
|`ordered` `(` $ordered_val `)`
|`order` `(` custom<ClauseAttr>($order_val) `)`

@@ -748,8 +748,8 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state,
/*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
/*reductions=*/nullptr, /*schedule_val=*/nullptr,
/*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
/*simd_modifier=*/false, /*collapse_val=*/nullptr, /*nowait=*/false,
/*ordered_val=*/nullptr, /*order_val=*/nullptr, /*inclusive=*/false);
/*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr,
/*order_val=*/nullptr, /*inclusive=*/false);
state.addAttributes(attributes);
}

@@ -696,15 +696,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
if (loop.schedule_chunk_var()) {
llvm::Value *chunkVar =
moduleTranslation.lookupValue(loop.schedule_chunk_var());
llvm::Type *chunkVarType = chunkVar->getType();
assert(chunkVarType->isIntegerTy() &&
"chunk size must be one integer expression");
if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth())
chunk = builder.CreateSExt(chunkVar, ivType);
else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth())
chunk = builder.CreateTrunc(chunkVar, ivType);
else
chunk = chunkVar;
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
}
SmallVector<omp::ReductionDeclareOp> reductionDecls;

@@ -136,12 +136,12 @@ func.func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_thre
// CHECK-LABEL: omp_wsloop
func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
// CHECK: omp.wsloop collapse(2) ordered(1)
// CHECK: omp.wsloop ordered(1)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1,0,0,0,0]> : vector<7xi32>, collapse_val = 2, ordered_val = 1} :
}) {operand_segment_sizes = dense<[1,1,1,0,0,0,0]> : vector<7xi32>, ordered_val = 1} :
(index, index, index) -> ()
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
@@ -160,12 +160,12 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre
}) {operand_segment_sizes = dense<[1,1,1,2,2,0,0]> : vector<7xi32>, schedule_val = #omp<"schedulekind static">} :
(index, index, index, memref<i32>, memref<i32>, i32, i32) -> ()
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1,1,1,0,1]> : vector<7xi32>, schedule_val = #omp<"schedulekind dynamic">, collapse_val = 3, ordered_val = 2} :
}) {operand_segment_sizes = dense<[1,1,1,1,1,0,1]> : vector<7xi32>, schedule_val = #omp<"schedulekind dynamic">, ordered_val = 2} :
(index, index, index, memref<i32>, i32, i32) -> ()
// CHECK: omp.wsloop schedule(auto) nowait
@@ -182,9 +182,9 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre
// CHECK-LABEL: omp_wsloop_pretty
func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32, %chunk_var2 : i16) -> () {
// CHECK: omp.wsloop collapse(2) ordered(2)
// CHECK: omp.wsloop ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop collapse(2) ordered(2)
omp.wsloop ordered(2)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
@@ -196,23 +196,23 @@ func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var
omp.yield
}
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32) collapse(3)
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic) collapse(3)
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic) collapse(3)
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}

@@ -1052,7 +1052,7 @@ llvm.func @collapse_wsloop(
// CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
// CHECK: store i32 %[[TOTAL_SUB_1]], ptr
// CHECK: call void @__kmpc_for_static_init_4u
omp.wsloop collapse(3)
omp.wsloop
for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
%31 = llvm.load %20 : !llvm.ptr<i32>
%32 = llvm.add %31, %arg0 : i32
@@ -1113,7 +1113,7 @@ llvm.func @collapse_wsloop_dynamic(
// CHECK: store i32 1, ptr
// CHECK: store i32 %[[TOTAL]], ptr
// CHECK: call void @__kmpc_dispatch_init_4u
omp.wsloop collapse(3) schedule(dynamic)
omp.wsloop schedule(dynamic)
for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
%31 = llvm.load %20 : !llvm.ptr<i32>
%32 = llvm.add %31, %arg0 : i32