[NFC][OpenMP] Fix worksharing-loop

1. Remove the redundant collapse clause from the MLIR OpenMP worksharing-loop
   operation (an MLIR sketch of the resulting form follows the change summary
   below).
2. Fix several typos.
3. Refactor the chunk size type conversion, since CreateSExtOrTrunc already
   performs both the type check and the conversion (see the sketch below).
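
For reference, a minimal sketch of the simplification in item 3, assuming a
free-standing helper around the LLVM IRBuilder API (convertChunkToIVType is an
illustrative name, not part of the patch):

  #include "llvm/IR/IRBuilder.h"

  // Widen or narrow a schedule chunk value to the loop induction-variable
  // type. CreateSExtOrTrunc compares the integer widths itself: it emits a
  // sext when chunkVar is narrower than ivType, a trunc when it is wider, and
  // returns chunkVar unchanged when the widths already match, so the manual
  // CreateSExt / CreateTrunc / pass-through branching removed by this commit
  // is unnecessary.
  static llvm::Value *convertChunkToIVType(llvm::IRBuilderBase &builder,
                                           llvm::Value *chunkVar,
                                           llvm::Type *ivType) {
    return builder.CreateSExtOrTrunc(chunkVar, ivType);
  }

Like the removed assert, CreateSExtOrTrunc expects integer (or vector of
integer) operand and destination types.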

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D128338
Peixin-Qiao 2022-06-29 12:20:03 +08:00
parent 370127b7d5
commit 1795f8cd2e
8 changed files with 26 additions and 43 deletions
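
With the collapse clause gone, a collapsed loop nest is carried entirely by the
worksharing-loop's lists of induction variables, bounds and steps (as the
updated op documentation below states). A minimal sketch in the omp dialect,
with illustrative value names:

  // Two collapsed loops: the operation has two induction variables and
  // matching bound/step lists, so no collapse(2) clause is needed or printed.
  omp.wsloop for (%i, %j) : i32 = (%lb0, %lb1) to (%ub0, %ub1) step (%s0, %s1) {
    omp.yield
  }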

@@ -746,8 +746,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
llvm::SmallVector<mlir::Value> lowerBound, upperBound, step, linearVars,
linearStepVars, reductionVars;
mlir::Value scheduleChunkClauseOperand;
mlir::Attribute scheduleClauseOperand, collapseClauseOperand,
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
mlir::Attribute scheduleClauseOperand, noWaitClauseOperand,
orderedClauseOperand, orderClauseOperand;
const auto &loopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
@@ -848,7 +848,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
scheduleClauseOperand.dyn_cast_or_null<omp::ClauseScheduleKindAttr>(),
scheduleChunkClauseOperand, /*schedule_modifiers=*/nullptr,
/*simd_modifier=*/nullptr,
collapseClauseOperand.dyn_cast_or_null<IntegerAttr>(),
noWaitClauseOperand.dyn_cast_or_null<UnitAttr>(),
orderedClauseOperand.dyn_cast_or_null<IntegerAttr>(),
orderClauseOperand.dyn_cast_or_null<omp::ClauseOrderKindAttr>(),
@@ -867,13 +866,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
} else {
wsLoopOp.ordered_valAttr(firOpBuilder.getI64IntegerAttr(0));
}
} else if (const auto &collapseClause =
std::get_if<Fortran::parser::OmpClause::Collapse>(
&clause.u)) {
const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
const std::optional<std::int64_t> collapseValue =
Fortran::evaluate::ToInt64(*expr);
wsLoopOp.collapse_valAttr(firOpBuilder.getI64IntegerAttr(*collapseValue));
} else if (const auto &scheduleClause =
std::get_if<Fortran::parser::OmpClause::Schedule>(
&clause.u)) {

@@ -39,7 +39,7 @@ program wsloop_collapse
do i = 1, a
do j= 1, b
do k = 1, c
! CHECK: omp.wsloop collapse(3) for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i32>
! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>

@@ -22,7 +22,7 @@ program wsloop_variable
!CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
!CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
!CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
!CHECK: omp.wsloop collapse(2) for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
!CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i64>
!CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i64>
!CHECK: %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i64>

@@ -308,7 +308,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
`linear_step_vars` variadic lists should contain the same number of
elements.
Reductions can be performed in a workshare loop by specifying reduction
Reductions can be performed in a worksharing-loop by specifying reduction
accumulator variables in `reduction_vars` and symbols referring to reduction
declarations in the `reductions` attribute. Each reduction is identified
by the accumulator it uses and accumulators must not be repeated in the same
@@ -325,8 +325,9 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
The optional `schedule_chunk_var` associated with this determines further
controls this distribution.
The optional `collapse_val` attribute specifies the number of loops which
are collapsed to form the worksharing loop.
Collapsed loops are represented by the worksharing-loop having a list of
indices, bounds and steps where the size of the list is equal to the
collapse value.
The `nowait` attribute, when present, signifies that there should be no
implicit barrier at the end of the loop.
@@ -351,7 +352,6 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
Optional<AnyType>:$schedule_chunk_var,
OptionalAttr<ScheduleModifierAttr>:$schedule_modifier,
UnitAttr:$simd_modifier,
Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$collapse_val,
UnitAttr:$nowait,
Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
OptionalAttr<OrderKindAttr>:$order_val,
@@ -366,7 +366,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
let regions = (region AnyRegion:$region);
let extraClassDeclaration = [{
/// Returns the number of loops in the workshape loop nest.
/// Returns the number of loops in the worksharing-loop nest.
unsigned getNumLoops() { return lowerBound().size(); }
/// Returns the number of reduction variables.
@@ -386,7 +386,6 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
custom<ScheduleClause>(
$schedule_val, $schedule_modifier, $simd_modifier,
$schedule_chunk_var, type($schedule_chunk_var)) `)`
|`collapse` `(` $collapse_val `)`
|`nowait` $nowait
|`ordered` `(` $ordered_val `)`
|`order` `(` custom<ClauseAttr>($order_val) `)`

@@ -748,8 +748,8 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state,
/*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
/*reductions=*/nullptr, /*schedule_val=*/nullptr,
/*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
/*simd_modifier=*/false, /*collapse_val=*/nullptr, /*nowait=*/false,
/*ordered_val=*/nullptr, /*order_val=*/nullptr, /*inclusive=*/false);
/*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr,
/*order_val=*/nullptr, /*inclusive=*/false);
state.addAttributes(attributes);
}

@@ -696,15 +696,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
if (loop.schedule_chunk_var()) {
llvm::Value *chunkVar =
moduleTranslation.lookupValue(loop.schedule_chunk_var());
llvm::Type *chunkVarType = chunkVar->getType();
assert(chunkVarType->isIntegerTy() &&
"chunk size must be one integer expression");
if (chunkVarType->getIntegerBitWidth() < ivType->getIntegerBitWidth())
chunk = builder.CreateSExt(chunkVar, ivType);
else if (chunkVarType->getIntegerBitWidth() > ivType->getIntegerBitWidth())
chunk = builder.CreateTrunc(chunkVar, ivType);
else
chunk = chunkVar;
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
}
SmallVector<omp::ReductionDeclareOp> reductionDecls;

@@ -136,12 +136,12 @@ func.func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_thre
// CHECK-LABEL: omp_wsloop
func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
// CHECK: omp.wsloop collapse(2) ordered(1)
// CHECK: omp.wsloop ordered(1)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1,0,0,0,0]> : vector<7xi32>, collapse_val = 2, ordered_val = 1} :
}) {operand_segment_sizes = dense<[1,1,1,0,0,0,0]> : vector<7xi32>, ordered_val = 1} :
(index, index, index) -> ()
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
@@ -160,12 +160,12 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre
}) {operand_segment_sizes = dense<[1,1,1,2,2,0,0]> : vector<7xi32>, schedule_val = #omp<"schedulekind static">} :
(index, index, index, memref<i32>, memref<i32>, i32, i32) -> ()
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1,1,1,0,1]> : vector<7xi32>, schedule_val = #omp<"schedulekind dynamic">, collapse_val = 3, ordered_val = 2} :
}) {operand_segment_sizes = dense<[1,1,1,1,1,0,1]> : vector<7xi32>, schedule_val = #omp<"schedulekind dynamic">, ordered_val = 2} :
(index, index, index, memref<i32>, i32, i32) -> ()
// CHECK: omp.wsloop schedule(auto) nowait
@@ -182,9 +182,9 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre
// CHECK-LABEL: omp_wsloop_pretty
func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32, %chunk_var2 : i16) -> () {
// CHECK: omp.wsloop collapse(2) ordered(2)
// CHECK: omp.wsloop ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop collapse(2) ordered(2)
omp.wsloop ordered(2)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
@@ -196,23 +196,23 @@ func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var
omp.yield
}
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32) collapse(3)
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic) collapse(3)
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic) collapse(3)
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}

@@ -1052,7 +1052,7 @@ llvm.func @collapse_wsloop(
// CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
// CHECK: store i32 %[[TOTAL_SUB_1]], ptr
// CHECK: call void @__kmpc_for_static_init_4u
omp.wsloop collapse(3)
omp.wsloop
for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
%31 = llvm.load %20 : !llvm.ptr<i32>
%32 = llvm.add %31, %arg0 : i32
@@ -1113,7 +1113,7 @@ llvm.func @collapse_wsloop_dynamic(
// CHECK: store i32 1, ptr
// CHECK: store i32 %[[TOTAL]], ptr
// CHECK: call void @__kmpc_dispatch_init_4u
omp.wsloop collapse(3) schedule(dynamic)
omp.wsloop schedule(dynamic)
for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
%31 = llvm.load %20 : !llvm.ptr<i32>
%32 = llvm.add %31, %arg0 : i32