[flang][OpenMP] Lowering support for atomic update construct

This patch adds lowering support for the atomic update construct. A region is associated with every `omp.atomic.update` operation; the region contains (1) the evaluation of the expression on the RHS of the atomic assignment statement, and (2) an `omp.yield` operation that yields the extended value of the expression evaluated in (1). Reviewed By: peixin. Differential Revision: https://reviews.llvm.org/D125668
2022-07-14 18:20:28 +05:30 · 2022-07-14 18:20:28 +05:30 · a56b76d9ca
parent 9e6e631b38
commit a56b76d9ca
2 changed files with 230 additions and 2 deletions
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@ -1113,6 +1113,68 @@ static void genOmpAtomicHintAndMemoryOrderClauses(
  }
 }

+/// Lower an atomic assignment statement to an `omp.atomic.update` operation.
+///
+/// The operation is created on the address of \p assignmentStmtVariable and
+/// receives a single-block region. The block argument is bound to the symbol
+/// of the updated variable, \p assignmentStmtExpr is lowered inside the
+/// region, and its value is yielded with `omp.yield`.
+///
+/// A variable whose designator does not resolve to a named symbol is reported
+/// as not yet implemented rather than binding through an unset pointer.
+///
+/// \param converter lowering interface used to build FIR and bind symbols.
+/// \param eval PFT evaluation of the construct (not used by this function).
+/// \param assignmentStmtVariable left-hand side of the assignment statement.
+/// \param assignmentStmtExpr right-hand side of the assignment statement.
+/// \param leftHandClauseList first clause list to process; may be null.
+/// \param rightHandClauseList second clause list to process; may be null.
+static void genOmpAtomicUpdateStatement(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::pft::Evaluation &eval,
+    const Fortran::parser::Variable &assignmentStmtVariable,
+    const Fortran::parser::Expr &assignmentStmtExpr,
+    const Fortran::parser::OmpAtomicClauseList *leftHandClauseList,
+    const Fortran::parser::OmpAtomicClauseList *rightHandClauseList) {
+  // Generate `omp.atomic.update` operation for atomic assignment statements
+  auto &firOpBuilder = converter.getFirOpBuilder();
+  auto currentLocation = converter.getCurrentLocation();
+  Fortran::lower::StatementContext stmtCtx;
+
+  // Resolve the symbol of the updated variable before any IR is emitted. The
+  // pointer starts out null so that an unsupported left-hand side is
+  // diagnosed below instead of being dereferenced uninitialized.
+  const Fortran::semantics::Symbol *updateSymbol = nullptr;
+  if (const auto *varDesignator = std::get_if<
+          Fortran::common::Indirection<Fortran::parser::Designator>>(
+          &assignmentStmtVariable.u)) {
+    if (const auto *name = getDesignatorNameIfDataRef(varDesignator->value()))
+      updateSymbol = name->symbol;
+  }
+  if (!updateSymbol)
+    TODO(currentLocation,
+         "Atomic update of a variable that is not a named data reference");
+
+  mlir::Value address = fir::getBase(converter.genExprAddr(
+      *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx));
+
+  // If no hint clause is specified, the effect is as if
+  // hint(omp_sync_hint_none) had been specified.
+  mlir::IntegerAttr hint = nullptr;
+  mlir::omp::ClauseMemoryOrderKindAttr memory_order = nullptr;
+  if (leftHandClauseList)
+    genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
+                                          memory_order);
+  if (rightHandClauseList)
+    genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
+                                          memory_order);
+  auto atomicUpdateOp = firOpBuilder.create<mlir::omp::AtomicUpdateOp>(
+      currentLocation, address, hint, memory_order);
+
+  //// Generate body of Atomic Update operation
+  // The region's block takes one argument whose type is the value type of the
+  // updated variable; the variable is lowered as a value here only to obtain
+  // that type.
+  mlir::Type varType =
+      fir::getBase(
+          converter.genExprValue(
+              *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx))
+          .getType();
+  SmallVector<Type> varTys = {varType};
+  SmallVector<Location> locs = {currentLocation};
+  firOpBuilder.createBlock(&atomicUpdateOp.getRegion(), {}, varTys, locs);
+  // Bind the symbol of the updated variable to the block argument.
+  mlir::Value val =
+      fir::getBase(atomicUpdateOp.getRegion().front().getArgument(0));
+  converter.bindSymbol(*updateSymbol, val);
+  // Set the insert for the terminator operation to go at the end of the
+  // block.
+  mlir::Block &block = atomicUpdateOp.getRegion().back();
+  firOpBuilder.setInsertionPointToEnd(&block);
+
+  mlir::Value result = fir::getBase(converter.genExprValue(
+      *Fortran::semantics::GetExpr(assignmentStmtExpr), stmtCtx));
+  // Insert the terminator: YieldOp.
+  firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, result);
+  // Leave the insertion point at the start of the region's block.
+  firOpBuilder.setInsertionPointToStart(&block);
+}
+
 static void
 genOmpAtomicWrite(Fortran::lower::AbstractConverter &converter,
                  Fortran::lower::pft::Evaluation &eval,
@ -1176,6 +1238,43 @@ static void genOmpAtomicRead(Fortran::lower::AbstractConverter &converter,
                                               to_address, hint, memory_order);
 }

+/// Lower an `OmpAtomicUpdate` parse-tree node.
+///
+/// Unpacks the two clause lists (tuple elements 0 and 2) and the assignment
+/// statement (tuple element 3) of \p atomicUpdate and forwards them to
+/// genOmpAtomicUpdateStatement.
+static void
+genOmpAtomicUpdate(Fortran::lower::AbstractConverter &converter,
+                   Fortran::lower::pft::Evaluation &eval,
+                   const Fortran::parser::OmpAtomicUpdate &atomicUpdate) {
+  // The assignment statement supplies both the updated variable and the
+  // expression that computes its new value.
+  const auto &assignment = std::get<3>(atomicUpdate.t).statement;
+  const auto &updatedVariable =
+      std::get<Fortran::parser::Variable>(assignment.t);
+  const auto &updateExpr = std::get<Fortran::parser::Expr>(assignment.t);
+
+  // Both clause lists are always present on this node, so their addresses
+  // are passed as non-null pointers.
+  const Fortran::parser::OmpAtomicClauseList &firstClauseList =
+      std::get<0>(atomicUpdate.t);
+  const Fortran::parser::OmpAtomicClauseList &secondClauseList =
+      std::get<2>(atomicUpdate.t);
+
+  genOmpAtomicUpdateStatement(converter, eval, updatedVariable, updateExpr,
+                              &firstClauseList, &secondClauseList);
+}
+
+/// Lower an `OmpAtomic` parse-tree node, i.e. an atomic construct written
+/// without an atomic-clause.
+///
+/// If atomic-clause is not present on the construct, the behaviour is as if
+/// the update clause is specified, so the node is lowered through
+/// genOmpAtomicUpdateStatement with its single clause list and no second
+/// clause list.
+static void genOmpAtomic(Fortran::lower::AbstractConverter &converter,
+                         Fortran::lower::pft::Evaluation &eval,
+                         const Fortran::parser::OmpAtomic &atomicConstruct) {
+  using AssignmentStatement =
+      Fortran::parser::Statement<Fortran::parser::AssignmentStmt>;
+
+  // Fetch the assignment once; it holds both the variable and the expression.
+  const auto &assignment =
+      std::get<AssignmentStatement>(atomicConstruct.t).statement;
+  const auto &updatedVariable =
+      std::get<Fortran::parser::Variable>(assignment.t);
+  const auto &updateExpr = std::get<Fortran::parser::Expr>(assignment.t);
+
+  const Fortran::parser::OmpAtomicClauseList &clauseList =
+      std::get<Fortran::parser::OmpAtomicClauseList>(atomicConstruct.t);
+
+  genOmpAtomicUpdateStatement(converter, eval, updatedVariable, updateExpr,
+                              &clauseList, /*rightHandClauseList=*/nullptr);
+}
+
 static void
 genOMP(Fortran::lower::AbstractConverter &converter,
       Fortran::lower::pft::Evaluation &eval,
@ -1187,9 +1286,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
                 [&](const Fortran::parser::OmpAtomicWrite &atomicWrite) {
                   genOmpAtomicWrite(converter, eval, atomicWrite);
                 },
+                 [&](const Fortran::parser::OmpAtomic &atomicConstruct) {
+                   genOmpAtomic(converter, eval, atomicConstruct);
+                 },
+                 [&](const Fortran::parser::OmpAtomicUpdate &atomicUpdate) {
+                   genOmpAtomicUpdate(converter, eval, atomicUpdate);
+                 },
                 [&](const auto &) {
-                   TODO(converter.getCurrentLocation(),
-                        "Atomic update & capture");
+                   TODO(converter.getCurrentLocation(), "Atomic capture");
                 },
             },
             atomicConstruct.u);
--- a/flang/test/Lower/OpenMP/atomic-update.f90
+++ b/flang/test/Lower/OpenMP/atomic-update.f90
@ -0,0 +1,124 @@
+! This test checks lowering of atomic and atomic update constructs
+! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s
+
+! Test program: atomic updates on integer pointers and plain integers, with
+! and without the "update" keyword, hints and memory-order clauses.
+program OmpAtomicUpdate
+    use omp_lib
+    integer :: x, y, z
+    integer, pointer :: a, b
+    integer, target :: c, d
+    a=>c
+    b=>d
+
+! Expected prologue: storage for a, b, c, d, x, y, z followed by the two
+! pointer assignments above.
+!CHECK: func.func @_QQmain() {
+!CHECK: %[[A:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFEa"}
+!CHECK: %[[A_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFEa.addr"}
+!CHECK: %{{.*}} = fir.zero_bits !fir.ptr<i32>
+!CHECK: fir.store %{{.*}} to %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>>
+!CHECK: %[[B:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "b", uniq_name = "_QFEb"}
+!CHECK: %[[B_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFEb.addr"}
+!CHECK: %{{.*}} = fir.zero_bits !fir.ptr<i32>
+!CHECK: fir.store %{{.*}} to %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>>
+!CHECK: %[[C_ADDR:.*]] = fir.address_of(@_QFEc) : !fir.ref<i32>
+!CHECK: %[[D_ADDR:.*]] = fir.address_of(@_QFEd) : !fir.ref<i32>
+!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+!CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"}
+!CHECK: %{{.*}} = fir.convert %[[C_ADDR]] : (!fir.ref<i32>) -> !fir.ptr<i32>
+!CHECK: fir.store %{{.*}} to %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>>
+!CHECK: %{{.*}} = fir.convert %[[D_ADDR]] : (!fir.ref<i32>) -> !fir.ptr<i32>
+!CHECK: fir.store {{.*}} to %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>>
+! Case 1: the updated variable is a pointer; the operation is expected to take
+! the loaded pointer value as its operand and an i32 block argument.
+!CHECK: %[[LOADED_A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>>
+!CHECK:  omp.atomic.update   %[[LOADED_A]] : !fir.ptr<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %[[LOADED_B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>>
+!CHECK:    %{{.*}} = fir.load %[[LOADED_B]] : !fir.ptr<i32>
+!CHECK:    %[[RESULT:.*]] = arith.addi %[[ARG]], %{{.*}} : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK: }
+    !$omp atomic update
+        a = a + b 
+
+! Case 2: no clauses. A bare "atomic" is expected to produce the same
+! operation as "atomic update"; the updated variable appears on either side
+! of the operator.
+!CHECK: omp.atomic.update   %[[Y]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    {{.*}} = arith.constant 1 : i32
+!CHECK:    %[[RESULT:.*]] = arith.addi %[[ARG]], {{.*}} : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+!CHECK:  omp.atomic.update   %[[Z]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32>
+!CHECK:    %[[RESULT:.*]] = arith.muli %[[LOADED_X]], %[[ARG]] : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+    !$omp atomic 
+        y = y + 1
+    !$omp atomic update
+        z = x * z 
+
+! Case 3: relaxed memory order, alone or combined with a sync hint, with the
+! clauses placed before and/or after the "update" keyword. Also covers an
+! intrinsic (max) on the right-hand side.
+!CHECK:  omp.atomic.update   memory_order(relaxed) hint(uncontended) %[[X]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %{{.*}} = arith.constant 1 : i32
+!CHECK:    %[[RESULT:.*]] = arith.subi %[[ARG]], {{.*}} : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+!CHECK:  omp.atomic.update   memory_order(relaxed) %[[Y]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32>
+!CHECK:    %[[LOADED_Z:.*]] = fir.load %[[Z]] : !fir.ref<i32>
+!CHECK:    %{{.*}} = arith.cmpi sgt, %[[LOADED_X]], %[[ARG]] : i32
+!CHECK:    %{{.*}} = arith.select %{{.*}}, %[[LOADED_X]], %[[ARG]] : i32
+!CHECK:    %{{.*}} = arith.cmpi sgt, %{{.*}}, %[[LOADED_Z]] : i32
+!CHECK:    %[[RESULT:.*]] = arith.select %{{.*}}, %{{.*}}, %[[LOADED_Z]] : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+!CHECK:  omp.atomic.update   memory_order(relaxed) hint(contended) %[[Z]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32>
+!CHECK:    %[[RESULT:.*]] = arith.addi %[[ARG]], %[[LOADED_X]] : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+    !$omp atomic relaxed update hint(omp_sync_hint_uncontended)
+        x = x - 1
+    !$omp atomic update relaxed 
+        y = max(x, y, z)
+    !$omp atomic relaxed hint(omp_sync_hint_contended)
+        z = z + x
+
+! Case 4: release memory order with hints spelled using the omp_lock_hint_*
+! names; the expected hints are contended and speculative.
+!CHECK:  omp.atomic.update   memory_order(release) hint(contended) %[[Z]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %{{.*}} = arith.constant 10 : i32
+!CHECK:   %[[RESULT:.*]] = arith.muli {{.*}}, %[[ARG]] : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+!CHECK:  omp.atomic.update   memory_order(release) hint(speculative) %[[X]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %[[LOADED_Z:.*]] = fir.load %[[Z]] : !fir.ref<i32>
+!CHECK:    %[[RESULT:.*]] = arith.divsi %[[ARG]], %[[LOADED_Z]] : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+
+    !$omp atomic release update hint(omp_lock_hint_contended)
+        z = z * 10
+    !$omp atomic hint(omp_lock_hint_speculative) update release
+        x = x / z
+
+! Case 5: seq_cst memory order, once with a hint and no "update" keyword and
+! once with "update" and no hint. The function is expected to end afterwards.
+!CHECK:  omp.atomic.update   memory_order(seq_cst) hint(nonspeculative) %[[Y]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %{{.*}} = arith.constant 10 : i32
+!CHECK:    %[[RESULT:.*]] = arith.addi %{{.*}}, %[[ARG]] : i32
+!CHECK:   omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+!CHECK:  omp.atomic.update   memory_order(seq_cst) %[[Z]] : !fir.ref<i32> {
+!CHECK:  ^bb0(%[[ARG:.*]]: i32):
+!CHECK:    %[[LOADED_Y:.*]] = fir.load %[[Y]] : !fir.ref<i32>
+!CHECK:    %[[RESULT:.*]] = arith.addi %[[LOADED_Y]], %[[ARG]] : i32
+!CHECK:    omp.yield(%[[RESULT]] : i32)
+!CHECK:  }
+!CHECK:  return
+!CHECK: }
+    !$omp atomic hint(omp_sync_hint_nonspeculative) seq_cst
+        y = 10 + y
+    !$omp atomic seq_cst update
+        z = y + z
+end program OmpAtomicUpdate