[mlir][OpenMP] Added translation from `omp.atomic.capture` to LLVM IR

This patch adds translation from `omp.atomic.capture` to LLVM IR. Also added tests for the same. Depends on D121546 Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D121554
2022-03-21 16:20:54 +05:30 · 2022-03-21 16:20:54 +05:30 · 31486a9fc2
parent b6f85d8539
commit 31486a9fc2
6 changed files with 784 additions and 1 deletions
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@ -3591,6 +3591,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
  case AtomicRMWInst::Nand:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
+  case AtomicRMWInst::Xchg:
    emitRMWOp = XElemTy;
    break;
  case AtomicRMWInst::Sub:
@ -3606,7 +3607,11 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
    Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
    // not needed except in case of postfix captures. Generate anyway for
    // consistency with the else part. Will be removed with any DCE pass.
-    Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
+    // AtomicRMWInst::Xchg does not have a corresponding instruction.
+    if (RMWOp == AtomicRMWInst::Xchg)
+      Res.second = Res.first;
+    else
+      Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
  } else {
    unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
    IntegerType *IntCastTy =
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@ -717,6 +717,11 @@ def AtomicUpdateOp : OpenMP_Op<"atomic.update",
  }];
  let hasVerifier = 1;
  let hasRegionVerifier = 1;
+  let extraClassDeclaration = [{
+    Operation* getFirstOp() {
+      return &getRegion().front().getOperations().front();
+    }
+  }];
 }

 def AtomicCaptureOp : OpenMP_Op<"atomic.capture",
@ -764,6 +769,25 @@ def AtomicCaptureOp : OpenMP_Op<"atomic.capture",
    $region attr-dict
  }];
  let hasRegionVerifier = 1;
+  let extraClassDeclaration = [{
+    /// Returns the first operation in atomic capture region
+    Operation* getFirstOp();
+
+    /// Returns the second operation in atomic capture region
+    Operation* getSecondOp();
+
+    /// Returns the `atomic.read` operation inside the region, if any.
+    /// Otherwise, it returns nullptr.
+    AtomicReadOp getAtomicReadOp();
+
+    /// Returns the `atomic.write` operation inside the region, if any.
+    /// Otherwise, it returns nullptr.
+    AtomicWriteOp getAtomicWriteOp();
+
+    /// Returns the `atomic.update` operation inside the region, if any.
+    /// Otherwise, it returns nullptr.
+    AtomicUpdateOp getAtomicUpdateOp();
+  }];
 }

 //===----------------------------------------------------------------------===//
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@ -1149,6 +1149,33 @@ LogicalResult AtomicUpdateOp::verifyRegions() {
 // Verifier for AtomicCaptureOp
 //===----------------------------------------------------------------------===//

+/// Returns the first operation in the entry block of the capture region.
+Operation *AtomicCaptureOp::getFirstOp() {
+  Block &body = getRegion().front();
+  return &body.front();
+}
+
+/// Returns the operation that immediately follows the first operation in the
+/// entry block of the capture region.
+Operation *AtomicCaptureOp::getSecondOp() {
+  Block::OpListType &bodyOps = getRegion().front().getOperations();
+  Operation &leadingOp = bodyOps.front();
+  return bodyOps.getNextNode(leadingOp);
+}
+
+/// Returns the `atomic.read` found among the first two operations of the
+/// region, preferring the first; yields a null op when neither is a read.
+AtomicReadOp AtomicCaptureOp::getAtomicReadOp() {
+  AtomicReadOp leading = dyn_cast<AtomicReadOp>(getFirstOp());
+  return leading ? leading : dyn_cast<AtomicReadOp>(getSecondOp());
+}
+
+/// Returns the `atomic.write` found among the first two operations of the
+/// region, preferring the first; yields a null op when neither is a write.
+AtomicWriteOp AtomicCaptureOp::getAtomicWriteOp() {
+  AtomicWriteOp leading = dyn_cast<AtomicWriteOp>(getFirstOp());
+  return leading ? leading : dyn_cast<AtomicWriteOp>(getSecondOp());
+}
+
+/// Returns the `atomic.update` found among the first two operations of the
+/// region, preferring the first; yields a null op when neither is an update.
+AtomicUpdateOp AtomicCaptureOp::getAtomicUpdateOp() {
+  AtomicUpdateOp leading = dyn_cast<AtomicUpdateOp>(getFirstOp());
+  return leading ? leading : dyn_cast<AtomicUpdateOp>(getSecondOp());
+}
+
 LogicalResult AtomicCaptureOp::verifyRegions() {
  Block::OpListType &ops = region().front().getOperations();
  if (ops.size() != 3)
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@ -1114,6 +1114,108 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
  return updateGenStatus;
 }

+/// Converts an `omp.atomic.capture` operation to LLVM IR using the
+/// OpenMPIRBuilder.
+///
+/// The capture region is expected to pair an `omp.atomic.read` with either an
+/// `omp.atomic.update` or an `omp.atomic.write`. A write is always treated as
+/// a postfix update whose expression is the written value; an update is
+/// treated as postfix only when it is the second operation in the region.
+///
+/// Emits an error on the nested `omp.atomic.update` and returns failure when
+/// its region does not hold exactly two operations, when its first operation
+/// is not a binary operation taking the region argument as an operand, or
+/// when the region body cannot be translated.
+static LogicalResult
+convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
+                        llvm::IRBuilderBase &builder,
+                        LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  mlir::Value mlirExpr;
+  bool isXBinopExpr = false, isPostfixUpdate = false;
+  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
+
+  // Look up the nested operations once instead of re-scanning the region on
+  // every use.
+  omp::AtomicReadOp atomicReadOp = atomicCaptureOp.getAtomicReadOp();
+  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
+  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
+
+  assert(atomicReadOp && "capture region must contain an atomic.read op");
+  assert((atomicUpdateOp || atomicWriteOp) &&
+         "internal op must be an atomic.update or atomic.write op");
+
+  if (atomicWriteOp) {
+    // A write is always handled as a postfix update.
+    isPostfixUpdate = true;
+    mlirExpr = atomicWriteOp.value();
+  } else {
+    // The update is postfix when it is the second operation in the region.
+    isPostfixUpdate =
+        atomicCaptureOp.getSecondOp() == atomicUpdateOp.getOperation();
+    auto &innerOpList = atomicUpdateOp.region().front().getOperations();
+    if (innerOpList.size() != 2)
+      return atomicUpdateOp.emitError(
+          "exactly two operations are allowed inside an "
+          "atomic update region while lowering to LLVM IR");
+    Operation *innerUpdateOp = atomicUpdateOp.getFirstOp();
+    if (innerUpdateOp->getNumOperands() != 2 ||
+        !llvm::is_contained(innerUpdateOp->getOperands(),
+                            atomicUpdateOp.getRegion().getArgument(0)))
+      return atomicUpdateOp.emitError(
+          "the update operation inside the region must be a binary operation "
+          "and that update operation must have the region argument as an "
+          "operand");
+    binop = convertBinOpToAtomic(*innerUpdateOp);
+
+    // Record whether the region argument (the value of `x`) is the left-hand
+    // operand; the remaining operand is the update expression.
+    isXBinopExpr = innerUpdateOp->getOperand(0) ==
+                   atomicUpdateOp.getRegion().getArgument(0);
+
+    mlirExpr = (isXBinopExpr ? innerUpdateOp->getOperand(1)
+                             : innerUpdateOp->getOperand(0));
+  }
+
+  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
+  llvm::Value *llvmX = moduleTranslation.lookupValue(atomicReadOp.x());
+  llvm::Value *llvmV = moduleTranslation.lookupValue(atomicReadOp.v());
+  auto mlirXType = atomicReadOp.x().getType().cast<LLVM::LLVMPointerType>();
+  llvm::Type *llvmXElementType =
+      moduleTranslation.convertType(mlirXType.getElementType());
+  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
+                                                      /*isSigned=*/false,
+                                                      /*isVolatile=*/false};
+  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
+                                                      /*isSigned=*/false,
+                                                      /*isVolatile=*/false};
+
+  llvm::AtomicOrdering atomicOrdering =
+      convertAtomicOrdering(atomicCaptureOp.memory_order_val());
+
+  // Callback handed to the OpenMPIRBuilder to produce the new value of `x`.
+  // Failures are reported through `updateGenStatus` because the callback can
+  // only return a value.
+  LogicalResult updateGenStatus = success();
+  auto updateFn = [&](llvm::Value *atomicx,
+                      llvm::IRBuilder<> &builder) -> llvm::Value * {
+    if (atomicWriteOp)
+      return moduleTranslation.lookupValue(atomicWriteOp.value());
+    Block &bb = *atomicUpdateOp.region().begin();
+    moduleTranslation.mapValue(*atomicUpdateOp.region().args_begin(), atomicx);
+    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
+    if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
+      updateGenStatus = (atomicUpdateOp.emitError()
+                         << "unable to convert update operation to llvm IR");
+      return nullptr;
+    }
+    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
+    assert(yieldop && yieldop.results().size() == 1 &&
+           "terminator must be omp.yield op and it must have exactly one "
+           "argument");
+    return moduleTranslation.lookupValue(yieldop.results()[0]);
+  };
+  // Handle ambiguous alloca, if any.
+  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+  if (allocaIP.getPoint() == ompLoc.IP.getPoint()) {
+    // Same point => split basic block and make them unambiguous.
+    llvm::UnreachableInst *unreachableInst = builder.CreateUnreachable();
+    builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock(
+        unreachableInst, "alloca_split"));
+    ompLoc.IP = builder.saveIP();
+    // The placeholder only marked the split point; erase it (rather than
+    // merely unlinking it) so the instruction is not leaked.
+    unreachableInst->eraseFromParent();
+  }
+  // The alloca insertion point is queried again on purpose: the block split
+  // above may have changed it, so `allocaIP` is not reused here.
+  builder.restoreIP(ompBuilder->createAtomicCapture(
+      ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX,
+      llvmAtomicV, llvmExpr, atomicOrdering, binop, updateFn, atomicUpdateOp,
+      isPostfixUpdate, isXBinopExpr));
+  return updateGenStatus;
+}
+
 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
 /// mapping between reduction variables and their private equivalents to have
 /// been stored on the ModuleTranslation stack. Currently only supports
@ -1247,6 +1349,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
      .Case([&](omp::AtomicUpdateOp op) {
        return convertOmpAtomicUpdate(op, builder, moduleTranslation);
      })
+      .Case([&](omp::AtomicCaptureOp op) {
+        return convertOmpAtomicCapture(op, builder, moduleTranslation);
+      })
      .Case([&](omp::SectionsOp) {
        return convertOmpSections(*op, builder, moduleTranslation);
      })
--- a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
@ -29,3 +29,41 @@ llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %expr: i32
  }
  llvm.return
 }
+
+// -----
+
+// Checking translation when the region argument is not used as an operand of
+// the update operation inside the atomic capture region.
+llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32) {
+  // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.capture}}
+  omp.atomic.capture memory_order(seq_cst) {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    // expected-error @+1 {{the update operation inside the region must be a binary operation and that update operation must have the region argument as an operand}}
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.mul %expr, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking translation when the update is carried out by using more than one
+// operation in the atomic update region nested in the atomic capture region.
+llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32) {
+  // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.capture}}
+  omp.atomic.capture memory_order(seq_cst) {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    // expected-error @+1 {{exactly two operations are allowed inside an atomic update region while lowering to LLVM IR}}
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %t1 = llvm.mul %xval, %expr : i32
+      %t2 = llvm.sdiv %t1, %expr : i32
+      %newval = llvm.add %xval, %t2 : i32
+      omp.yield(%newval : i32)
+    }
+  }
+  llvm.return
+}
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@ -1063,6 +1063,590 @@ llvm.func @omp_atomic_update_intrinsic(%x:!llvm.ptr<i32>, %expr: i32) {

 // -----

+// CHECK-LABEL: @omp_atomic_capture_prefix_update
+// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]])
+llvm.func @omp_atomic_capture_prefix_update(
+  %x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32,
+  %xf: !llvm.ptr<f32>, %vf: !llvm.ptr<f32>, %exprf: f32) -> () {
+  // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK-NEXT: %[[newval:.*]] = add i32 %[[res]], %[[expr]]
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw sub i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK-NEXT: %[[newval:.*]] = sub i32 %[[res]], %[[expr]]
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.sub %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw and i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK-NEXT: %[[newval:.*]] = and i32 %[[res]], %[[expr]]
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.and %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw or i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK-NEXT: %[[newval:.*]] = or i32 %[[res]], %[[expr]]
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.or %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw xor i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK-NEXT: %[[newval:.*]] = xor i32 %[[res]], %[[expr]]
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.xor %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = mul i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.mul %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = sdiv i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.sdiv %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = udiv i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.udiv %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = shl i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.shl %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = lshr i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.lshr %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = ashr i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.ashr %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smax.i32(i32 %[[xval]], i32 %[[expr]])
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smin.i32(i32 %[[xval]], i32 %[[expr]])
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = "llvm.intr.smin"(%xval, %expr) : (i32, i32) -> i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umax.i32(i32 %[[xval]], i32 %[[expr]])
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = "llvm.intr.umax"(%xval, %expr) : (i32, i32) -> i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umin.i32(i32 %[[xval]], i32 %[[expr]])
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[newval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = "llvm.intr.umin"(%xval, %expr) : (i32, i32) -> i32
+      omp.yield(%newval : i32)
+    }
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK: %[[newval:.*]] = fadd float %{{.*}}, %[[exprf]]
+  // CHECK: store float %[[newval]], float* %{{.*}}
+  // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32*
+  // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store float %[[newval]], float* %[[vf]]
+  omp.atomic.capture {
+    omp.atomic.update %xf : !llvm.ptr<f32> {
+    ^bb0(%xval: f32):
+      %newval = llvm.fadd %xval, %exprf : f32
+      omp.yield(%newval : f32)
+    }
+    omp.atomic.read %vf = %xf : !llvm.ptr<f32>
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK: %[[newval:.*]] = fsub float %{{.*}}, %[[exprf]]
+  // CHECK: store float %[[newval]], float* %{{.*}}
+  // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32*
+  // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store float %[[newval]], float* %[[vf]]
+  omp.atomic.capture {
+    omp.atomic.update %xf : !llvm.ptr<f32> {
+    ^bb0(%xval: f32):
+      %newval = llvm.fsub %xval, %exprf : f32
+      omp.yield(%newval : f32)
+    }
+    omp.atomic.read %vf = %xf : !llvm.ptr<f32>
+  }
+
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: @omp_atomic_capture_postfix_update
+// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]])
+llvm.func @omp_atomic_capture_postfix_update(
+  %x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32,
+  %xf: !llvm.ptr<f32>, %vf: !llvm.ptr<f32>, %exprf: f32) -> () {
+  // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw sub i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.sub %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw and i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.and %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw or i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.or %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw xor i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.xor %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = mul i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.mul %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = sdiv i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.sdiv %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = udiv i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.udiv %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = shl i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.shl %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = lshr i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.lshr %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = ashr i32 %[[xval]], %[[expr]]
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.ashr %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smax.i32(i32 %[[xval]], i32 %[[expr]])
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.smin.i32(i32 %[[xval]], i32 %[[expr]])
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = "llvm.intr.smin"(%xval, %expr) : (i32, i32) -> i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umax.i32(i32 %[[xval]], i32 %[[expr]])
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = "llvm.intr.umax"(%xval, %expr) : (i32, i32) -> i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK-NEXT: %[[newval:.*]] = call i32 @llvm.umin.i32(i32 %[[xval]], i32 %[[expr]])
+  // CHECK-NEXT: store i32 %[[newval]], i32* %{{.*}}
+  // CHECK-NEXT: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK-NEXT: %{{.*}} = cmpxchg i32* %[[x]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = "llvm.intr.umin"(%xval, %expr) : (i32, i32) -> i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float
+  // CHECK: %[[newval:.*]] = fadd float %{{.*}}, %[[exprf]]
+  // CHECK: store float %[[newval]], float* %{{.*}}
+  // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32*
+  // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store float %[[xvalf]], float* %[[vf]]
+  omp.atomic.capture {
+    omp.atomic.read %vf = %xf : !llvm.ptr<f32>
+    omp.atomic.update %xf : !llvm.ptr<f32> {
+    ^bb0(%xval: f32):
+      %newval = llvm.fadd %xval, %exprf : f32
+      omp.yield(%newval : f32)
+    }
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float
+  // CHECK: %[[newval:.*]] = fsub float %{{.*}}, %[[exprf]]
+  // CHECK: store float %[[newval]], float* %{{.*}}
+  // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32*
+  // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store float %[[xvalf]], float* %[[vf]]
+  omp.atomic.capture {
+    omp.atomic.read %vf = %xf : !llvm.ptr<f32>
+    omp.atomic.update %xf : !llvm.ptr<f32> {
+    ^bb0(%xval: f32):
+      %newval = llvm.fsub %xval, %exprf : f32
+      omp.yield(%newval : f32)
+    }
+  }
+
+  llvm.return
+}
+
+// -----
+// CHECK-LABEL: @omp_atomic_capture_misc
+// CHECK-SAME: (i32* %[[x:.*]], i32* %[[v:.*]], i32 %[[expr:.*]], float* %[[xf:.*]], float* %[[vf:.*]], float %[[exprf:.*]])
+llvm.func @omp_atomic_capture_misc(
+  %x: !llvm.ptr<i32>, %v: !llvm.ptr<i32>, %expr: i32,
+  %xf: !llvm.ptr<f32>, %vf: !llvm.ptr<f32>, %exprf: f32) -> () {
+  // CHECK: %[[xval:.*]] = atomicrmw xchg i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK: store i32 %[[xval]], i32* %[[v]]
+  omp.atomic.capture{
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.write %x = %expr : !llvm.ptr<i32>, i32
+  }
+
+  // CHECK: %[[xval:.*]] = phi i32
+  // CHECK: %[[xvalf:.*]] = bitcast i32 %[[xval]] to float
+  // CHECK: store float %[[exprf]], float* %{{.*}}
+  // CHECK: %[[newval_:.*]] = load i32, i32* %{{.*}}
+  // CHECK: %[[xf_bitcast:.*]] = bitcast float* %[[xf]] to i32*
+  // CHECK: %{{.*}} = cmpxchg i32* %[[xf_bitcast]], i32 %[[xval]], i32 %[[newval_]] monotonic monotonic
+  // CHECK: store float %[[xvalf]], float* %[[vf]]
+  omp.atomic.capture{
+    omp.atomic.read %vf = %xf : !llvm.ptr<f32>
+    omp.atomic.write %xf = %exprf : !llvm.ptr<f32>, f32
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] seq_cst
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture memory_order(seq_cst) {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] acquire
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture memory_order(acquire) {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] release
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture memory_order(release) {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture memory_order(relaxed) {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  // CHECK: %[[res:.*]] = atomicrmw add i32* %[[x]], i32 %[[expr]] acq_rel
+  // CHECK: store i32 %[[res]], i32* %[[v]]
+  omp.atomic.capture memory_order(acq_rel) {
+    omp.atomic.read %v = %x : !llvm.ptr<i32>
+    omp.atomic.update %x : !llvm.ptr<i32> {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+  }
+
+  llvm.return
+}
+
+// -----
+
 // CHECK-LABEL: @omp_sections_empty
 llvm.func @omp_sections_empty() -> () {
  omp.sections {