[RISCV] Add target feature to force-enable atomics

This adds a +forced-atomics target feature with the same semantics
as +atomics-32 on ARM (D130480). For RISCV targets without the +a
extension, this forces LLVM to assume that lock-free atomics
(up to 32/64 bits for riscv32/64 respectively) are available.

This means that atomic loads and stores are lowered to plain loads and
stores (plus fences as necessary), as these are guaranteed to be atomic
as long as they are naturally aligned. Atomic RMW/CAS are lowered to
__sync (rather than __atomic) libcalls. Responsibility for providing
the __sync libcalls lies with the user; for privileged single-core code
they can be implemented by disabling interrupts. Code built with
+forced-atomics is not ABI-compatible with code built without it if
atomic variables cross the ABI boundary.
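
To illustrate how the __sync libcalls might be provided, here is a
minimal sketch, not part of this patch, of one such routine for a
single-core machine-mode system. The function name follows the __sync
libcall naming scheme; masking interrupts via mstatus.MIE and the rest
of the structure are assumptions about the target environment:

  #include <stdint.h>

  #define MSTATUS_MIE 0x8u /* machine-mode interrupt-enable bit */

  /* Hypothetical user-provided __sync libcall for a single-core, M-mode
     system. Atomicity comes from masking interrupts around the
     read-modify-write, which is only sound if no other hart or DMA
     agent can touch the variable. */
  uint32_t __sync_val_compare_and_swap_4(volatile uint32_t *ptr,
                                         uint32_t expected,
                                         uint32_t desired) {
    uint32_t prev;
    /* Clear mstatus.MIE, remembering the previous mstatus value. */
    __asm__ volatile("csrrc %0, mstatus, %1"
                     : "=r"(prev) : "r"(MSTATUS_MIE) : "memory");
    uint32_t old = *ptr;
    if (old == expected)
      *ptr = desired;
    /* Re-enable interrupts only if they were enabled on entry. */
    if (prev & MSTATUS_MIE)
      __asm__ volatile("csrs mstatus, %0" : : "r"(MSTATUS_MIE) : "memory");
    return old;
  }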

For context, the difference between the __sync and __atomic libcalls
is that the former are required to be lock-free, while the latter may
be implemented with a shared global lock and must therefore all come
from a single library (such as libatomic). See
https://llvm.org/docs/Atomics.html#libcalls-atomic for a detailed
discussion of the topic.
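
As a concrete sketch of the difference, consider the following C
fragment for an rv32i target with +forced-atomics enabled. The comments
describe how this patch lowers the corresponding LLVM atomic
operations; whether a given C front end emits those inline atomic
operations for a target without +a (rather than calling __atomic_*
routines directly) is a front-end decision and is assumed here:

  #include <stdint.h>

  uint32_t counter;

  /* Native-width atomic load: the corresponding LLVM atomic load is
     lowered inline to a plain lw plus the fences required for seq_cst
     ordering, so no libcall is needed. */
  uint32_t peek(void) {
    return __atomic_load_n(&counter, __ATOMIC_SEQ_CST);
  }

  /* Atomic read-modify-write: the corresponding LLVM atomicrmw is
     lowered to a call to the lock-free __sync_fetch_and_add_4 libcall
     (not the potentially lock-based __atomic_fetch_add_4), which the
     user must provide. */
  uint32_t bump(void) {
    return __atomic_fetch_add(&counter, 1, __ATOMIC_SEQ_CST);
  }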

This target feature will be used by Rust's riscv32i target family
to support the use of atomic load/store without atomic RMW/CAS.

Differential Revision: https://reviews.llvm.org/D130621
Nikita Popov 2022-07-27 11:29:54 +02:00
parent 6da3f90195
commit f5ed0cb217
6 changed files with 3697 additions and 17 deletions

llvm/docs/Atomics.rst

@@ -581,6 +581,13 @@ case. The only common architecture without that property is SPARC -- SPARCV8 SMP
 systems were common, yet it doesn't support any sort of compare-and-swap
 operation.
 
+Some targets (like RISCV) support a ``+forced-atomics`` target feature, which
+enables the use of lock-free atomics even if LLVM is not aware of any specific
+OS support for them. In this case, the user is responsible for ensuring that
+necessary ``__sync_*`` implementations are available. Code using
+``+forced-atomics`` is ABI-incompatible with code not using the feature, if
+atomic variables cross the ABI boundary.
+
 In either of these cases, the Target in LLVM can claim support for atomics of an
 appropriate size, and then implement some subset of the operations via libcalls
 to a ``__sync_*`` function. Such functions *must* not use locks in their

llvm/lib/Target/RISCV/RISCV.td

@@ -481,6 +481,17 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                    "SiFive 7-Series processors",
                                    [TuneNoDefaultUnroll]>;
 
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<
+    "forced-atomics", "HasForcedAtomics", "true",
+    "Assume that lock-free native-width atomics are available">;
+def HasAtomicLdSt
+    : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
+
 //===----------------------------------------------------------------------===//
 // Named operands for CSR instructions.
 //===----------------------------------------------------------------------===//

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -411,6 +411,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtA()) {
     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
     setMinCmpXchgSizeInBits(32);
+  } else if (Subtarget.hasForcedAtomics()) {
+    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
   } else {
     setMaxAtomicSizeInBitsSupported(0);
   }
@@ -929,6 +931,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
+  if (Subtarget.hasForcedAtomics()) {
+    // Set atomic rmw/cas operations to expand to force __sync libcalls.
+    setOperationAction(
+        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
+         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
+         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
+         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
+        XLenVT, Expand);
+  }
+
   // Function alignments.
   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
   setMinFunctionAlignment(FunctionAlignment);
@@ -12286,6 +12298,10 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (AI->isFloatingPointOperation())
     return AtomicExpansionKind::CmpXChg;
 
+  // Don't expand forced atomics, we want to have __sync libcalls instead.
+  if (Subtarget.hasForcedAtomics())
+    return AtomicExpansionKind::None;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;
@@ -12389,6 +12405,10 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
 TargetLowering::AtomicExpansionKind
 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *CI) const {
+  // Don't expand forced atomics, we want to have __sync libcalls instead.
+  if (Subtarget.hasForcedAtomics())
+    return AtomicExpansionKind::None;
+
   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;

llvm/lib/Target/RISCV/RISCVInstrInfoA.td

@@ -105,20 +105,25 @@ defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">,
 // Pseudo-instructions and codegen patterns
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtA] in {
-
-/// Atomic loads and stores
-
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
-
-defm : LdPat<atomic_load_8,  LB>;
-defm : LdPat<atomic_load_16, LH>;
-defm : LdPat<atomic_load_32, LW>;
-
-defm : AtomicStPat<atomic_store_8,  SB, GPR>;
-defm : AtomicStPat<atomic_store_16, SH, GPR>;
-defm : AtomicStPat<atomic_store_32, SW, GPR>;
+// Atomic load/store are available under both +a and +force-atomics.
+// Fences will be inserted for atomic load/stores according to the logic in
+// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+let Predicates = [HasAtomicLdSt] in {
+  defm : LdPat<atomic_load_8,  LB>;
+  defm : LdPat<atomic_load_16, LH>;
+  defm : LdPat<atomic_load_32, LW>;
+
+  defm : AtomicStPat<atomic_store_8,  SB, GPR>;
+  defm : AtomicStPat<atomic_store_16, SH, GPR>;
+  defm : AtomicStPat<atomic_store_32, SW, GPR>;
+}
+
+let Predicates = [HasAtomicLdSt, IsRV64] in {
+  defm : LdPat<atomic_load_64, LD, i64>;
+  defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
+}
+
+let Predicates = [HasStdExtA] in {
 
 /// AMOs
@@ -304,13 +309,6 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
 
 let Predicates = [HasStdExtA, IsRV64] in {
 
-/// 64-bit atomic loads and stores
-
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
-
-defm : LdPat<atomic_load_64, LD, i64>;
-defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
 defm : AMOPat<"atomic_swap_64", "AMOSWAP_D">;
 defm : AMOPat<"atomic_load_add_64", "AMOADD_D">;
 defm : AMOPat<"atomic_load_and_64", "AMOAND_D">;

llvm/lib/Target/RISCV/RISCVSubtarget.h

@@ -98,6 +98,7 @@ private:
   bool EnableSaveRestore = false;
   bool EnableUnalignedScalarMem = false;
   bool HasLUIADDIFusion = false;
+  bool HasForcedAtomics = false;
   unsigned XLen = 32;
   unsigned ZvlLen = 0;
   MVT XLenVT = MVT::i32;
@@ -194,6 +195,7 @@ public:
   bool enableSaveRestore() const { return EnableSaveRestore; }
   bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
   bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
+  bool hasForcedAtomics() const { return HasForcedAtomics; }
   MVT getXLenVT() const { return XLenVT; }
   unsigned getXLen() const { return XLen; }
   unsigned getFLen() const {

File diff suppressed because it is too large.