[RISCV] Add target feature to force-enable atomics

This adds a +forced-atomics target feature with the same semantics
as +atomics-32 on ARM (D130480). For RISCV targets without the +a
extension, this forces LLVM to assume that lock-free atomics
(up to 32/64 bits for riscv32/64 respectively) are available.

This means that atomic loads and stores are lowered to plain loads and
stores (plus fences as necessary), as these are guaranteed to be atomic
as long as they are naturally aligned. Atomic RMW/CAS are lowered to
__sync (rather than __atomic) libcalls. Responsibility for providing
the __sync libcalls lies with the user; for privileged single-core code
they can be implemented by disabling interrupts. Code built with
+forced-atomics is not ABI-compatible with code built without it if
atomic variables cross the ABI boundary.
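
To illustrate how the __sync libcalls might be provided, here is a
minimal sketch, not part of this patch, of one such routine for a
single-core machine-mode system. The function name follows the __sync
libcall naming scheme; masking interrupts via mstatus.MIE and the rest
of the structure are assumptions about the target environment:

  #include <stdint.h>

  #define MSTATUS_MIE 0x8u /* machine-mode interrupt-enable bit */

  /* Hypothetical user-provided __sync libcall for a single-core, M-mode
     system. Atomicity comes from masking interrupts around the
     read-modify-write, which is only sound if no other hart or DMA
     agent can touch the variable. */
  uint32_t __sync_val_compare_and_swap_4(volatile uint32_t *ptr,
                                         uint32_t expected,
                                         uint32_t desired) {
    uint32_t prev;
    /* Clear mstatus.MIE, remembering the previous mstatus value. */
    __asm__ volatile("csrrc %0, mstatus, %1"
                     : "=r"(prev) : "r"(MSTATUS_MIE) : "memory");
    uint32_t old = *ptr;
    if (old == expected)
      *ptr = desired;
    /* Re-enable interrupts only if they were enabled on entry. */
    if (prev & MSTATUS_MIE)
      __asm__ volatile("csrs mstatus, %0" : : "r"(MSTATUS_MIE) : "memory");
    return old;
  }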

For context, the difference between the __sync and __atomic libcalls
is that the former are required to be lock-free, while the latter may
be implemented with a shared global lock and must therefore all come
from a single library (such as libatomic). See
https://llvm.org/docs/Atomics.html#libcalls-atomic for a detailed
discussion of the topic.
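
As a concrete sketch of the difference, consider the following C
fragment for an rv32i target with +forced-atomics enabled. The comments
describe how this patch lowers the corresponding LLVM atomic
operations; whether a given C front end emits those inline atomic
operations for a target without +a (rather than calling __atomic_*
routines directly) is a front-end decision and is assumed here:

  #include <stdint.h>

  uint32_t counter;

  /* Native-width atomic load: the corresponding LLVM atomic load is
     lowered inline to a plain lw plus the fences required for seq_cst
     ordering, so no libcall is needed. */
  uint32_t peek(void) {
    return __atomic_load_n(&counter, __ATOMIC_SEQ_CST);
  }

  /* Atomic read-modify-write: the corresponding LLVM atomicrmw is
     lowered to a call to the lock-free __sync_fetch_and_add_4 libcall
     (not the potentially lock-based __atomic_fetch_add_4), which the
     user must provide. */
  uint32_t bump(void) {
    return __atomic_fetch_add(&counter, 1, __ATOMIC_SEQ_CST);
  }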

This target feature will be used by Rust's riscv32i target family
to support the use of atomic load/store without atomic RMW/CAS.

Differential Revision: https://reviews.llvm.org/D130621
Nikita Popov 2022-07-27 11:29:54 +02:00
parent 6da3f90195
commit f5ed0cb217
6 changed files with 3697 additions and 17 deletions

llvm/docs/Atomics.rst

@@ -581,6 +581,13 @@ case. The only common architecture without that property is SPARC -- SPARCV8 SMP
 systems were common, yet it doesn't support any sort of compare-and-swap
 operation.
 
+Some targets (like RISCV) support a ``+forced-atomics`` target feature, which
+enables the use of lock-free atomics even if LLVM is not aware of any specific
+OS support for them. In this case, the user is responsible for ensuring that
+necessary ``__sync_*`` implementations are available. Code using
+``+forced-atomics`` is ABI-incompatible with code not using the feature, if
+atomic variables cross the ABI boundary.
+
 In either of these cases, the Target in LLVM can claim support for atomics of an
 appropriate size, and then implement some subset of the operations via libcalls
 to a ``__sync_*`` function. Such functions *must* not use locks in their

llvm/lib/Target/RISCV/RISCV.td

@@ -481,6 +481,17 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                    "SiFive 7-Series processors",
                                    [TuneNoDefaultUnroll]>;
 
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<
+    "forced-atomics", "HasForcedAtomics", "true",
+    "Assume that lock-free native-width atomics are available">;
+def HasAtomicLdSt
+    : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
+
 //===----------------------------------------------------------------------===//
 // Named operands for CSR instructions.
 //===----------------------------------------------------------------------===//

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -411,6 +411,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtA()) {
     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
     setMinCmpXchgSizeInBits(32);
+  } else if (Subtarget.hasForcedAtomics()) {
+    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
   } else {
     setMaxAtomicSizeInBitsSupported(0);
   }
@@ -929,6 +931,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
+  if (Subtarget.hasForcedAtomics()) {
+    // Set atomic rmw/cas operations to expand to force __sync libcalls.
+    setOperationAction(
+        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
+         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
+         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
+         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
+        XLenVT, Expand);
+  }
+
   // Function alignments.
   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
   setMinFunctionAlignment(FunctionAlignment);
@@ -12286,6 +12298,10 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (AI->isFloatingPointOperation())
     return AtomicExpansionKind::CmpXChg;
 
+  // Don't expand forced atomics, we want to have __sync libcalls instead.
+  if (Subtarget.hasForcedAtomics())
+    return AtomicExpansionKind::None;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;
@@ -12389,6 +12405,10 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
 TargetLowering::AtomicExpansionKind
 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *CI) const {
+  // Don't expand forced atomics, we want to have __sync libcalls instead.
+  if (Subtarget.hasForcedAtomics())
+    return AtomicExpansionKind::None;
+
   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;

llvm/lib/Target/RISCV/RISCVInstrInfoA.td

@@ -105,20 +105,25 @@ defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">,
 // Pseudo-instructions and codegen patterns
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtA] in {
-
-/// Atomic loads and stores
-
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
-
-defm : LdPat<atomic_load_8,  LB>;
-defm : LdPat<atomic_load_16, LH>;
-defm : LdPat<atomic_load_32, LW>;
-
-defm : AtomicStPat<atomic_store_8,  SB, GPR>;
-defm : AtomicStPat<atomic_store_16, SH, GPR>;
-defm : AtomicStPat<atomic_store_32, SW, GPR>;
+// Atomic load/store are available under both +a and +force-atomics.
+// Fences will be inserted for atomic load/stores according to the logic in
+// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+let Predicates = [HasAtomicLdSt] in {
+  defm : LdPat<atomic_load_8,  LB>;
+  defm : LdPat<atomic_load_16, LH>;
+  defm : LdPat<atomic_load_32, LW>;
+
+  defm : AtomicStPat<atomic_store_8,  SB, GPR>;
+  defm : AtomicStPat<atomic_store_16, SH, GPR>;
+  defm : AtomicStPat<atomic_store_32, SW, GPR>;
+}
+
+let Predicates = [HasAtomicLdSt, IsRV64] in {
+  defm : LdPat<atomic_load_64, LD, i64>;
+  defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
+}
+
+let Predicates = [HasStdExtA] in {
 
 /// AMOs
@@ -304,13 +309,6 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
 
 let Predicates = [HasStdExtA, IsRV64] in {
 
-/// 64-bit atomic loads and stores
-
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
-
-defm : LdPat<atomic_load_64, LD, i64>;
-defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
 defm : AMOPat<"atomic_swap_64", "AMOSWAP_D">;
 defm : AMOPat<"atomic_load_add_64", "AMOADD_D">;
 defm : AMOPat<"atomic_load_and_64", "AMOAND_D">;

llvm/lib/Target/RISCV/RISCVSubtarget.h

@@ -98,6 +98,7 @@ private:
   bool EnableSaveRestore = false;
   bool EnableUnalignedScalarMem = false;
   bool HasLUIADDIFusion = false;
+  bool HasForcedAtomics = false;
   unsigned XLen = 32;
   unsigned ZvlLen = 0;
   MVT XLenVT = MVT::i32;
@@ -194,6 +195,7 @@ public:
   bool enableSaveRestore() const { return EnableSaveRestore; }
   bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
   bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
+  bool hasForcedAtomics() const { return HasForcedAtomics; }
   MVT getXLenVT() const { return XLenVT; }
   unsigned getXLen() const { return XLen; }
   unsigned getFLen() const {

File diff suppressed because it is too large.