From 2f497ec3a0056f15727ee6008211aeb2c4a8f455 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 17 Feb 2022 01:24:16 -0800 Subject: [PATCH] [ARM] Fix ARM backend to correctly use atomic expansion routines. Without this patch, clang would generate calls to __sync_* routines on targets where it does not make sense; we can't assume the routines exist on unknown targets. Linux has special implementations of the routines that work on old ARM targets; other targets have no such routines. In general, atomic operations that aren't natively supported should go through libatomic (__atomic_*) APIs, which can support arbitrary atomics through locks. ARM targets older than v6, where this patch makes a difference, are rare in practice, but not completely extinct. See, for example, discussion on D116088. This also affects Cortex-M0, but I don't think __sync_* routines actually exist in any Cortex-M0 libraries. So in practice this just leads to a slightly different linker error for those cases, I think. Mechanically, this patch does the following: - Ensures we run atomic expansion unconditionally; it never makes sense to completely skip it. - Fixes getMaxAtomicSizeInBitsSupported() so it returns an appropriate number on all ARM subtargets. - Fixes shouldExpandAtomicRMWInIR() and shouldExpandAtomicCmpXchgInIR() to correctly handle subtargets that don't have atomic instructions. 
Differential Revision: https://reviews.llvm.org/D120026 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 62 ++++++++++++++----- llvm/lib/Target/ARM/ARMSubtarget.cpp | 2 - llvm/lib/Target/ARM/ARMSubtarget.h | 3 - llvm/test/CodeGen/ARM/atomic-64bit.ll | 26 ++++---- llvm/test/CodeGen/ARM/atomic-load-store.ll | 4 +- llvm/test/CodeGen/ARM/atomic-op.ll | 62 +++++++++---------- llvm/test/CodeGen/ARM/atomic-ops-m33.ll | 2 +- .../ARM/atomicrmw_exclusive_monitor_ints.ll | 22 +++---- 8 files changed, 104 insertions(+), 79 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index cdf1cbf183aa..11c3c3192eb3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1369,6 +1369,29 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } + // Compute supported atomic widths. + if (Subtarget->isTargetLinux() || + (!Subtarget->isMClass() && Subtarget->hasV6Ops())) { + // For targets where __sync_* routines are reliably available, we use them + // if necessary. + // + // ARM Linux always supports 64-bit atomics through kernel-assisted atomic + // routines (kernel 3.1 or later). FIXME: Not with compiler-rt? + // + // ARMv6 targets have native instructions in ARM mode. For Thumb mode, + // such targets should provide __sync_* routines, which use the ARM mode + // instructions. (ARMv6 doesn't have dmb, but it has an equivalent + // encoding; see ARMISD::MEMBARRIER_MCR.) + setMaxAtomicSizeInBitsSupported(64); + } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) { + // Cortex-M (besides Cortex-M0) have 32-bit atomics. + setMaxAtomicSizeInBitsSupported(32); + } else { + // We can't assume anything about other targets; just use libatomic + // routines. + setMaxAtomicSizeInBitsSupported(0); + } + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. 
@@ -20978,19 +21001,25 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { if (AI->isFloatingPointOperation()) return AtomicExpansionKind::CmpXChg; - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. If the target address is also on the - // stack and close enough to the spill slot, this can lead to a situation - // where the monitor always gets cleared and the atomic operation can never - // succeed. So at -O0 lower this operation to a CAS loop. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); - return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) - ? AtomicExpansionKind::LLSC - : AtomicExpansionKind::None; + bool hasAtomicRMW; + if (Subtarget->isMClass()) + hasAtomicRMW = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + hasAtomicRMW = Subtarget->hasV7Ops(); + else + hasAtomicRMW = Subtarget->hasV6Ops(); + if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) { + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 lower this operation to a CAS loop. 
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; + } + return AtomicExpansionKind::None; } // Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32 @@ -21003,8 +21032,13 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); - bool HasAtomicCmpXchg = - !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); + bool HasAtomicCmpXchg; + if (Subtarget->isMClass()) + HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + HasAtomicCmpXchg = Subtarget->hasV7Ops(); + else + HasAtomicCmpXchg = Subtarget->hasV6Ops(); if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U)) return AtomicExpansionKind::LLSC; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index f1040ee8c790..b62f447e8d58 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -411,8 +411,6 @@ bool ARMSubtarget::enablePostRAMachineScheduler() const { return !isThumb1Only(); } -bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } - bool ARMSubtarget::useStride4VFPs() const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). But WatchOS uses a compact unwind diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index b9c6245179a9..d426157c5453 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -478,9 +478,6 @@ public: /// scheduling, DAGCombine, etc.). 
bool useAA() const override { return true; } - // enableAtomicExpand- True if we need to expand our atomics. - bool enableAtomicExpand() const override; - /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData *getInstrItineraryData() const override { diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll index eadefcd23bc6..f000b8a82188 100644 --- a/llvm/test/CodeGen/ARM/atomic-64bit.ll +++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll @@ -30,7 +30,7 @@ define i64 @test1(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_add_8 +; CHECK-M: __atomic_fetch_add_8 %r = atomicrmw add i64* %ptr, i64 %val seq_cst ret i64 %r @@ -61,7 +61,7 @@ define i64 @test2(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_sub_8 +; CHECK-M: __atomic_fetch_sub_8 %r = atomicrmw sub i64* %ptr, i64 %val seq_cst ret i64 %r @@ -92,7 +92,7 @@ define i64 @test3(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_and_8 +; CHECK-M: __atomic_fetch_and_8 %r = atomicrmw and i64* %ptr, i64 %val seq_cst ret i64 %r @@ -123,7 +123,7 @@ define i64 @test4(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_or_8 +; CHECK-M: __atomic_fetch_or_8 %r = atomicrmw or i64* %ptr, i64 %val seq_cst ret i64 %r @@ -154,7 +154,7 @@ define i64 @test5(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_xor_8 +; CHECK-M: __atomic_fetch_xor_8 %r = atomicrmw xor i64* %ptr, i64 %val seq_cst ret i64 %r @@ -177,7 +177,7 @@ define i64 @test6(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_lock_test_and_set_8 +; CHECK-M: __atomic_exchange_8 %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst ret i64 %r @@ -213,7 +213,7 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) 
{ ; CHECK-THUMB: beq ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_val_compare_and_swap_8 +; CHECK-M: __atomic_compare_exchange_8 %pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst %r = extractvalue { i64, i1 } %pair, 0 @@ -237,7 +237,7 @@ define i64 @test8(i64* %ptr) { ; CHECK-THUMB-NOT: strexd ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_val_compare_and_swap_8 +; CHECK-M: __atomic_load_8 %r = load atomic i64, i64* %ptr seq_cst, align 8 ret i64 %r @@ -263,7 +263,7 @@ define void @test9(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_lock_test_and_set_8 +; CHECK-M: __atomic_store_8 store atomic i64 %val, i64* %ptr seq_cst, align 8 ret void @@ -308,7 +308,7 @@ define i64 @test10(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_min_8 +; CHECK-M: __atomic_compare_exchange_8 %r = atomicrmw min i64* %ptr, i64 %val seq_cst ret i64 %r @@ -353,7 +353,7 @@ define i64 @test11(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_umin_8 +; CHECK-M: __atomic_compare_exchange_8 %r = atomicrmw umin i64* %ptr, i64 %val seq_cst ret i64 %r @@ -398,7 +398,7 @@ define i64 @test12(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_max_8 +; CHECK-M: __atomic_compare_exchange_8 %r = atomicrmw max i64* %ptr, i64 %val seq_cst ret i64 %r @@ -443,7 +443,7 @@ define i64 @test13(i64* %ptr, i64 %val) { ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_umax_8 +; CHECK-M: __atomic_compare_exchange_8 %r = atomicrmw umax i64* %ptr, i64 %val seq_cst ret i64 %r diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 5db81781a7f7..576882eaaa0b 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -94,14 +94,14 @@ define void @test4(i8* %ptr1, i8* %ptr2) { define i64 
@test_old_load_64bit(i64* %p) { ; ARMV4-LABEL: test_old_load_64bit -; ARMV4: ___sync_val_compare_and_swap_8 +; ARMV4: ___atomic_load_8 %1 = load atomic i64, i64* %p seq_cst, align 8 ret i64 %1 } define void @test_old_store_64bit(i64* %p, i64 %v) { ; ARMV4-LABEL: test_old_store_64bit -; ARMV4: ___sync_lock_test_and_set_8 +; ARMV4: ___atomic_store_8 store atomic i64 %v, i64* %p seq_cst, align 8 ret void } diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll index 8ab20267a18f..be4f1868b44a 100644 --- a/llvm/test/CodeGen/ARM/atomic-op.ll +++ b/llvm/test/CodeGen/ARM/atomic-op.ll @@ -31,7 +31,7 @@ entry: ; CHECK: add ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_add_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_add_4 + ; CHECK-T1-M0: bl ___atomic_fetch_add_4 ; CHECK-BAREMETAL: add ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw add i32* %val1, i32 %tmp monotonic @@ -41,7 +41,7 @@ entry: ; CHECK: sub ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_sub_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_sub_4 + ; CHECK-T1-M0: bl ___atomic_fetch_sub_4 ; CHECK-BAREMETAL: sub ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw sub i32* %val2, i32 30 monotonic @@ -51,7 +51,7 @@ entry: ; CHECK: add ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_add_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_add_4 + ; CHECK-T1-M0: bl ___atomic_fetch_add_4 ; CHECK-BAREMETAL: add ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw add i32* %val2, i32 1 monotonic @@ -61,7 +61,7 @@ entry: ; CHECK: sub ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_sub_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_sub_4 + ; CHECK-T1-M0: bl ___atomic_fetch_sub_4 ; CHECK-BAREMETAL: sub ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw sub i32* %val2, i32 1 monotonic @@ -71,7 +71,7 @@ entry: ; CHECK: and ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_and_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_and_4 + ; CHECK-T1-M0: bl ___atomic_fetch_and_4 ; CHECK-BAREMETAL: and ; CHECK-BAREMETAL-NOT: __sync %4 = atomicrmw and i32* %andt, i32 
4080 monotonic @@ -81,7 +81,7 @@ entry: ; CHECK: or ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_or_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_or_4 + ; CHECK-T1-M0: bl ___atomic_fetch_or_4 ; CHECK-BAREMETAL: or ; CHECK-BAREMETAL-NOT: __sync %5 = atomicrmw or i32* %ort, i32 4080 monotonic @@ -91,7 +91,7 @@ entry: ; CHECK: eor ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_xor_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_xor_4 + ; CHECK-T1-M0: bl ___atomic_fetch_xor_4 ; CHECK-BAREMETAL: eor ; CHECK-BAREMETAL-NOT: __sync %6 = atomicrmw xor i32* %xort, i32 4080 monotonic @@ -101,7 +101,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_min_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_min_4 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %7 = atomicrmw min i32* %val2, i32 16 monotonic @@ -112,7 +112,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_min_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_min_4 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %8 = atomicrmw min i32* %val2, i32 %neg monotonic @@ -122,7 +122,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_max_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_max_4 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %9 = atomicrmw max i32* %val2, i32 1 monotonic @@ -133,7 +133,7 @@ entry: ; CHECK-NOT: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_max_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_max_4 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_4 ; CHECK-BAREMETAL: bic ; CHECK-BAREMETAL-NOT: __sync %10 = atomicrmw max i32* %val2, i32 0 monotonic @@ -143,7 +143,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_umin_4 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %11 = atomicrmw umin i32* 
%val2, i32 16 monotonic @@ -154,7 +154,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_umin_4 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic @@ -164,7 +164,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_umax_4 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %13 = atomicrmw umax i32* %val2, i32 1 monotonic @@ -174,7 +174,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_4 - ; CHECK-T1-M0: bl ___sync_fetch_and_umax_4 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %14 = atomicrmw umax i32* %val2, i32 0 monotonic @@ -192,7 +192,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_2 - ; CHECK-T1-M0: bl ___sync_fetch_and_umin_2 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_2 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i16* %val, i16 16 monotonic @@ -202,7 +202,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_2 - ; CHECK-T1-M0: bl ___sync_fetch_and_umin_2 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_2 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw umin i16* %val, i16 %uneg monotonic @@ -211,7 +211,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_2 - ; CHECK-T1-M0: bl ___sync_fetch_and_umax_2 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_2 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw umax i16* %val, i16 1 monotonic @@ -220,7 +220,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_2 - ; CHECK-T1-M0: bl ___sync_fetch_and_umax_2 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_2 ; CHECK-BAREMETAL: cmp ; 
CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw umax i16* %val, i16 0 monotonic @@ -237,7 +237,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_1 - ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i8* %val, i8 16 monotonic @@ -246,7 +246,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_1 - ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %uneg = sub i8 0, 1 @@ -256,7 +256,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_1 - ; CHECK-T1-M0: bl ___sync_fetch_and_umax_1 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw umax i8* %val, i8 1 monotonic @@ -265,7 +265,7 @@ entry: ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_1 - ; CHECK-T1-M0: bl ___sync_fetch_and_umax_1 + ; CHECK-T1-M0: bl ___atomic_compare_exchange_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw umax i8* %val, i8 0 monotonic @@ -360,10 +360,8 @@ define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind { ; CHECK: dmb ; CHECK: add r0, -; CHECK-T1-M0: ldr {{r[0-9]}}, [r0] -; CHECK-T1-M0: dmb -; CHECK-T1-M0: ldr {{r[0-9]}}, [r1] -; CHECK-T1-M0: dmb +; CHECK-T1-M0: __atomic_load_4 +; CHECK-T1-M0: __atomic_load_4 ; CHECK-T1: ___sync_val_compare_and_swap_4 ; CHECK-T1: ___sync_val_compare_and_swap_4 @@ -390,10 +388,8 @@ define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) { ; CHECK-T1: ___sync_lock_test_and_set ; CHECK-T1: ___sync_lock_test_and_set -; CHECK-T1-M0: dmb -; CHECK-T1-M0: str r1, [r0] -; CHECK-T1-M0: dmb -; CHECK-T1-M0: str r3, [r2] +; CHECK-T1-M0: __atomic_store_4 +; CHECK-T1-M0: __atomic_store_4 ; CHECK-BAREMETAL-NOT: dmb ; CHECK-BAREMETAL: str r1, [r0] @@ -413,9 
+409,9 @@ define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) { ; CHECK: dmb ; CHECK: str [[R0]], [r1] -; CHECK-T1-M0: ldr [[R0:r[0-9]]], [r0] +; CHECK-T1-M0: __atomic_load_4 ; CHECK-T1-M0: dmb -; CHECK-T1-M0: str [[R0]], [r1] +; CHECK-T1-M0: __atomic_store_4 ; CHECK-T1: ldr [[R0:r[0-9]]], [{{r[0-9]+}}] ; CHECK-T1: {{dmb|bl ___sync_synchronize}} diff --git a/llvm/test/CodeGen/ARM/atomic-ops-m33.ll b/llvm/test/CodeGen/ARM/atomic-ops-m33.ll index 474ad8960cf5..4eadded66226 100644 --- a/llvm/test/CodeGen/ARM/atomic-ops-m33.ll +++ b/llvm/test/CodeGen/ARM/atomic-ops-m33.ll @@ -71,7 +71,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { define void @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64: -; CHECK: bl __sync_fetch_and_add_8 +; CHECK: bl __atomic_fetch_add_8 %old = atomicrmw add i64* @var64, i64 %offset monotonic store i64 %old, i64* @var64 ret void diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll index 277843cb76e7..dfb4af722954 100644 --- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll @@ -355,7 +355,7 @@ define i64 @test_xchg_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_lock_test_and_set_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_exchange_8 entry: %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -366,7 +366,7 @@ define i64 @test_add_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_add_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_fetch_add_8 entry: %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -377,7 +377,7 @@ define i64 @test_sub_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_sub_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl 
__atomic_fetch_sub_8 entry: %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -388,7 +388,7 @@ define i64 @test_and_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_and_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_fetch_and_8 entry: %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -399,7 +399,7 @@ define i64 @test_nand_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_nand_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_fetch_nand_8 entry: %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -410,7 +410,7 @@ define i64 @test_or_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_or_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_fetch_or_8 entry: %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -421,7 +421,7 @@ define i64 @test_xor_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_xor_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_fetch_xor_8 entry: %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -433,7 +433,7 @@ define i64 @test_max_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_max_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_compare_exchange_8 entry: %0 = atomicrmw max i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -444,7 +444,7 @@ define i64 @test_min_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_min_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_compare_exchange_8 entry: %0 = atomicrmw min i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -455,7 +455,7 @@ define i64 @test_umax_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_umax_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl 
__atomic_compare_exchange_8 entry: %0 = atomicrmw umax i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -466,7 +466,7 @@ define i64 @test_umin_i64() { ; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_umin_8 -; BASELINE64: bl __sync_val_compare_and_swap_8 +; BASELINE64: bl __atomic_compare_exchange_8 entry: %0 = atomicrmw umin i64* @atomic_i64, i64 1 monotonic ret i64 %0