[ARM] Fix ARM backend to correctly use atomic expansion routines.
Without this patch, clang would generate calls to __sync_* routines on targets where it does not make sense; we can't assume the routines exist on unknown targets. Linux has special implementations of the routines that work on old ARM targets; other targets have no such routines. In general, atomic operations which aren't natively supported should go through libatomic (__atomic_*) APIs, which can support arbitrary atomics through locks.

ARM targets older than v6, where this patch makes a difference, are rare in practice, but not completely extinct; see, for example, the discussion on D116088.

This also affects Cortex-M0, but I don't think __sync_* routines actually exist in any Cortex-M0 libraries, so in practice this just changes one linker error into a slightly different one.

Mechanically, this patch does the following:

- Ensures we run atomic expansion unconditionally; it never makes sense to completely skip it.
- Fixes getMaxAtomicSizeInBitsSupported() so it returns an appropriate number on all ARM subtargets.
- Fixes shouldExpandAtomicRMWInIR() and shouldExpandAtomicCmpXchgInIR() to correctly handle subtargets that don't have atomic instructions.

Differential Revision: https://reviews.llvm.org/D120026
parent 3ac84c430b
commit 2f497ec3a0
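As a rough illustration of the intended change (a sketch based on the updated tests below, not text from the patch; the triple and function name are hypothetical): on a pre-v6 or Cortex-M0-class target without reliable __sync_* routines, an atomic RMW should now lower to a libatomic call instead of a __sync_* call.

; Hypothetical reproducer, e.g. llc -mtriple=armv4t-none-eabi on:
define i32 @fetch_add(i32* %p, i32 %v) {
  %old = atomicrmw add i32* %p, i32 %v seq_cst
  ret i32 %old
}
; Before this patch: a call to __sync_fetch_and_add_4 (which may not exist outside Linux).
; After this patch:  a call to __atomic_fetch_add_4, provided by libatomic.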
@@ -1369,6 +1369,29 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     }
   }

+  // Compute supported atomic widths.
+  if (Subtarget->isTargetLinux() ||
+      (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
+    // For targets where __sync_* routines are reliably available, we use them
+    // if necessary.
+    //
+    // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
+    // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
+    //
+    // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
+    // such targets should provide __sync_* routines, which use the ARM mode
+    // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
+    // encoding; see ARMISD::MEMBARRIER_MCR.)
+    setMaxAtomicSizeInBitsSupported(64);
+  } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
+    // Cortex-M (besides Cortex-M0) have 32-bit atomics.
+    setMaxAtomicSizeInBitsSupported(32);
+  } else {
+    // We can't assume anything about other targets; just use libatomic
+    // routines.
+    setMaxAtomicSizeInBitsSupported(0);
+  }
+
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
@@ -20978,19 +21001,25 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (AI->isFloatingPointOperation())
     return AtomicExpansionKind::CmpXChg;

-  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
-  // implement atomicrmw without spilling. If the target address is also on the
-  // stack and close enough to the spill slot, this can lead to a situation
-  // where the monitor always gets cleared and the atomic operation can never
-  // succeed. So at -O0 lower this operation to a CAS loop.
-  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
-    return AtomicExpansionKind::CmpXChg;
-
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
-             ? AtomicExpansionKind::LLSC
-             : AtomicExpansionKind::None;
+  bool hasAtomicRMW;
+  if (Subtarget->isMClass())
+    hasAtomicRMW = Subtarget->hasV8MBaselineOps();
+  else if (Subtarget->isThumb())
+    hasAtomicRMW = Subtarget->hasV7Ops();
+  else
+    hasAtomicRMW = Subtarget->hasV6Ops();
+  if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
+    // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+    // implement atomicrmw without spilling. If the target address is also on
+    // the stack and close enough to the spill slot, this can lead to a
+    // situation where the monitor always gets cleared and the atomic operation
+    // can never succeed. So at -O0 lower this operation to a CAS loop.
+    if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+      return AtomicExpansionKind::CmpXChg;
+    return AtomicExpansionKind::LLSC;
+  }
+  return AtomicExpansionKind::None;
 }

 // Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used  up to 32
@@ -21003,8 +21032,13 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
   unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
-  bool HasAtomicCmpXchg =
-      !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
+  bool HasAtomicCmpXchg;
+  if (Subtarget->isMClass())
+    HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
+  else if (Subtarget->isThumb())
+    HasAtomicCmpXchg = Subtarget->hasV7Ops();
+  else
+    HasAtomicCmpXchg = Subtarget->hasV6Ops();
   if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
       Size <= (Subtarget->isMClass() ? 32U : 64U))
     return AtomicExpansionKind::LLSC;
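For the shouldExpandAtomicCmpXchgInIR() change, a rough sketch of the expected effect (illustrative only; the triples and function name are not from the patch): a 32-bit cmpxchg still expands to an LL/SC loop where ldrex/strex exist, but falls back to libatomic on cores without them.

; Illustrative IR, e.g. llc -mtriple=thumbv7m-none-eabi vs. llc -mtriple=thumbv6m-none-eabi
define i32 @cas(i32* %p, i32 %old, i32 %new) {
  %pair = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}
; thumbv7m (Cortex-M3 and later): expected to expand to a ldrex/strex loop.
; thumbv6m (Cortex-M0): expected to become a call to __atomic_compare_exchange_4.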
@@ -411,8 +411,6 @@ bool ARMSubtarget::enablePostRAMachineScheduler() const {
   return !isThumb1Only();
 }

-bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
-
 bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
@@ -478,9 +478,6 @@ public:
   /// scheduling, DAGCombine, etc.).
   bool useAA() const override { return true; }

-  // enableAtomicExpand- True if we need to expand our atomics.
-  bool enableAtomicExpand() const override;
-
   /// getInstrItins - Return the instruction itineraries based on subtarget
   /// selection.
   const InstrItineraryData *getInstrItineraryData() const override {
@@ -30,7 +30,7 @@ define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_add_8
+; CHECK-M: __atomic_fetch_add_8

 %r = atomicrmw add i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -61,7 +61,7 @@ define i64 @test2(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_sub_8
+; CHECK-M: __atomic_fetch_sub_8

 %r = atomicrmw sub i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -92,7 +92,7 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_and_8
+; CHECK-M: __atomic_fetch_and_8

 %r = atomicrmw and i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -123,7 +123,7 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_or_8
+; CHECK-M: __atomic_fetch_or_8

 %r = atomicrmw or i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -154,7 +154,7 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_xor_8
+; CHECK-M: __atomic_fetch_xor_8

 %r = atomicrmw xor i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -177,7 +177,7 @@ define i64 @test6(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_lock_test_and_set_8
+; CHECK-M: __atomic_exchange_8

 %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -213,7 +213,7 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-THUMB: beq
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_val_compare_and_swap_8
+; CHECK-M: __atomic_compare_exchange_8

 %pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
 %r = extractvalue { i64, i1 } %pair, 0
@@ -237,7 +237,7 @@ define i64 @test8(i64* %ptr) {
 ; CHECK-THUMB-NOT: strexd
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_val_compare_and_swap_8
+; CHECK-M: __atomic_load_8

 %r = load atomic i64, i64* %ptr seq_cst, align 8
 ret i64 %r
@@ -263,7 +263,7 @@ define void @test9(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_lock_test_and_set_8
+; CHECK-M: __atomic_store_8

 store atomic i64 %val, i64* %ptr seq_cst, align 8
 ret void
@@ -308,7 +308,7 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_min_8
+; CHECK-M: __atomic_compare_exchange_8

 %r = atomicrmw min i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -353,7 +353,7 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_umin_8
+; CHECK-M: __atomic_compare_exchange_8

 %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -398,7 +398,7 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_max_8
+; CHECK-M: __atomic_compare_exchange_8

 %r = atomicrmw max i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -443,7 +443,7 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}

-; CHECK-M: __sync_fetch_and_umax_8
+; CHECK-M: __atomic_compare_exchange_8

 %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
 ret i64 %r
@@ -94,14 +94,14 @@ define void @test4(i8* %ptr1, i8* %ptr2) {

 define i64 @test_old_load_64bit(i64* %p) {
 ; ARMV4-LABEL: test_old_load_64bit
-; ARMV4: ___sync_val_compare_and_swap_8
+; ARMV4: ___atomic_load_8
 %1 = load atomic i64, i64* %p seq_cst, align 8
 ret i64 %1
 }

 define void @test_old_store_64bit(i64* %p, i64 %v) {
 ; ARMV4-LABEL: test_old_store_64bit
-; ARMV4: ___sync_lock_test_and_set_8
+; ARMV4: ___atomic_store_8
 store atomic i64 %v, i64* %p seq_cst, align 8
 ret void
 }
@@ -31,7 +31,7 @@ entry:
 ; CHECK: add
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_add_4
-; CHECK-T1-M0: bl ___sync_fetch_and_add_4
+; CHECK-T1-M0: bl ___atomic_fetch_add_4
 ; CHECK-BAREMETAL: add
 ; CHECK-BAREMETAL-NOT: __sync
 %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
@@ -41,7 +41,7 @@ entry:
 ; CHECK: sub
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_sub_4
-; CHECK-T1-M0: bl ___sync_fetch_and_sub_4
+; CHECK-T1-M0: bl ___atomic_fetch_sub_4
 ; CHECK-BAREMETAL: sub
 ; CHECK-BAREMETAL-NOT: __sync
 %1 = atomicrmw sub i32* %val2, i32 30 monotonic
@@ -51,7 +51,7 @@ entry:
 ; CHECK: add
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_add_4
-; CHECK-T1-M0: bl ___sync_fetch_and_add_4
+; CHECK-T1-M0: bl ___atomic_fetch_add_4
 ; CHECK-BAREMETAL: add
 ; CHECK-BAREMETAL-NOT: __sync
 %2 = atomicrmw add i32* %val2, i32 1 monotonic
@@ -61,7 +61,7 @@ entry:
 ; CHECK: sub
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_sub_4
-; CHECK-T1-M0: bl ___sync_fetch_and_sub_4
+; CHECK-T1-M0: bl ___atomic_fetch_sub_4
 ; CHECK-BAREMETAL: sub
 ; CHECK-BAREMETAL-NOT: __sync
 %3 = atomicrmw sub i32* %val2, i32 1 monotonic
@@ -71,7 +71,7 @@ entry:
 ; CHECK: and
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_and_4
-; CHECK-T1-M0: bl ___sync_fetch_and_and_4
+; CHECK-T1-M0: bl ___atomic_fetch_and_4
 ; CHECK-BAREMETAL: and
 ; CHECK-BAREMETAL-NOT: __sync
 %4 = atomicrmw and i32* %andt, i32 4080 monotonic
@@ -81,7 +81,7 @@ entry:
 ; CHECK: or
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_or_4
-; CHECK-T1-M0: bl ___sync_fetch_and_or_4
+; CHECK-T1-M0: bl ___atomic_fetch_or_4
 ; CHECK-BAREMETAL: or
 ; CHECK-BAREMETAL-NOT: __sync
 %5 = atomicrmw or i32* %ort, i32 4080 monotonic
@@ -91,7 +91,7 @@ entry:
 ; CHECK: eor
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_xor_4
-; CHECK-T1-M0: bl ___sync_fetch_and_xor_4
+; CHECK-T1-M0: bl ___atomic_fetch_xor_4
 ; CHECK-BAREMETAL: eor
 ; CHECK-BAREMETAL-NOT: __sync
 %6 = atomicrmw xor i32* %xort, i32 4080 monotonic
@@ -101,7 +101,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_min_4
-; CHECK-T1-M0: bl ___sync_fetch_and_min_4
+; CHECK-T1-M0: bl ___atomic_compare_exchange_4
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %7 = atomicrmw min i32* %val2, i32 16 monotonic
@@ -112,7 +112,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_min_4
-; CHECK-T1-M0: bl ___sync_fetch_and_min_4
+; CHECK-T1-M0: bl ___atomic_compare_exchange_4
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %8 = atomicrmw min i32* %val2, i32 %neg monotonic
@@ -122,7 +122,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_max_4
-; CHECK-T1-M0: bl ___sync_fetch_and_max_4
+; CHECK-T1-M0: bl ___atomic_compare_exchange_4
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %9 = atomicrmw max i32* %val2, i32 1 monotonic
@@ -133,7 +133,7 @@ entry:
 ; CHECK-NOT: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_max_4
-; CHECK-T1-M0: bl ___sync_fetch_and_max_4
+; CHECK-T1-M0: bl ___atomic_compare_exchange_4
 ; CHECK-BAREMETAL: bic
 ; CHECK-BAREMETAL-NOT: __sync
 %10 = atomicrmw max i32* %val2, i32 0 monotonic
@@ -143,7 +143,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umin_4
-; CHECK-T1-M0: bl ___sync_fetch_and_umin_4
+; CHECK-T1-M0: bl ___atomic_compare_exchange_4
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %11 = atomicrmw umin i32* %val2, i32 16 monotonic
@@ -154,7 +154,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umin_4
-; CHECK-T1-M0: bl ___sync_fetch_and_umin_4
+; CHECK-T1-M0: bl ___atomic_compare_exchange_4
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
@@ -164,7 +164,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umax_4
-; CHECK-T1-M0: bl ___sync_fetch_and_umax_4
+; CHECK-T1-M0: bl ___atomic_compare_exchange_4
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %13 = atomicrmw umax i32* %val2, i32 1 monotonic
@@ -174,7 +174,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umax_4
-; CHECK-T1-M0: bl ___sync_fetch_and_umax_4
+; CHECK-T1-M0: bl ___atomic_compare_exchange_4
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %14 = atomicrmw umax i32* %val2, i32 0 monotonic
@@ -192,7 +192,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umin_2
-; CHECK-T1-M0: bl ___sync_fetch_and_umin_2
+; CHECK-T1-M0: bl ___atomic_compare_exchange_2
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %0 = atomicrmw umin i16* %val, i16 16 monotonic
@@ -202,7 +202,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umin_2
-; CHECK-T1-M0: bl ___sync_fetch_and_umin_2
+; CHECK-T1-M0: bl ___atomic_compare_exchange_2
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
@@ -211,7 +211,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umax_2
-; CHECK-T1-M0: bl ___sync_fetch_and_umax_2
+; CHECK-T1-M0: bl ___atomic_compare_exchange_2
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %2 = atomicrmw umax i16* %val, i16 1 monotonic
@@ -220,7 +220,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umax_2
-; CHECK-T1-M0: bl ___sync_fetch_and_umax_2
+; CHECK-T1-M0: bl ___atomic_compare_exchange_2
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %3 = atomicrmw umax i16* %val, i16 0 monotonic
@@ -237,7 +237,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umin_1
-; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
+; CHECK-T1-M0: bl ___atomic_compare_exchange_1
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %0 = atomicrmw umin i8* %val, i8 16 monotonic
@@ -246,7 +246,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umin_1
-; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
+; CHECK-T1-M0: bl ___atomic_compare_exchange_1
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %uneg = sub i8 0, 1
@@ -256,7 +256,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umax_1
-; CHECK-T1-M0: bl ___sync_fetch_and_umax_1
+; CHECK-T1-M0: bl ___atomic_compare_exchange_1
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %2 = atomicrmw umax i8* %val, i8 1 monotonic
@@ -265,7 +265,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: strex
 ; CHECK-T1: bl ___sync_fetch_and_umax_1
-; CHECK-T1-M0: bl ___sync_fetch_and_umax_1
+; CHECK-T1-M0: bl ___atomic_compare_exchange_1
 ; CHECK-BAREMETAL: cmp
 ; CHECK-BAREMETAL-NOT: __sync
 %3 = atomicrmw umax i8* %val, i8 0 monotonic
@@ -360,10 +360,8 @@ define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
 ; CHECK: dmb
 ; CHECK: add r0,

-; CHECK-T1-M0: ldr {{r[0-9]}}, [r0]
-; CHECK-T1-M0: dmb
-; CHECK-T1-M0: ldr {{r[0-9]}}, [r1]
-; CHECK-T1-M0: dmb
+; CHECK-T1-M0: __atomic_load_4
+; CHECK-T1-M0: __atomic_load_4

 ; CHECK-T1: ___sync_val_compare_and_swap_4
 ; CHECK-T1: ___sync_val_compare_and_swap_4
@@ -390,10 +388,8 @@ define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) {
 ; CHECK-T1: ___sync_lock_test_and_set
 ; CHECK-T1: ___sync_lock_test_and_set

-; CHECK-T1-M0: dmb
-; CHECK-T1-M0: str r1, [r0]
-; CHECK-T1-M0: dmb
-; CHECK-T1-M0: str r3, [r2]
+; CHECK-T1-M0: __atomic_store_4
+; CHECK-T1-M0: __atomic_store_4

 ; CHECK-BAREMETAL-NOT: dmb
 ; CHECK-BAREMETAL: str r1, [r0]
@@ -413,9 +409,9 @@ define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) {
 ; CHECK: dmb
 ; CHECK: str [[R0]], [r1]

-; CHECK-T1-M0: ldr [[R0:r[0-9]]], [r0]
+; CHECK-T1-M0: __atomic_load_4
 ; CHECK-T1-M0: dmb
-; CHECK-T1-M0: str [[R0]], [r1]
+; CHECK-T1-M0: __atomic_store_4

 ; CHECK-T1: ldr [[R0:r[0-9]]], [{{r[0-9]+}}]
 ; CHECK-T1: {{dmb|bl ___sync_synchronize}}
@@ -71,7 +71,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {

 define void @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64:
-; CHECK: bl __sync_fetch_and_add_8
+; CHECK: bl __atomic_fetch_add_8
 %old = atomicrmw add i64* @var64, i64 %offset monotonic
 store i64 %old, i64* @var64
 ret void
@@ -355,7 +355,7 @@ define i64 @test_xchg_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_lock_test_and_set_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_exchange_8
 entry:
 %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -366,7 +366,7 @@ define i64 @test_add_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_add_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_fetch_add_8
 entry:
 %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -377,7 +377,7 @@ define i64 @test_sub_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_sub_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_fetch_sub_8
 entry:
 %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -388,7 +388,7 @@ define i64 @test_and_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_and_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_fetch_and_8
 entry:
 %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -399,7 +399,7 @@ define i64 @test_nand_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_nand_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_fetch_nand_8
 entry:
 %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -410,7 +410,7 @@ define i64 @test_or_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_or_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_fetch_or_8
 entry:
 %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -421,7 +421,7 @@ define i64 @test_xor_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_xor_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_fetch_xor_8
 entry:
 %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -433,7 +433,7 @@ define i64 @test_max_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_max_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_compare_exchange_8
 entry:
 %0 = atomicrmw max i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -444,7 +444,7 @@ define i64 @test_min_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_min_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_compare_exchange_8
 entry:
 %0 = atomicrmw min i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -455,7 +455,7 @@ define i64 @test_umax_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_umax_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_compare_exchange_8
 entry:
 %0 = atomicrmw umax i64* @atomic_i64, i64 1 monotonic
 ret i64 %0
@@ -466,7 +466,7 @@ define i64 @test_umin_i64() {
 ; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_umin_8
-; BASELINE64: bl __sync_val_compare_and_swap_8
+; BASELINE64: bl __atomic_compare_exchange_8
 entry:
 %0 = atomicrmw umin i64* @atomic_i64, i64 1 monotonic
 ret i64 %0