forked from OSchip/llvm-project
ARM: use acquire/release instruction variants when available.
The acquire/release instructions were (fairly) recently split out into their own subtarget feature, so CodeGen should use them whenever that feature is available. The main change here is that the check used to be based on the triple, but now it's based on CPU features. llvm-svn: 349355
This commit is contained in:
parent
4c73711069
commit
ae3b66b7b0
|
@ -984,7 +984,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
|||
|
||||
// On v8, we have particularly efficient implementations of atomic fences
|
||||
// if they can be combined with nearby atomic loads and stores.
|
||||
if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
|
||||
if (!Subtarget->hasAcquireRelease() ||
|
||||
getTargetMachine().getOptLevel() == 0) {
|
||||
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
|
||||
InsertFencesForAtomic = true;
|
||||
}
|
||||
|
|
|
@ -4451,13 +4451,13 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
|
|||
def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
|
||||
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
|
||||
|
||||
let AddedComplexity = 8 in {
|
||||
def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
|
||||
def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
|
||||
def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
|
||||
def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
|
||||
def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
|
||||
def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
|
||||
let AddedComplexity = 8, Predicates = [IsThumb, HasAcquireRelease, HasV7Clrex] in {
|
||||
def : Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
|
||||
def : Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
|
||||
def : Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
|
||||
def : Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
|
||||
def : Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
|
||||
def : Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,140 @@
|
|||
; RUN: llc -mtriple=thumbv7-none-eabi -mcpu=cortex-m33 -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
|
||||
; CHECK-LABEL: test_atomic_load_add_i8:
|
||||
%old = atomicrmw add i8* @var8, i8 %offset seq_cst
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
|
||||
; CHECK: movt r[[ADDR]], :upper16:var8
|
||||
|
||||
; CHECK: .LBB{{[0-9]+}}_1:
|
||||
; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
|
||||
; r0 below is a reasonable guess but could change: the offset argument
|
||||
; certainly enters the function in r0.
|
||||
; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
|
||||
; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[STATUS]], #0
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
|
||||
; CHECK: mov r0, r[[OLD]]
|
||||
ret i8 %old
|
||||
}
|
||||
|
||||
define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
|
||||
; CHECK-LABEL: test_atomic_load_add_i16:
|
||||
%old = atomicrmw add i16* @var16, i16 %offset acquire
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
|
||||
; CHECK: movt r[[ADDR]], :upper16:var16
|
||||
|
||||
; CHECK: .LBB{{[0-9]+}}_1:
|
||||
; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
|
||||
; r0 below is a reasonable guess but could change: the offset argument
|
||||
; certainly enters the function in r0.
|
||||
; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
|
||||
; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[STATUS]], #0
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
|
||||
; CHECK: mov r0, r[[OLD]]
|
||||
ret i16 %old
|
||||
}
|
||||
|
||||
define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
|
||||
; CHECK-LABEL: test_atomic_load_add_i32:
|
||||
%old = atomicrmw add i32* @var32, i32 %offset release
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
|
||||
; CHECK: movt r[[ADDR]], :upper16:var32
|
||||
|
||||
; CHECK: .LBB{{[0-9]+}}_1:
|
||||
; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
|
||||
; r0 below is a reasonable guess but could change: the offset argument
|
||||
; certainly enters the function in r0.
|
||||
; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
|
||||
; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[STATUS]], #0
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
|
||||
; CHECK: mov r0, r[[OLD]]
|
||||
ret i32 %old
|
||||
}
|
||||
|
||||
define void @test_atomic_load_add_i64(i64 %offset) nounwind {
|
||||
; CHECK-LABEL: test_atomic_load_add_i64:
|
||||
; CHECK: bl __sync_fetch_and_add_8
|
||||
%old = atomicrmw add i64* @var64, i64 %offset monotonic
|
||||
store i64 %old, i64* @var64
|
||||
ret void
|
||||
}
|
||||
|
||||
define i8 @test_load_acquire_i8(i8* %ptr) {
|
||||
; CHECK-LABEL: test_load_acquire_i8:
|
||||
; CHECK: ldab r0, [r0]
|
||||
%val = load atomic i8, i8* %ptr seq_cst, align 1
|
||||
ret i8 %val
|
||||
}
|
||||
|
||||
define i16 @test_load_acquire_i16(i16* %ptr) {
|
||||
; CHECK-LABEL: test_load_acquire_i16:
|
||||
; CHECK: ldah r0, [r0]
|
||||
%val = load atomic i16, i16* %ptr acquire, align 2
|
||||
ret i16 %val
|
||||
}
|
||||
|
||||
define i32 @test_load_acquire_i32(i32* %ptr) {
|
||||
; CHECK-LABEL: test_load_acquire_i32:
|
||||
; CHECK: lda r0, [r0]
|
||||
%val = load atomic i32, i32* %ptr acquire, align 4
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i64 @test_load_acquire_i64(i64* %ptr) {
|
||||
; CHECK-LABEL: test_load_acquire_i64:
|
||||
; CHECK: bl __atomic_load
|
||||
%val = load atomic i64, i64* %ptr acquire, align 4
|
||||
ret i64 %val
|
||||
}
|
||||
|
||||
define void @test_store_release_i8(i8 %val, i8* %ptr) {
|
||||
; CHECK-LABEL: test_store_release_i8:
|
||||
; CHECK: stlb r0, [r1]
|
||||
store atomic i8 %val, i8* %ptr seq_cst, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_store_release_i16(i16 %val, i16* %ptr) {
|
||||
; CHECK-LABEL: test_store_release_i16:
|
||||
; CHECK: stlh r0, [r1]
|
||||
store atomic i16 %val, i16* %ptr release, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_store_release_i32(i32 %val, i32* %ptr) {
|
||||
; CHECK-LABEL: test_store_release_i32:
|
||||
; CHECK: stl r0, [r1]
|
||||
store atomic i32 %val, i32* %ptr seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_store_release_i64(i64 %val, i64* %ptr) {
|
||||
; CHECK-LABEL: test_store_release_i64:
|
||||
; CHECK: bl __atomic_store
|
||||
store atomic i64 %val, i64* %ptr seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@var8 = global i8 0
|
||||
@var16 = global i16 0
|
||||
@var32 = global i32 0
|
||||
@var64 = global i64 0
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=thumbv8m.main-none-eabi | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=thumbv8m.base-none-eabi | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V7
|
||||
; RUN: llc < %s -mtriple=thumbv8m.main-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V8
|
||||
; RUN: llc < %s -mtriple=thumbv8m.base-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V8
|
||||
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK-NOT: ldrexd
|
||||
|
@ -28,7 +28,8 @@ entry:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: ldr
|
||||
; CHECK-V7: ldr
|
||||
; CHECK-V8: lda
|
||||
define i32 @f3(i32* %p) nounwind readonly {
|
||||
entry:
|
||||
%0 = load atomic i32, i32* %p seq_cst, align 4
|
||||
|
@ -36,7 +37,8 @@ entry:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: ldrb
|
||||
; CHECK-V7: ldrb
|
||||
; CHECK-V8: ldab
|
||||
define i8 @f4(i8* %p) nounwind readonly {
|
||||
entry:
|
||||
%0 = load atomic i8, i8* %p seq_cst, align 4
|
||||
|
@ -44,7 +46,8 @@ entry:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: str
|
||||
; CHECK-V7: str
|
||||
; CHECK-V8: stl
|
||||
define void @f5(i32* %p) nounwind readonly {
|
||||
entry:
|
||||
store atomic i32 0, i32* %p seq_cst, align 4
|
||||
|
@ -52,8 +55,10 @@ entry:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: ldrex
|
||||
; CHECK: strex
|
||||
; CHECK-V7: ldrex
|
||||
; CHECK-V7: strex
|
||||
; CHECK-V8: ldaex
|
||||
; CHECK-V8: stlex
|
||||
define i32 @f6(i32* %p) nounwind readonly {
|
||||
entry:
|
||||
%0 = atomicrmw add i32* %p, i32 1 seq_cst
|
||||
|
|
Loading…
Reference in New Issue