forked from OSchip/llvm-project
ARM: skip cmpxchg failure barrier if ordering is monotonic.
The terminal barrier of a cmpxchg expansion will be either Acquire or SequentiallyConsistent. In either case it can be skipped if the operation has Monotonic requirements on failure. rdar://problem/15996804 llvm-svn: 205535
This commit is contained in:
parent
425314a65f
commit
70450c59a4
|
@ -230,7 +230,8 @@ bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
|
|||
}
|
||||
|
||||
bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
||||
AtomicOrdering Order = CI->getSuccessOrdering();
|
||||
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
|
||||
AtomicOrdering FailureOrder = CI->getFailureOrdering();
|
||||
Value *Addr = CI->getPointerOperand();
|
||||
BasicBlock *BB = CI->getParent();
|
||||
Function *F = BB->getParent();
|
||||
|
@ -238,24 +239,27 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
|||
|
||||
// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
|
||||
//
|
||||
// The standard expansion we produce is:
|
||||
// The full expansion we produce is:
|
||||
// [...]
|
||||
// fence?
|
||||
// cmpxchg.start:
|
||||
// %loaded = @load.linked(%addr)
|
||||
// %should_store = icmp eq %loaded, %desired
|
||||
// br i1 %should_store, label %cmpxchg.trystore, label %cmpxchg.end
|
||||
// br i1 %should_store, label %cmpxchg.trystore,
|
||||
// label %cmpxchg.end/%cmpxchg.barrier
|
||||
// cmpxchg.trystore:
|
||||
// %stored = @store_conditional(%new, %addr)
|
||||
// %try_again = icmp i32 ne %stored, 0
|
||||
// br i1 %try_again, label %cmpxchg.start, label %cmpxchg.end
|
||||
// cmpxchg.end:
|
||||
// cmpxchg.barrier:
|
||||
// fence?
|
||||
// br label %cmpxchg.end
|
||||
// cmpxchg.end:
|
||||
// [...]
|
||||
BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
|
||||
BasicBlock *TryStoreBB =
|
||||
BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
|
||||
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
|
||||
auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
|
||||
auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, BarrierBB);
|
||||
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
|
||||
|
||||
// This grabs the DebugLoc from CI
|
||||
IRBuilder<> Builder(CI);
|
||||
|
@ -265,7 +269,7 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
|||
// the branch entirely.
|
||||
std::prev(BB->end())->eraseFromParent();
|
||||
Builder.SetInsertPoint(BB);
|
||||
AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
|
||||
AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
|
||||
Builder.CreateBr(LoopBB);
|
||||
|
||||
// Start the main loop block now that we've taken care of the preliminaries.
|
||||
|
@ -273,19 +277,24 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
|||
Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
|
||||
Value *ShouldStore =
|
||||
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
|
||||
Builder.CreateCondBr(ShouldStore, TryStoreBB, ExitBB);
|
||||
|
||||
// If the cmpxchg doesn't actually need any ordering when it fails, we can
|
||||
// jump straight past that fence instruction (if it exists).
|
||||
BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
|
||||
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
|
||||
|
||||
Builder.SetInsertPoint(TryStoreBB);
|
||||
Value *StoreSuccess =
|
||||
storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder);
|
||||
Value *TryAgain = Builder.CreateICmpNE(
|
||||
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
|
||||
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
|
||||
Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
|
||||
|
||||
// Finally, make sure later instructions don't get reordered with a fence if
|
||||
// necessary.
|
||||
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
|
||||
insertTrailingFence(Builder, Order);
|
||||
Builder.SetInsertPoint(BarrierBB);
|
||||
insertTrailingFence(Builder, SuccessOrder);
|
||||
Builder.CreateBr(ExitBB);
|
||||
|
||||
CI->replaceAllUsesWith(Loaded);
|
||||
CI->eraseFromParent();
|
||||
|
|
|
@ -194,3 +194,40 @@ entry:
|
|||
%0 = atomicrmw add i32* %p, i32 1 monotonic
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
|
||||
; CHECK-LABEL: test_cmpxchg_fail_order:
|
||||
|
||||
%oldval = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
|
||||
; CHECK: dmb ish
|
||||
; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
|
||||
; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
|
||||
; CHECK: cmp [[OLDVAL]], r1
|
||||
; CHECK: bxne lr
|
||||
; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
|
||||
; CHECK: cmp [[SUCCESS]], #0
|
||||
; CHECK: bne [[LOOP_BB]]
|
||||
; CHECK: dmb ish
|
||||
; CHECK: bx lr
|
||||
|
||||
ret i32 %oldval
|
||||
}
|
||||
|
||||
define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
|
||||
; CHECK-LABEL: test_cmpxchg_fail_order1:
|
||||
|
||||
%oldval = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
|
||||
; CHECK-NOT: dmb ish
|
||||
; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
|
||||
; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
|
||||
; CHECK: cmp [[OLDVAL]], r1
|
||||
; CHECK: bne [[END_BB:\.?LBB[0-9]+_[0-9]+]]
|
||||
; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
|
||||
; CHECK: cmp [[SUCCESS]], #0
|
||||
; CHECK: bne [[LOOP_BB]]
|
||||
; CHECK: [[END_BB]]:
|
||||
; CHECK: dmb ish
|
||||
; CHECK: bx lr
|
||||
|
||||
ret i32 %oldval
|
||||
}
|
||||
|
|
|
@ -1075,9 +1075,10 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw
|
|||
ret i16 %old
|
||||
}
|
||||
|
||||
define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
|
||||
define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
|
||||
; CHECK-LABEL: test_atomic_cmpxchg_i32:
|
||||
%old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
|
||||
store i32 %old, i32* @var32
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
|
||||
|
@ -1097,8 +1098,8 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
|
|||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
|
||||
; CHECK: mov r0, r[[OLD]]
|
||||
ret i32 %old
|
||||
; CHECK: str{{(.w)?}} r[[OLD]],
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
|
||||
|
|
Loading…
Reference in New Issue