From 70450c59a4b3212e72892c6932b7e77eeeaea086 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 3 Apr 2014 13:06:54 +0000 Subject: [PATCH] ARM: skip cmpxchg failure barrier if ordering is monotonic. The terminal barrier of a cmpxchg expansion will be either Acquire or SequentiallyConsistent. In either case it can be skipped if the operation has Monotonic requirements on failure. rdar://problem/15996804 llvm-svn: 205535 --- llvm/lib/Target/ARM/ARMAtomicExpandPass.cpp | 33 +++++++++++------- llvm/test/CodeGen/ARM/atomic-op.ll | 37 +++++++++++++++++++++ llvm/test/CodeGen/ARM/atomic-ops-v8.ll | 7 ++-- 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMAtomicExpandPass.cpp b/llvm/lib/Target/ARM/ARMAtomicExpandPass.cpp index 33cdda5d6e14..18e07837018a 100644 --- a/llvm/lib/Target/ARM/ARMAtomicExpandPass.cpp +++ b/llvm/lib/Target/ARM/ARMAtomicExpandPass.cpp @@ -230,7 +230,8 @@ bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) { } bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - AtomicOrdering Order = CI->getSuccessOrdering(); + AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); + AtomicOrdering FailureOrder = CI->getFailureOrdering(); Value *Addr = CI->getPointerOperand(); BasicBlock *BB = CI->getParent(); Function *F = BB->getParent(); @@ -238,24 +239,27 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord // - // The standard expansion we produce is: + // The full expansion we produce is: // [...] // fence? 
cmpxchg.start: // %loaded = @load.linked(%addr) // %should_store = icmp eq %loaded, %desired - // br i1 %should_store, label %cmpxchg.trystore, label %cmpxchg.end + // br i1 %should_store, label %cmpxchg.trystore, + // label %cmpxchg.end/%cmpxchg.barrier // cmpxchg.trystore: // %stored = @store_conditional(%new, %addr) // %try_again = icmp i32 ne %stored, 0 // br i1 %try_again, label %loop, label %cmpxchg.end - // cmpxchg.end: + // cmpxchg.barrier: // fence? + // br label %cmpxchg.end + // cmpxchg.end: // [...] BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); - BasicBlock *TryStoreBB = - BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); + auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB); + auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, BarrierBB); + auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); // This grabs the DebugLoc from CI IRBuilder<> Builder(CI); @@ -265,7 +269,7 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -273,19 +277,24 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Value *Loaded = loadLinked(Builder, Addr, MemOpOrder); Value *ShouldStore = Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); - Builder.CreateCondBr(ShouldStore, TryStoreBB, ExitBB); + + // If the cmpxchg doesn't actually need any ordering when it fails, we can + // jump straight past that fence instruction (if it exists). + BasicBlock *FailureBB = FailureOrder == Monotonic ? 
ExitBB : BarrierBB; + Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder); Value *TryAgain = Builder.CreateICmpNE( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); - Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB); // Finally, make sure later instructions don't get reordered with a fence if // necessary. - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - insertTrailingFence(Builder, Order); + Builder.SetInsertPoint(BarrierBB); + insertTrailingFence(Builder, SuccessOrder); + Builder.CreateBr(ExitBB); CI->replaceAllUsesWith(Loaded); CI->eraseFromParent(); diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll index 9a79c9fd7b1b..ac8e949cf18c 100644 --- a/llvm/test/CodeGen/ARM/atomic-op.ll +++ b/llvm/test/CodeGen/ARM/atomic-op.ll @@ -194,3 +194,40 @@ entry: %0 = atomicrmw add i32* %p, i32 1 monotonic ret i32 %0 } + +define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) { +; CHECK-LABEL: test_cmpxchg_fail_order: + + %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic +; CHECK: dmb ish +; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]: +; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] +; CHECK: cmp [[OLDVAL]], r1 +; CHECK: bxne lr +; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]] +; CHECK: cmp [[SUCCESS]], #0 +; CHECK: bne [[LOOP_BB]] +; CHECK: dmb ish +; CHECK: bx lr + + ret i32 %oldval +} + +define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) { +; CHECK-LABEL: test_cmpxchg_fail_order1: + + %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire +; CHECK-NOT: dmb ish +; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]: +; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] +; CHECK: cmp [[OLDVAL]], r1 +; CHECK: bne [[END_BB:\.?LBB[0-9]+_[0-9]+]] +; CHECK: strex [[SUCCESS:r[0-9]+]], r2, 
[r[[ADDR]]] +; CHECK: cmp [[SUCCESS]], #0 +; CHECK: bne [[LOOP_BB]] +; CHECK: [[END_BB]]: +; CHECK: dmb ish +; CHECK: bx lr + + ret i32 %oldval +} diff --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll index 1ca78bfd1e32..00f9006c1555 100644 --- a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll @@ -1075,9 +1075,10 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw ret i16 %old } -define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { +define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32: %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic + store i32 %old, i32* @var32 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 @@ -1097,8 +1098,8 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD]] - ret i32 %old +; CHECK: str{{(.w)?}} r[[OLD]], + ret void } define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {