AArch64: Fix cmpxchg O0 expansion

- Rewrite livein calculation to use the computeLiveIns() helper
  function. This is slightly less efficient but easier to reason about
  and doesn't unnecessarily add pristine and reserved registers[1]
- Zero the status register at the beginning of the loop to make sure it
  has a defined value.
- Remove kill flags of values that need to stay alive throughout the loop.

[1] An upcoming commit of mine will tighten the MachineVerifier to catch
    these.

llvm-svn: 304048
This commit is contained in:
Matthias Braun 2017-05-26 23:48:59 +00:00
parent 69fd2b7802
commit b4f74224ff
3 changed files with 74 additions and 64 deletions

View File

@ -584,27 +584,21 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
return true; return true;
} }
static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
MBB->addLiveIn(*I);
}
bool AArch64ExpandPseudo::expandCMP_SWAP( bool AArch64ExpandPseudo::expandCMP_SWAP(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg, unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
MachineBasicBlock::iterator &NextMBBI) { MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI; MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc(); DebugLoc DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0); const MachineOperand &Dest = MI.getOperand(0);
unsigned StatusReg = MI.getOperand(1).getReg(); unsigned StatusReg = MI.getOperand(1).getReg();
MachineOperand &Addr = MI.getOperand(2); bool StatusDead = MI.getOperand(1).isDead();
MachineOperand &Desired = MI.getOperand(3); // Duplicating undef operands into 2 instructions does not guarantee the same
MachineOperand &New = MI.getOperand(4); // value on both; However undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
LivePhysRegs LiveRegs(TII->getRegisterInfo()); unsigned AddrReg = MI.getOperand(2).getReg();
LiveRegs.addLiveOuts(MBB); unsigned DesiredReg = MI.getOperand(3).getReg();
for (auto I = std::prev(MBB.end()); I != MBBI; --I) unsigned NewReg = MI.getOperand(4).getReg();
LiveRegs.stepBackward(*I);
MachineFunction *MF = MBB.getParent(); MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@ -616,19 +610,18 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
MF->insert(++StoreBB->getIterator(), DoneBB); MF->insert(++StoreBB->getIterator(), DoneBB);
// .Lloadcmp: // .Lloadcmp:
// mov wStatus, 0
// ldaxr xDest, [xAddr] // ldaxr xDest, [xAddr]
// cmp xDest, xDesired // cmp xDest, xDesired
// b.ne .Ldone // b.ne .Ldone
LoadCmpBB->addLiveIn(Addr.getReg()); if (!StatusDead)
LoadCmpBB->addLiveIn(Dest.getReg()); BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
LoadCmpBB->addLiveIn(Desired.getReg()); .addImm(0).addImm(0);
addPostLoopLiveIns(LoadCmpBB, LiveRegs);
BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg()) BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
.addReg(Addr.getReg()); .addReg(AddrReg);
BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg) BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
.addReg(Dest.getReg(), getKillRegState(Dest.isDead())) .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
.add(Desired) .addReg(DesiredReg)
.addImm(ExtendImm); .addImm(ExtendImm);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc)) BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::NE) .addImm(AArch64CC::NE)
@ -640,25 +633,35 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
// .Lstore: // .Lstore:
// stlxr wStatus, xNew, [xAddr] // stlxr wStatus, xNew, [xAddr]
// cbnz wStatus, .Lloadcmp // cbnz wStatus, .Lloadcmp
StoreBB->addLiveIn(Addr.getReg()); BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
StoreBB->addLiveIn(New.getReg()); .addReg(NewReg)
addPostLoopLiveIns(StoreBB, LiveRegs); .addReg(AddrReg);
BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW)) BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
.addReg(StatusReg, RegState::Kill) .addReg(StatusReg, getKillRegState(StatusDead))
.addMBB(LoadCmpBB); .addMBB(LoadCmpBB);
StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(LoadCmpBB);
StoreBB->addSuccessor(DoneBB); StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB); DoneBB->transferSuccessors(&MBB);
addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB); MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end(); NextMBBI = MBB.end();
MI.eraseFromParent(); MI.eraseFromParent();
// Recompute livein lists.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
LivePhysRegs LiveRegs;
computeLiveIns(LiveRegs, MRI, *DoneBB);
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
// Do an extra pass around the loop to get loop carried registers right.
StoreBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *StoreBB);
LoadCmpBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
return true; return true;
} }
@ -671,16 +674,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
MachineOperand &DestLo = MI.getOperand(0); MachineOperand &DestLo = MI.getOperand(0);
MachineOperand &DestHi = MI.getOperand(1); MachineOperand &DestHi = MI.getOperand(1);
unsigned StatusReg = MI.getOperand(2).getReg(); unsigned StatusReg = MI.getOperand(2).getReg();
MachineOperand &Addr = MI.getOperand(3); bool StatusDead = MI.getOperand(2).isDead();
MachineOperand &DesiredLo = MI.getOperand(4); // Duplicating undef operands into 2 instructions does not guarantee the same
MachineOperand &DesiredHi = MI.getOperand(5); // value on both; However undef should be replaced by xzr anyway.
MachineOperand &NewLo = MI.getOperand(6); assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
MachineOperand &NewHi = MI.getOperand(7); unsigned AddrReg = MI.getOperand(3).getReg();
unsigned DesiredLoReg = MI.getOperand(4).getReg();
LivePhysRegs LiveRegs(TII->getRegisterInfo()); unsigned DesiredHiReg = MI.getOperand(5).getReg();
LiveRegs.addLiveOuts(MBB); unsigned NewLoReg = MI.getOperand(6).getReg();
for (auto I = std::prev(MBB.end()); I != MBBI; --I) unsigned NewHiReg = MI.getOperand(7).getReg();
LiveRegs.stepBackward(*I);
MachineFunction *MF = MBB.getParent(); MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@ -696,20 +698,13 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
// cmp xDestLo, xDesiredLo // cmp xDestLo, xDesiredLo
// sbcs xDestHi, xDesiredHi // sbcs xDestHi, xDesiredHi
// b.ne .Ldone // b.ne .Ldone
LoadCmpBB->addLiveIn(Addr.getReg());
LoadCmpBB->addLiveIn(DestLo.getReg());
LoadCmpBB->addLiveIn(DestHi.getReg());
LoadCmpBB->addLiveIn(DesiredLo.getReg());
LoadCmpBB->addLiveIn(DesiredHi.getReg());
addPostLoopLiveIns(LoadCmpBB, LiveRegs);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX)) BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
.addReg(DestLo.getReg(), RegState::Define) .addReg(DestLo.getReg(), RegState::Define)
.addReg(DestHi.getReg(), RegState::Define) .addReg(DestHi.getReg(), RegState::Define)
.addReg(Addr.getReg()); .addReg(AddrReg);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead())) .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
.add(DesiredLo) .addReg(DesiredLoReg)
.addImm(0); .addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg) BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(AArch64::WZR) .addUse(AArch64::WZR)
@ -717,14 +712,14 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addImm(AArch64CC::EQ); .addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead())) .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
.add(DesiredHi) .addReg(DesiredHiReg)
.addImm(0); .addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg) BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(StatusReg, RegState::Kill) .addUse(StatusReg, RegState::Kill)
.addUse(StatusReg, RegState::Kill) .addUse(StatusReg, RegState::Kill)
.addImm(AArch64CC::EQ); .addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW)) BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
.addUse(StatusReg, RegState::Kill) .addUse(StatusReg, getKillRegState(StatusDead))
.addMBB(DoneBB); .addMBB(DoneBB);
LoadCmpBB->addSuccessor(DoneBB); LoadCmpBB->addSuccessor(DoneBB);
LoadCmpBB->addSuccessor(StoreBB); LoadCmpBB->addSuccessor(StoreBB);
@ -732,28 +727,36 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
// .Lstore: // .Lstore:
// stlxp wStatus, xNewLo, xNewHi, [xAddr] // stlxp wStatus, xNewLo, xNewHi, [xAddr]
// cbnz wStatus, .Lloadcmp // cbnz wStatus, .Lloadcmp
StoreBB->addLiveIn(Addr.getReg());
StoreBB->addLiveIn(NewLo.getReg());
StoreBB->addLiveIn(NewHi.getReg());
addPostLoopLiveIns(StoreBB, LiveRegs);
BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg) BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
.add(NewLo) .addReg(NewLoReg)
.add(NewHi) .addReg(NewHiReg)
.add(Addr); .addReg(AddrReg);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW)) BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
.addReg(StatusReg, RegState::Kill) .addReg(StatusReg, getKillRegState(StatusDead))
.addMBB(LoadCmpBB); .addMBB(LoadCmpBB);
StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(LoadCmpBB);
StoreBB->addSuccessor(DoneBB); StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB); DoneBB->transferSuccessors(&MBB);
addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB); MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end(); NextMBBI = MBB.end();
MI.eraseFromParent(); MI.eraseFromParent();
// Recompute liveness bottom up.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
LivePhysRegs LiveRegs;
computeLiveIns(LiveRegs, MRI, *DoneBB);
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
// Do an extra pass in the loop to get the loop carried dependencies right.
StoreBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *StoreBB);
LoadCmpBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
return true; return true;
} }

View File

@ -3,10 +3,11 @@
define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_8: ; CHECK-LABEL: test_cmpxchg_8:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0] ; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1, uxtb ; CHECK: cmp [[OLD]], w1, uxtb
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: stlxrb [[STATUS:w[3-9]]], w2, [x0] ; CHECK: stlxrb [[STATUS]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]: ; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
@ -18,6 +19,7 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_16: ; CHECK-LABEL: test_cmpxchg_16:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0] ; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1, uxth ; CHECK: cmp [[OLD]], w1, uxth
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
@ -33,10 +35,11 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_32: ; CHECK-LABEL: test_cmpxchg_32:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxr [[OLD:w[0-9]+]], [x0] ; CHECK: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1 ; CHECK: cmp [[OLD]], w1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: stlxr [[STATUS:w[3-9]]], w2, [x0] ; CHECK: stlxr [[STATUS]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]: ; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
@ -48,10 +51,11 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind
define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_64: ; CHECK-LABEL: test_cmpxchg_64:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxr [[OLD:x[0-9]+]], [x0] ; CHECK: ldaxr [[OLD:x[0-9]+]], [x0]
; CHECK: cmp [[OLD]], x1 ; CHECK: cmp [[OLD]], x1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: stlxr [[STATUS:w[3-9]]], x2, [x0] ; CHECK: stlxr [[STATUS]], x2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]: ; CHECK: [[DONE]]:
; CHECK: subs {{x[0-9]+}}, [[OLD]], x1 ; CHECK: subs {{x[0-9]+}}, [[OLD]], x1

View File

@ -2,11 +2,12 @@
; CHECK-LABEL: cmpxchg_monotonic_32: ; CHECK-LABEL: cmpxchg_monotonic_32:
; CHECK: [[RETRY:.LBB[0-9_]+]]: ; CHECK: [[RETRY:.LBB[0-9_]+]]:
; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] ; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1 ; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2: ; CHECK-NEXT: // BB#2:
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [x0] ; CHECK-NEXT: stlxr [[STATUS]], w2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]: ; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], w1 ; CHECK-NEXT: cmp [[OLD]], w1
@ -27,11 +28,12 @@ define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
; CHECK: // BB#0: ; CHECK: // BB#0:
; CHECK: ldr [[NEW:w[0-9]+]], [x2] ; CHECK: ldr [[NEW:w[0-9]+]], [x2]
; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]: ; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] ; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1 ; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2: ; CHECK-NEXT: // BB#2:
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0] ; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]: ; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], w1 ; CHECK-NEXT: cmp [[OLD]], w1
@ -51,11 +53,12 @@ define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0
; CHECK-LABEL: cmpxchg_seq_cst_64: ; CHECK-LABEL: cmpxchg_seq_cst_64:
; CHECK: [[RETRY:.LBB[0-9_]+]]: ; CHECK: [[RETRY:.LBB[0-9_]+]]:
; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0] ; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], x1 ; CHECK-NEXT: cmp [[OLD]], x1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2: ; CHECK-NEXT: // BB#2:
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], x2, [x0] ; CHECK-NEXT: stlxr [[STATUS]], x2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]: ; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], x1 ; CHECK-NEXT: cmp [[OLD]], x1