diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index c618c4f60e5c..a2bb2b2922a8 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -4955,14 +4955,16 @@ bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
     return false;
 
   const TargetRegisterClass *ResRC;
-  unsigned Opc;
+  unsigned Opc, CmpOpc;
   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
   // extractvalue selection doesn't support that.
   if (VT == MVT::i32) {
     Opc = AArch64::CMP_SWAP_32;
+    CmpOpc = AArch64::SUBSWrs;
     ResRC = &AArch64::GPR32RegClass;
   } else if (VT == MVT::i64) {
     Opc = AArch64::CMP_SWAP_64;
+    CmpOpc = AArch64::SUBSXrs;
     ResRC = &AArch64::GPR64RegClass;
   } else {
     return false;
@@ -4979,14 +4981,27 @@ bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
 
   const unsigned ResultReg1 = createResultReg(ResRC);
   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
+  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
 
   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
-      .addReg(ResultReg1, RegState::Define)
-      .addReg(ResultReg2, RegState::Define)
-      .addReg(AddrReg)
-      .addReg(DesiredReg)
-      .addReg(NewReg);
+      .addDef(ResultReg1)
+      .addDef(ScratchReg)
+      .addUse(AddrReg)
+      .addUse(DesiredReg)
+      .addUse(NewReg);
+
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
+      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
+      .addUse(ResultReg1)
+      .addUse(DesiredReg)
+      .addImm(0);
+
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
+      .addDef(ResultReg2)
+      .addUse(AArch64::WZR)
+      .addUse(AArch64::WZR)
+      .addImm(AArch64CC::NE);
 
   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
   updateValueMap(I, ResultReg1, 2);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 59de62ad2877..867074c3c374 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -377,28 +377,28 @@ def : Pat<(int_aarch64_clrex), (CLREX 0xf)>;
 // significantly more naive than the standard expansion: we conservatively
 // assume seq_cst, strong cmpxchg and omit clrex on failure.
 
-let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+let Constraints = "@earlyclobber $Rd,@earlyclobber $scratch",
     mayLoad = 1, mayStore = 1 in {
-def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
                         (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
                  Sched<[WriteAtomic]>;
 
-def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
                          (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
                   Sched<[WriteAtomic]>;
 
-def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
                          (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
                   Sched<[WriteAtomic]>;
 
-def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$status),
+def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
                          (ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>,
                   Sched<[WriteAtomic]>;
 }
 
-let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $status",
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
     mayLoad = 1, mayStore = 1 in
-def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$status),
+def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
                           (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
                                GPR64:$newLo, GPR64:$newHi), []>,
                    Sched<[WriteAtomic]>;
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
index d20e693df83e..aa78210fae74 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
@@ -9,10 +9,11 @@
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [x0]
 ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
 ; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: cmp [[OLD]], w1
+; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
 ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT: str [[STATUS32]], [x3]
 ; CHECK-NEXT: mov w0, [[OLD]]
-; CHECK-NEXT: ret
 define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
   %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
   %tmp1 = extractvalue { i32, i1 } %tmp0, 0
@@ -24,7 +25,7 @@ define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
 
 ; CHECK-LABEL: cmpxchg_acq_rel_32_load:
 ; CHECK: // BB#0:
-; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
+; CHECK: ldr [[NEW:w[0-9]+]], [x2]
 ; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
 ; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
 ; CHECK-NEXT: cmp [[OLD]], w1
@@ -33,10 +34,11 @@ define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0]
 ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
 ; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: cmp [[OLD]], w1
+; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
 ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT: str [[STATUS32]], [x3]
 ; CHECK-NEXT: mov w0, [[OLD]]
-; CHECK-NEXT: ret
 define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 {
   %new = load i32, i32* %pnew
   %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire
@@ -56,10 +58,11 @@ define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], x2, [x0]
 ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
 ; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: cmp [[OLD]], x1
+; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
 ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT: str [[STATUS32]], [x3]
 ; CHECK-NEXT: mov x0, [[OLD]]
-; CHECK-NEXT: ret
 define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 {
   %tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst
   %tmp1 = extractvalue { i64, i1 } %tmp0, 0
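
Note (not part of the patch): after this change the CMP_SWAP_* pseudos no longer produce the i1 success value as their second result; FastISel recomputes it after the LL/SC loop with the SUBS/CSINC (cmp/cset) pair emitted above. Below is a minimal illustrative LLVM IR sketch of an input that exercises the recomputed flag, written in the same typed-pointer style as the patched test; the function and value names are hypothetical, not taken from the patch.

define i32 @cmpxchg_success_32(i32* %p, i32 %cmp, i32 %new) {
  ; At -O0, FastISel selects CMP_SWAP_32 for this cmpxchg and then emits
  ; cmp/cset to materialize the success bit extracted below.
  %pair = cmpxchg i32* %p, i32 %cmp, i32 %new seq_cst seq_cst
  %success = extractvalue { i32, i1 } %pair, 1
  %ret = zext i1 %success to i32
  ret i32 %ret
}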