[AArch64][FastISel] Select -O0 legal cmpxchg.

At -O0, cmpxchg survives AtomicExpand: it's mostly straightforward
to select it in fast-isel, and let the pseudo be expanded later.

extractvalues on the result are the tricky part: the generic logic
only works for legal types (and it would be painful to make it
support illegal types), so we can only support i32/i64 cmpxchg.

llvm-svn: 276183
This commit is contained in:
Ahmed Bougacha 2016-07-20 21:12:32 +00:00
parent b0674d1143
commit a0cdd79070
3 changed files with 128 additions and 1 deletions

View File

@ -134,6 +134,7 @@ private:
bool selectFRem(const Instruction *I);
bool selectSDiv(const Instruction *I);
bool selectGetElementPtr(const Instruction *I);
bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
@ -4940,6 +4941,58 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
return true;
}
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
assert(TM.getOptLevel() == CodeGenOpt::None &&
"cmpxchg survived AtomicExpand at optlevel > -O0");
auto *RetPairTy = cast<StructType>(I->getType());
Type *RetTy = RetPairTy->getTypeAtIndex(0U);
assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
"cmpxchg has a non-i1 status result");
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
const TargetRegisterClass *ResRC;
unsigned Opc;
// This only supports i32/i64, because i8/i16 aren't legal, and the generic
// extractvalue selection doesn't support that.
if (VT == MVT::i32) {
Opc = AArch64::CMP_SWAP_32;
ResRC = &AArch64::GPR32RegClass;
} else if (VT == MVT::i64) {
Opc = AArch64::CMP_SWAP_64;
ResRC = &AArch64::GPR64RegClass;
} else {
return false;
}
const MCInstrDesc &II = TII.get(Opc);
const unsigned AddrReg = constrainOperandRegClass(
II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
const unsigned DesiredReg = constrainOperandRegClass(
II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
const unsigned NewReg = constrainOperandRegClass(
II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
const unsigned ResultReg1 = createResultReg(ResRC);
const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
// FIXME: MachineMemOperand doesn't support cmpxchg yet.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(ResultReg1, RegState::Define)
.addReg(ResultReg2, RegState::Define)
.addReg(AddrReg)
.addReg(DesiredReg)
.addReg(NewReg);
assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
updateValueMap(I, ResultReg1, 2);
return true;
}
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
@ -5013,6 +5066,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
return selectFRem(I);
case Instruction::GetElementPtr:
return selectGetElementPtr(I);
case Instruction::AtomicCmpXchg:
return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
}
// fall-back to target-independent instruction selection.

View File

@ -1,4 +1,4 @@
; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 %s -o - | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 %s -o - | FileCheck %s
define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_8:

View File

@ -0,0 +1,72 @@
; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: cmpxchg_monotonic_32:
; CHECK: [[RETRY:.LBB[0-9_]+]]:
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
; CHECK-NEXT: mov w0, [[OLD]]
; CHECK-NEXT: ret
; 32-bit cmpxchg on %p using monotonic for both the success and the failure
; ordering. Field 0 of the { i32, i1 } result is returned; field 1 is
; zero-extended to i32 and stored through %ps so that both results are used.
define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
%tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
%tmp1 = extractvalue { i32, i1 } %tmp0, 0
%tmp2 = extractvalue { i32, i1 } %tmp0, 1
%tmp3 = zext i1 %tmp2 to i32
store i32 %tmp3, i32* %ps
ret i32 %tmp1
}
; CHECK-LABEL: cmpxchg_acq_rel_32_load:
; CHECK: // BB#0:
; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
; CHECK-NEXT: mov w0, [[OLD]]
; CHECK-NEXT: ret
; 32-bit cmpxchg on %p with acq_rel success ordering and acquire failure
; ordering. Unlike the other tests in this file, the new value is not an
; argument: it is loaded from %pnew first. Field 0 of the result is returned;
; field 1 is zero-extended to i32 and stored through %ps.
define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 {
%new = load i32, i32* %pnew
%tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire
%tmp1 = extractvalue { i32, i1 } %tmp0, 0
%tmp2 = extractvalue { i32, i1 } %tmp0, 1
%tmp3 = zext i1 %tmp2 to i32
store i32 %tmp3, i32* %ps
ret i32 %tmp1
}
; CHECK-LABEL: cmpxchg_seq_cst_64:
; CHECK: [[RETRY:.LBB[0-9_]+]]:
; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], x1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], x2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
; CHECK-NEXT: mov x0, [[OLD]]
; CHECK-NEXT: ret
; 64-bit cmpxchg on %p using seq_cst for both the success and the failure
; ordering. Field 0 of the { i64, i1 } result is returned; field 1 is
; zero-extended to i32 and stored through %ps.
define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 {
%tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst
%tmp1 = extractvalue { i64, i1 } %tmp0, 0
%tmp2 = extractvalue { i64, i1 } %tmp0, 1
%tmp3 = zext i1 %tmp2 to i32
store i32 %tmp3, i32* %ps
ret i64 %tmp1
}
attributes #0 = { nounwind }