[FastISel][AArch64] Extend 'select' lowering to also support i1 to i16.

Related to rdar://problem/18960150.

llvm-svn: 221846
This commit is contained in:
Juergen Ributzka 2014-11-13 00:36:38 +00:00
parent 1b6c73474d
commit d1a042abd0
3 changed files with 105 additions and 97 deletions

View File

@ -2497,59 +2497,71 @@ bool AArch64FastISel::selectCmp(const Instruction *I) {
}
// Lowers an IR `select` to an AArch64 conditional-select instruction: CSEL for
// integer result types and FCSEL for f32/f64. Returns true after recording the
// result register with updateValueMap; returns false when the result type is
// not handled by the switch or when a needed register cannot be obtained.
//
// NOTE(review): this hunk is rendered without +/- markers, so removed and added
// lines are interleaved. RC and CondReg are each declared more than once, and
// the SelectOpc/TrueReg/FalseReg spelling appears next to the
// Opc/Src1Reg/Src2Reg spelling. Read it as a before/after pair, not as a single
// compilable body.
bool AArch64FastISel::selectSelect(const Instruction *I) {
const SelectInst *SI = cast<SelectInst>(I);
EVT DestEVT = TLI.getValueType(SI->getType(), true);
if (!DestEVT.isSimple())
assert(isa<SelectInst>(I) && "Expected a select instruction.");
MVT VT;
if (!isTypeSupported(I->getType(), VT))
return false;
MVT DestVT = DestEVT.getSimpleVT();
if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
DestVT != MVT::f64)
unsigned Opc;
const TargetRegisterClass *RC;
switch (VT.SimpleTy) {
default:
return false;
unsigned SelectOpc;
const TargetRegisterClass *RC = nullptr;
switch (DestVT.SimpleTy) {
default: return false;
// i1, i8 and i16 fall through to the i32 case, so they use the same 32-bit
// CSEL opcode and GPR32 register class.
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
SelectOpc = AArch64::CSELWr; RC = &AArch64::GPR32RegClass; break;
Opc = AArch64::CSELWr;
RC = &AArch64::GPR32RegClass;
break;
case MVT::i64:
SelectOpc = AArch64::CSELXr; RC = &AArch64::GPR64RegClass; break;
Opc = AArch64::CSELXr;
RC = &AArch64::GPR64RegClass;
break;
case MVT::f32:
SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
Opc = AArch64::FCSELSrrr;
RC = &AArch64::FPR32RegClass;
break;
case MVT::f64:
SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
Opc = AArch64::FCSELDrrr;
RC = &AArch64::FPR64RegClass;
break;
}
const SelectInst *SI = cast<SelectInst>(I);
const Value *Cond = SI->getCondition();
bool NeedTest = true;
// CC defaults to NE. It is handed to foldXALUIntrinsic below, which presumably
// updates it when the flags of an XALU intrinsic can be reused — TODO confirm
// against that helper's definition.
AArch64CC::CondCode CC = AArch64CC::NE;
if (foldXALUIntrinsic(CC, I, Cond))
NeedTest = false;
unsigned CondReg = getRegForValue(Cond);
if (!CondReg)
return false;
bool CondIsKill = hasTrivialKill(Cond);
// Try to pickup the flags, so we don't have to emit another compare.
if (foldXALUIntrinsic(CC, I, Cond)) {
// Fake request the condition to force emission of the XALU intrinsic.
unsigned CondReg = getRegForValue(Cond);
if (!CondReg)
return false;
} else {
unsigned CondReg = getRegForValue(Cond);
if (!CondReg)
return false;
bool CondIsKill = hasTrivialKill(Cond);
// The NeedTest variant masks the condition with 1 via a separate AND and then
// compares the masked value against 0.
if (NeedTest) {
unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
// Emit a TST instruction (ANDS wzr, reg, #imm).
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
AArch64::WZR)
.addReg(CondReg, getKillRegState(CondIsKill))
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
}
// Registers are requested for both select operands before the combined null
// check, so either failure makes the function return false.
unsigned TrueReg = getRegForValue(SI->getTrueValue());
bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
unsigned Src1Reg = getRegForValue(SI->getTrueValue());
bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
unsigned FalseReg = getRegForValue(SI->getFalseValue());
bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
unsigned Src2Reg = getRegForValue(SI->getFalseValue());
bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
if (!TrueReg || !FalseReg)
if (!Src1Reg || !Src2Reg)
return false;
// The condition code CC is passed as the final argument of the emitted select.
unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
FalseReg, FalseIsKill, CC);
unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
Src2IsKill, CC);
updateValueMap(I, ResultReg);
return true;
}

View File

@ -1,63 +0,0 @@
; RUN: llc -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; i32 select between the constants 123 and 357, driven by `icmp sgt i32 %c, 1`.
; The checks expect, in order, an `and` with #0x1 on w0, a `cmp` against 0, and
; a 32-bit `csel` using the `ne` condition.
define i32 @t1(i32 %c) nounwind readnone {
entry:
; CHECK: @t1
; CHECK: and w0, w0, #0x1
; CHECK: cmp w0, #0
; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne
%0 = icmp sgt i32 %c, 1
%1 = select i1 %0, i32 123, i32 357
ret i32 %1
}
; i64 select between the constants 123 and 357, driven by `icmp sgt i32 %c, 1`.
; The checks expect, in order, an `and` with #0x1 on w0, a `cmp` against 0, and
; a 64-bit `csel` (x registers) using the `ne` condition.
define i64 @t2(i32 %c) nounwind readnone {
entry:
; CHECK: @t2
; CHECK: and w0, w0, #0x1
; CHECK: cmp w0, #0
; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne
%0 = icmp sgt i32 %c, 1
%1 = select i1 %0, i64 123, i64 357
ret i64 %1
}
; i32 select between the arguments %a and %b, with the i1 argument %c used
; directly as the condition. The checks expect, in order, an `and` with #0x1 on
; w0, a `cmp` against 0, and a 32-bit `csel` using the `ne` condition.
define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
entry:
; CHECK: @t3
; CHECK: and w0, w0, #0x1
; CHECK: cmp w0, #0
; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne
%0 = select i1 %c, i32 %a, i32 %b
ret i32 %0
}
; i64 select between the arguments %a and %b, with the i1 argument %c used
; directly as the condition. The checks expect, in order, an `and` with #0x1 on
; w0, a `cmp` against 0, and a 64-bit `csel` (x registers) using `ne`.
define i64 @t4(i1 %c, i64 %a, i64 %b) nounwind readnone {
entry:
; CHECK: @t4
; CHECK: and w0, w0, #0x1
; CHECK: cmp w0, #0
; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne
%0 = select i1 %c, i64 %a, i64 %b
ret i64 %0
}
; float select between the arguments %a and %b on the i1 argument %c. The
; checks expect, in order, an `and` with #0x1 on w0, a `cmp` against 0, and an
; `fcsel` on the single-precision registers s0/s1 using the `ne` condition.
define float @t5(i1 %c, float %a, float %b) nounwind readnone {
entry:
; CHECK: @t5
; CHECK: and w0, w0, #0x1
; CHECK: cmp w0, #0
; CHECK: fcsel s0, s0, s1, ne
%0 = select i1 %c, float %a, float %b
ret float %0
}
; double select between the arguments %a and %b on the i1 argument %c. The
; checks expect, in order, an `and` with #0x1 on w0, a `cmp` against 0, and an
; `fcsel` on the double-precision registers d0/d1 using the `ne` condition.
define double @t6(i1 %c, double %a, double %b) nounwind readnone {
entry:
; CHECK: @t6
; CHECK: and w0, w0, #0x1
; CHECK: cmp w0, #0
; CHECK: fcsel d0, d0, d1, ne
%0 = select i1 %c, double %a, double %b
ret double %0
}

View File

@ -0,0 +1,59 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
; First test the different supported value types for select.
; i1 select with zeroext condition, operands and result. The first check
; accepts either `cmp w0, #0` or `tst w0, #0x1`, presumably so that one set of
; checks passes under both llc invocations at the top of the file (confirm).
; The 32-bit `csel` on w1/w2 with `ne` must be the very next instruction.
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
; CHECK-LABEL: select_i1
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne
%1 = select i1 %c, i1 %a, i1 %b
ret i1 %1
}
; i8 select with a zeroext i1 condition and zeroext i8 operands and result.
; The first check accepts either `cmp w0, #0` or `tst w0, #0x1`; the 32-bit
; `csel` on w1/w2 with `ne` must be the very next instruction.
define zeroext i8 @select_i8(i1 zeroext %c, i8 zeroext %a, i8 zeroext %b) {
; CHECK-LABEL: select_i8
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne
%1 = select i1 %c, i8 %a, i8 %b
ret i8 %1
}
; i16 select with a zeroext i1 condition and zeroext i16 operands and result.
; The first check accepts either `cmp w0, #0` or `tst w0, #0x1`; the 32-bit
; `csel` on w1/w2 with `ne` must be the very next instruction.
define zeroext i16 @select_i16(i1 zeroext %c, i16 zeroext %a, i16 zeroext %b) {
; CHECK-LABEL: select_i16
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne
%1 = select i1 %c, i16 %a, i16 %b
ret i16 %1
}
; i32 select with a zeroext i1 condition. The first check accepts either
; `cmp w0, #0` or `tst w0, #0x1`; the 32-bit `csel` on w1/w2 with `ne` must be
; the very next instruction.
define i32 @select_i32(i1 zeroext %c, i32 %a, i32 %b) {
; CHECK-LABEL: select_i32
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne
%1 = select i1 %c, i32 %a, i32 %b
ret i32 %1
}
; i64 select with a zeroext i1 condition. The first check accepts either
; `cmp w0, #0` or `tst w0, #0x1`; the 64-bit `csel` on x1/x2 with `ne` must be
; the very next instruction.
define i64 @select_i64(i1 zeroext %c, i64 %a, i64 %b) {
; CHECK-LABEL: select_i64
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: csel {{x[0-9]+}}, x1, x2, ne
%1 = select i1 %c, i64 %a, i64 %b
ret i64 %1
}
; float select with a zeroext i1 condition. The first check accepts either
; `cmp w0, #0` or `tst w0, #0x1`; the single-precision `fcsel` on s0/s1 with
; `ne` must be the very next instruction.
define float @select_f32(i1 zeroext %c, float %a, float %b) {
; CHECK-LABEL: select_f32
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
%1 = select i1 %c, float %a, float %b
ret float %1
}
; double select with a zeroext i1 condition. The first check accepts either
; `cmp w0, #0` or `tst w0, #0x1`; the double-precision `fcsel` on d0/d1 with
; `ne` must be the very next instruction.
define double @select_f64(i1 zeroext %c, double %a, double %b) {
; CHECK-LABEL: select_f64
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
%1 = select i1 %c, double %a, double %b
ret double %1
}