[ARMv8] Add CodeGen support for VSEL.

This uses the ARMcmov pattern that Tim cleaned up in r188995.

Thanks to Simon Tatham for his floating point help!

llvm-svn: 189024
Joey Gouly 2013-08-22 15:29:11 +00:00
parent ea68a5d636
commit 881eab53be
3 changed files with 414 additions and 9 deletions


@@ -3178,6 +3178,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SelectTrue, SelectFalse, ISD::SETNE);
}
static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
if (CC == ISD::SETNE)
return ISD::SETEQ;
return ISD::getSetCCSwappedOperands(CC);
}
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
bool &swpCmpOps, bool &swpVselOps) {
// Start by selecting the GE condition code for opcodes that return true for
// 'equality'
if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
CC == ISD::SETULE)
CondCode = ARMCC::GE;
// and GT for opcodes that return false for 'equality'.
else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
CC == ISD::SETULT)
CondCode = ARMCC::GT;
// Since we are constrained to GE/GT, if the opcode contains 'less', we need
// to swap the compare operands.
if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
CC == ISD::SETULT)
swpCmpOps = true;
// Both GT and GE are ordered comparisons, and return false for 'unordered'.
// If we have an unordered opcode, we need to swap the operands to the VSEL
// instruction (effectively negating the condition).
//
// This also has the effect of swapping which one of 'less' or 'greater'
// returns true, so we also swap the compare operands. It also switches
// whether we return true for 'equality', so we compensate by picking the
// opposite condition code to our original choice.
if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
CC == ISD::SETUGT) {
swpCmpOps = !swpCmpOps;
swpVselOps = !swpVselOps;
CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
}
// 'ordered' is 'anything but unordered', so use the VS condition code and
// swap the VSEL operands.
if (CC == ISD::SETO) {
CondCode = ARMCC::VS;
swpVselOps = true;
}
// 'unordered or not equal' is 'anything but equal', so use the EQ condition
// code and swap the VSEL operands.
if (CC == ISD::SETUNE) {
CondCode = ARMCC::EQ;
swpVselOps = true;
}
}
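To make the mapping concrete, here is a rough walkthrough for an unsigned less-than, using the same hard-float register assignments as the tests below (s0/s1 hold the compared values, s2/s3 the select operands). For ISD::SETULT the second branch above picks GT, the 'less' check sets swpCmpOps, and the 'unordered' check then undoes that swap, sets swpVselOps and relaxes GT to GE, leaving CondCode = GE with only the VSEL operands exchanged:

  %tst = fcmp ult float %x, %y
  %val = select i1 %tst, float %a, float %b
  ; expected lowering (see test_vsel32ult below):
  ;   vcmpe.f32  s0, s1
  ;   vselge.f32 s0, s3, s2

GE is false on an unordered compare result, so the swapped VSEL picks %a exactly when the original ult condition holds.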
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3188,15 +3243,52 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
// Try to generate VSEL on ARMv8.
// The VSEL instruction can't use all the usual ARM condition
// codes: it only has two bits to select the condition code, so it's
// constrained to use only GE, GT, VS and EQ.
//
// To implement all the various ISD::SETXXX opcodes, we sometimes need to
// swap the operands of the previous compare instruction (effectively
// inverting the compare condition, swapping 'less' and 'greater') and
// sometimes need to swap the operands to the VSEL (which inverts the
// condition in the sense of firing whenever the previous condition didn't)
if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
CC = getInverseCCForVSEL(CC);
std::swap(TrueVal, FalseVal);
}
}
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
Cmp);
}
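A concrete case for the integer path just above, mirroring test_vsel32slt below: a signed less-than gives ARMCC::LT, which VSEL cannot encode, so the setcc is replaced by its swapped-operand form SETGT and the select operands are exchanged. The compare keeps its original operand order and the VSEL operands come out swapped:

  %tst = icmp slt i32 %l, %r
  %val = select i1 %tst, float %a, float %b
  ; expected lowering (r0 = %l, r1 = %r, s0 = %a, s1 = %b):
  ;   cmp        r0, r1
  ;   vselgt.f32 s0, s1, s0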
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
// Try to generate VSEL on ARMv8.
if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
bool swpCmpOps = false;
bool swpVselOps = false;
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
if (swpCmpOps)
std::swap(LHS, RHS);
if (swpVselOps)
std::swap(TrueVal, FalseVal);
}
}
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
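The SETO special case is easiest to see end to end, mirroring test_vsel32ord below: 'ordered' becomes 'anything but unordered', so checkVSELConstraints returns ARMCC::VS with swpVselOps set, and the swapped VSEL picks the original true value whenever the compare result is ordered:

  %tst = fcmp ord float %x, %y
  %val = select i1 %tst, float %a, float %b
  ; expected lowering (s0 = %x, s1 = %y, s2 = %a, s3 = %b):
  ;   vcmpe.f32  s0, s1
  ;   vselvs.f32 s0, s3, s2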


@@ -333,24 +333,28 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
let D = VFPNeonA8Domain;
}
multiclass vsel_inst<string op, bits<2> opc> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
multiclass vsel_inst<string op, bits<2> opc, int CC> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
Uses = [CPSR], AddedComplexity = 4 in {
def S : ASbInp<0b11100, opc, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
[]>, Requires<[HasV8FP]>;
[(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
Requires<[HasV8FP]>;
def D : ADbInp<0b11100, opc, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
[]>, Requires<[HasV8FP]>;
[(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
Requires<[HasV8FP]>;
}
}
defm VSELGT : vsel_inst<"gt", 0b11>;
defm VSELGE : vsel_inst<"ge", 0b10>;
defm VSELEQ : vsel_inst<"eq", 0b00>;
defm VSELVS : vsel_inst<"vs", 0b01>;
// The CC constants here match ARMCC::CondCodes.
defm VSELGT : vsel_inst<"gt", 0b11, 12>;
defm VSELGE : vsel_inst<"ge", 0b10, 10>;
defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
defm VSELVS : vsel_inst<"vs", 0b01, 6>;
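(For reference, the integer operands above line up with ARMCC::CondCodes: EQ is 0, VS is 6, GE is 10 and GT is 12, so for example VSELGTS only matches an ARMcmov whose condition operand is GT.)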
multiclass vmaxmin_inst<string op, bit opc> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {


@@ -0,0 +1,309 @@
; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+v8fp -float-abi=hard | FileCheck %s
@varfloat = global float 0.0
@vardouble = global double 0.0
define void @test_vsel32sgt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
; CHECK: test_vsel32sgt
%tst1 = icmp sgt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: cmp r0, r1
; CHECK: vselgt.f32 s0, s0, s1
ret void
}
define void @test_vsel64sgt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
; CHECK: test_vsel64sgt
%tst1 = icmp sgt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: cmp r0, r1
; CHECK: vselgt.f64 d16, d0, d1
ret void
}
define void @test_vsel32sge(i32 %lhs32, i32 %rhs32, float %a, float %b) {
; CHECK: test_vsel32sge
%tst1 = icmp sge i32 %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: cmp r0, r1
; CHECK: vselge.f32 s0, s0, s1
ret void
}
define void @test_vsel64sge(i32 %lhs32, i32 %rhs32, double %a, double %b) {
; CHECK: test_vsel64sge
%tst1 = icmp sge i32 %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: cmp r0, r1
; CHECK: vselge.f64 d16, d0, d1
ret void
}
define void @test_vsel32eq(i32 %lhs32, i32 %rhs32, float %a, float %b) {
; CHECK: test_vsel32eq
%tst1 = icmp eq i32 %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: cmp r0, r1
; CHECK: vseleq.f32 s0, s0, s1
ret void
}
define void @test_vsel64eq(i32 %lhs32, i32 %rhs32, double %a, double %b) {
; CHECK: test_vsel64eq
%tst1 = icmp eq i32 %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: cmp r0, r1
; CHECK: vseleq.f64 d16, d0, d1
ret void
}
define void @test_vsel32slt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
; CHECK: test_vsel32slt
%tst1 = icmp slt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: cmp r0, r1
; CHECK: vselgt.f32 s0, s1, s0
ret void
}
define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
; CHECK: test_vsel64slt
%tst1 = icmp slt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: cmp r0, r1
; CHECK: vselgt.f64 d16, d1, d0
ret void
}
define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) {
; CHECK: test_vsel32sle
%tst1 = icmp sle i32 %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: cmp r0, r1
; CHECK: vselge.f32 s0, s1, s0
ret void
}
define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) {
; CHECK: test_vsel64sle
%tst1 = icmp sle i32 %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: cmp r0, r1
; CHECK: vselge.f64 d16, d1, d0
ret void
}
define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32ogt
%tst1 = fcmp ogt float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselgt.f32 s0, s2, s3
ret void
}
define void @test_vsel64ogt(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64ogt
%tst1 = fcmp ogt float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselgt.f64 d16, d1, d2
ret void
}
define void @test_vsel32oge(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32oge
%tst1 = fcmp oge float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselge.f32 s0, s2, s3
ret void
}
define void @test_vsel64oge(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64oge
%tst1 = fcmp oge float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselge.f64 d16, d1, d2
ret void
}
define void @test_vsel32oeq(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32oeq
%tst1 = fcmp oeq float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s0, s1
; CHECK: vseleq.f32 s0, s2, s3
ret void
}
define void @test_vsel64oeq(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64oeq
%tst1 = fcmp oeq float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s0, s1
; CHECK: vseleq.f64 d16, d1, d2
ret void
}
define void @test_vsel32ugt(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32ugt
%tst1 = fcmp ugt float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s1, s0
; CHECK: vselge.f32 s0, s3, s2
ret void
}
define void @test_vsel64ugt(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64ugt
%tst1 = fcmp ugt float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s1, s0
; CHECK: vselge.f64 d16, d2, d1
ret void
}
define void @test_vsel32uge(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32uge
%tst1 = fcmp uge float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s1, s0
; CHECK: vselgt.f32 s0, s3, s2
ret void
}
define void @test_vsel64uge(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64uge
%tst1 = fcmp uge float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s1, s0
; CHECK: vselgt.f64 d16, d2, d1
ret void
}
define void @test_vsel32olt(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32olt
%tst1 = fcmp olt float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s1, s0
; CHECK: vselgt.f32 s0, s2, s3
ret void
}
define void @test_vsel64olt(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64olt
%tst1 = fcmp olt float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s1, s0
; CHECK: vselgt.f64 d16, d1, d2
ret void
}
define void @test_vsel32ult(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32ult
%tst1 = fcmp ult float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselge.f32 s0, s3, s2
ret void
}
define void @test_vsel64ult(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64ult
%tst1 = fcmp ult float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselge.f64 d16, d2, d1
ret void
}
define void @test_vsel32ole(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32ole
%tst1 = fcmp ole float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s1, s0
; CHECK: vselge.f32 s0, s2, s3
ret void
}
define void @test_vsel64ole(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64ole
%tst1 = fcmp ole float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s1, s0
; CHECK: vselge.f64 d16, d1, d2
ret void
}
define void @test_vsel32ule(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32ule
%tst1 = fcmp ule float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselgt.f32 s0, s3, s2
ret void
}
define void @test_vsel64ule(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64ule
%tst1 = fcmp ule float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselgt.f64 d16, d2, d1
ret void
}
define void @test_vsel32ord(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32ord
%tst1 = fcmp ord float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselvs.f32 s0, s3, s2
ret void
}
define void @test_vsel64ord(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64ord
%tst1 = fcmp ord float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselvs.f64 d16, d2, d1
ret void
}
define void @test_vsel32une(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32une
%tst1 = fcmp une float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s0, s1
; CHECK: vseleq.f32 s0, s3, s2
ret void
}
define void @test_vsel64une(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64une
%tst1 = fcmp une float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s0, s1
; CHECK: vseleq.f64 d16, d2, d1
ret void
}
define void @test_vsel32uno(float %lhs32, float %rhs32, float %a, float %b) {
; CHECK: test_vsel32uno
%tst1 = fcmp uno float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselvs.f32 s0, s2, s3
ret void
}
define void @test_vsel64uno(float %lhs32, float %rhs32, double %a, double %b) {
; CHECK: test_vsel64uno
%tst1 = fcmp uno float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
; CHECK: vcmpe.f32 s0, s1
; CHECK: vselvs.f64 d16, d1, d2
ret void
}