AVX-512: SINT_TO_FP cost model and some bugfixes

Checked some corner cases, for example the conversion
of <8 x i1> to <8 x double>.

llvm-svn: 221883
This commit is contained in:
Elena Demikhovsky 2014-11-13 11:46:16 +00:00
parent 3217c6a52c
commit d5e95b57e0
4 changed files with 123 additions and 4 deletions

View File

@ -1410,6 +1410,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
@ -13209,10 +13213,18 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
SDLoc dl(Op);
if (SrcVT.isVector())
if (SrcVT.isVector()) {
if (SrcVT.getVectorElementType() == MVT::i1) {
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT,
Op.getOperand(0)));
}
return SDValue();
}
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
@ -13225,7 +13237,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Op;
}
SDLoc dl(Op);
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
@ -15455,8 +15466,11 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget
if (NumElts != 8 && NumElts != 16)
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");

View File

@ -618,6 +618,13 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
};
if (ST->hasAVX512()) {

View File

@ -1,4 +1,5 @@
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s
define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
; SSE2: sitofpv2i8v2double
@ -279,3 +280,47 @@ define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
%1 = sitofp <32 x i64> %a to <32 x float>
ret <32 x float> %1
}
; Cost-model test: signed conversion of <16 x i8> to <16 x float>.
; The AVX-512 (-mcpu=knl) run expects the sitofp instruction to be reported
; with an estimated cost of 2.
; AVX512F-LABEL: sitofp_16i8_float
; AVX512F: cost of 2 {{.*}} sitofp
define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
%1 = sitofp <16 x i8> %a to <16 x float>
ret <16 x float> %1
}
; Cost-model test: signed conversion of <16 x i16> to <16 x float>.
; The AVX-512 (-mcpu=knl) run expects the sitofp instruction to be reported
; with an estimated cost of 2. The directives for this test sit inside the
; function body rather than above it.
define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
; AVX512F-LABEL: sitofp_16i16_float
; AVX512F: cost of 2 {{.*}} sitofp
%1 = sitofp <16 x i16> %a to <16 x float>
ret <16 x float> %1
}
; Cost-model test: signed conversion of <8 x i8> to <8 x double>.
; The AVX-512 (-mcpu=knl) run expects the sitofp instruction to be reported
; with an estimated cost of 2.
; AVX512F-LABEL: sitofp_8i8_double
; AVX512F: cost of 2 {{.*}} sitofp
define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
%1 = sitofp <8 x i8> %a to <8 x double>
ret <8 x double> %1
}
; Cost-model test: signed conversion of <8 x i16> to <8 x double>.
; The AVX-512 (-mcpu=knl) run expects the sitofp instruction to be reported
; with an estimated cost of 2.
; AVX512F-LABEL: sitofp_8i16_double
; AVX512F: cost of 2 {{.*}} sitofp
define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
%1 = sitofp <8 x i16> %a to <8 x double>
ret <8 x double> %1
}
; Cost-model test: signed conversion of an <8 x i1> mask to <8 x double>.
; The mask is produced by an fcmp against zero so that the sitofp operand is
; a genuine i1 vector. The AVX-512 (-mcpu=knl) run expects the sitofp
; instruction to be reported with an estimated cost of 4.
; AVX512F-LABEL: sitofp_8i1_double
; AVX512F: cost of 4 {{.*}} sitofp
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
%1 = sitofp <8 x i1> %cmpres to <8 x double>
ret <8 x double> %1
}
; Cost-model test: signed conversion of a <16 x i1> mask to <16 x float>.
; The mask is produced by an fcmp against zero so that the sitofp operand is
; a genuine i1 vector. The AVX-512 (-mcpu=knl) run expects the sitofp
; instruction to be reported with an estimated cost of 3.
; AVX512F-LABEL: sitofp_16i1_float
; AVX512F: cost of 3 {{.*}} sitofp
define <16 x float> @sitofp_16i1_float(<16 x float> %a) {
%cmpres = fcmp ogt <16 x float> %a, zeroinitializer
%1 = sitofp <16 x i1> %cmpres to <16 x float>
ret <16 x float> %1
}

View File

@ -255,3 +255,56 @@ define double @uitofp03(i32 %a) nounwind {
%b = uitofp i32 %a to double
ret double %b
}
; Codegen test: signed conversion of a <16 x i1> mask (result of an icmp slt
; against zero) to <16 x float>. The output is expected to contain
; vpbroadcastd followed by vcvtdq2ps, in that order.
; NOTE(review): the RUN line is outside this hunk; target/CPU flags not
; visible here.
; CHECK-LABEL: @sitofp_16i1_float
; CHECK: vpbroadcastd
; CHECK: vcvtdq2ps
define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
%mask = icmp slt <16 x i32> %a, zeroinitializer
%1 = sitofp <16 x i1> %mask to <16 x float>
ret <16 x float> %1
}
; Codegen test: signed conversion of <16 x i8> to <16 x float>. The output is
; expected to contain vpmovsxbd (byte-to-dword sign extension) followed by
; vcvtdq2ps, in that order.
; CHECK-LABEL: @sitofp_16i8_float
; CHECK: vpmovsxbd
; CHECK: vcvtdq2ps
define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
%1 = sitofp <16 x i8> %a to <16 x float>
ret <16 x float> %1
}
; Codegen test: signed conversion of <16 x i16> to <16 x float>. The output
; is expected to contain vpmovsxwd (word-to-dword sign extension) followed by
; vcvtdq2ps, in that order.
; CHECK-LABEL: @sitofp_16i16_float
; CHECK: vpmovsxwd
; CHECK: vcvtdq2ps
define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
%1 = sitofp <16 x i16> %a to <16 x float>
ret <16 x float> %1
}
; Codegen test: signed conversion of <8 x i16> to <8 x double>. The output is
; expected to contain vpmovsxwd (word-to-dword sign extension) followed by
; vcvtdq2pd, in that order.
; CHECK-LABEL: @sitofp_8i16_double
; CHECK: vpmovsxwd
; CHECK: vcvtdq2pd
define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
%1 = sitofp <8 x i16> %a to <8 x double>
ret <8 x double> %1
}
; Codegen test: signed conversion of <8 x i8> to <8 x double>. The output is
; expected to contain vpmovzxwd, then a vpslld/vpsrad shift pair, then
; vcvtdq2pd, in that order.
; The label pattern carries the leading '@' like the other tests in this
; file, so it anchors on the function's own label line rather than on any
; earlier mention of the bare name.
; CHECK-LABEL: @sitofp_8i8_double
; CHECK: vpmovzxwd
; CHECK: vpslld
; CHECK: vpsrad
; CHECK: vcvtdq2pd
define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
%1 = sitofp <8 x i8> %a to <8 x double>
ret <8 x double> %1
}
; Codegen test: signed conversion of an <8 x i1> mask (result of an fcmp ogt
; against zero) to <8 x double>. The output is expected to contain
; vpbroadcastq followed by vcvtdq2pd, in that order.
; CHECK-LABEL: @sitofp_8i1_double
; CHECK: vpbroadcastq
; CHECK: vcvtdq2pd
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
%1 = sitofp <8 x i1> %cmpres to <8 x double>
ret <8 x double> %1
}