AVX-512: SINT_TO_FP cost model and some bugfixes

Checked some corner cases, for example the translation of <8 x i1> to <8 x double>.

llvm-svn: 221883
2014-11-13 11:46:16 +00:00 · 2014-11-13 11:46:16 +00:00 · d5e95b57e0
parent 3217c6a52c
commit d5e95b57e0
4 changed files with 123 additions and 4 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -1410,6 +1410,10 @@ void X86TargetLowering::resetOperationActions() {
    setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
    setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v8i1,   Custom);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v16i1,  Custom);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v16i8,  Promote);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v16i16, Promote);
    setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
    setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
@ -13209,10 +13213,18 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                           SelectionDAG &DAG) const {
  MVT SrcVT = Op.getOperand(0).getSimpleValueType();
+  SDLoc dl(Op);

-  if (SrcVT.isVector())
+  if (SrcVT.isVector()) {
+    if (SrcVT.getVectorElementType() == MVT::i1) {
+      MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
+      return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+                         DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT,
+                                     Op.getOperand(0)));
+    }
    return SDValue();
-
+  }
+  
  assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

@ -13225,7 +13237,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
    return Op;
  }

-  SDLoc dl(Op);
  unsigned Size = SrcVT.getSizeInBits()/8;
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
@ -15455,8 +15466,11 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget
  if (NumElts != 8 && NumElts != 16)
    return SDValue();

-  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
+    if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
+      return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
    return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@ -618,6 +618,13 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v16i32, 3 },
    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v16i32, 3 },

+    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i1,  3 },
+    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i8,  2 },
+    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 2 },
+    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 1 },
+    { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i1,   4 },
+    { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i16,  2 },
+    { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  1 },
  };

  if (ST->hasAVX512()) {
--- a/llvm/test/Analysis/CostModel/X86/sitofp.ll
+++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll
@ -1,4 +1,5 @@
 ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s

 define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
  ; SSE2: sitofpv2i8v2double
@ -279,3 +280,47 @@ define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
  %1 = sitofp <32 x i64> %a to <32 x float>
  ret <32 x float> %1
 }
+
+; AVX512F-LABEL: sitofp_16i8_float
+; AVX512F: cost of 2 {{.*}} sitofp
+define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
+  %1 = sitofp <16 x i8> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
+  ; AVX512F-LABEL: sitofp_16i16_float
+  ; AVX512F: cost of 2 {{.*}} sitofp
+  %1 = sitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+; AVX512F-LABEL: sitofp_8i8_double
+; AVX512F: cost of 2 {{.*}} sitofp
+define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
+  %1 = sitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+; AVX512F-LABEL: sitofp_8i16_double
+; AVX512F: cost of 2 {{.*}} sitofp
+define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
+  %1 = sitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+; AVX512F-LABEL: sitofp_8i1_double
+; AVX512F: cost of 4 {{.*}} sitofp
+define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
+  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
+  %1 = sitofp <8 x i1> %cmpres to <8 x double>
+  ret <8 x double> %1
+}
+
+; AVX512F-LABEL: sitofp_16i1_float
+; AVX512F: cost of 3 {{.*}} sitofp
+define <16 x float> @sitofp_16i1_float(<16 x float> %a) {
+  %cmpres = fcmp ogt <16 x float> %a, zeroinitializer
+  %1 = sitofp <16 x i1> %cmpres to <16 x float>
+  ret <16 x float> %1
+}
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@ -255,3 +255,56 @@ define double @uitofp03(i32 %a) nounwind {
  %b = uitofp i32 %a to double
  ret double %b
 }
+
+; CHECK-LABEL: @sitofp_16i1_float
+; CHECK: vpbroadcastd
+; CHECK: vcvtdq2ps
+define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
+  %mask = icmp slt <16 x i32> %a, zeroinitializer
+  %1 = sitofp <16 x i1> %mask to <16 x float>
+  ret <16 x float> %1
+}
+
+; CHECK-LABEL: @sitofp_16i8_float
+; CHECK: vpmovsxbd
+; CHECK: vcvtdq2ps
+define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
+  %1 = sitofp <16 x i8> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+; CHECK-LABEL: @sitofp_16i16_float
+; CHECK: vpmovsxwd
+; CHECK: vcvtdq2ps
+define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
+  %1 = sitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+; CHECK-LABEL: @sitofp_8i16_double
+; CHECK: vpmovsxwd
+; CHECK: vcvtdq2pd
+define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
+  %1 = sitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+; CHECK-LABEL: sitofp_8i8_double
+; CHECK: vpmovzxwd
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vcvtdq2pd
+define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
+  %1 = sitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+
+; CHECK-LABEL: @sitofp_8i1_double
+; CHECK: vpbroadcastq
+; CHECK: vcvtdq2pd
+define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
+  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
+  %1 = sitofp <8 x i1> %cmpres to <8 x double>
+  ret <8 x double> %1
+}