[X86] Don't allow combineSIntToFP to create v2i32 vectors after type legalization.

If we're after type legalization we should only be trying to turn v2i64 into v2i32. So bitcast to v4i32, shuffle the even elements together. Then use X86ISD::CVTSI2P. The alternative is to leave the v2i64 type alone and let it scalarized. Hopefully keeping it packed is better. Fixes PR42905. llvm-svn: 368091
2019-08-06 21:43:15 +00:00 · 2019-08-06 21:43:15 +00:00 · ecc1e5d476
parent 906e727972
commit ecc1e5d476
2 changed files with 40 additions and 4 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -42848,6 +42848,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
 }

 static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
+                               TargetLowering::DAGCombinerInfo &DCI,
                               const X86Subtarget &Subtarget) {
  // First try to optimize away the conversion entirely when it's
  // conditionally from a constant. Vectors only.
@ -42877,13 +42878,22 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
    unsigned BitWidth = InVT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
    if (NumSignBits >= (BitWidth - 31)) {
-      EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32);
+      EVT TruncVT = MVT::i32;
      if (InVT.isVector())
        TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT,
                                   InVT.getVectorNumElements());
      SDLoc dl(N);
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
-      return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
+      if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) {
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
+        return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
+      }
+      // If we're after legalize and the type is v2i32 we need to shuffle and
+      // use CVTSI2P.
+      assert(InVT == MVT::v2i64 && "Unexpected VT!");
+      SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
+      SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
+                                          { 0, 2, -1, -1 });
+      return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);
    }
  }

@ -44481,7 +44491,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
  case ISD::MLOAD:          return combineMaskedLoad(N, DAG, DCI, Subtarget);
  case ISD::STORE:          return combineStore(N, DAG, DCI, Subtarget);
  case ISD::MSTORE:         return combineMaskedStore(N, DAG, DCI, Subtarget);
-  case ISD::SINT_TO_FP:     return combineSIntToFP(N, DAG, Subtarget);
+  case ISD::SINT_TO_FP:     return combineSIntToFP(N, DAG, DCI, Subtarget);
  case ISD::UINT_TO_FP:     return combineUIntToFP(N, DAG, Subtarget);
  case ISD::FADD:
  case ISD::FSUB:           return combineFaddFsub(N, DAG, Subtarget);
--- a/llvm/test/CodeGen/X86/pr42905.ll
+++ b/llvm/test/CodeGen/X86/pr42905.ll
@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define <4 x double> @autogen_SD30452(i1 %L230) {
+; CHECK-LABEL: autogen_SD30452:
+; CHECK:       # %bb.0: # %BB
+; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [151829,151829]
+; CHECK-NEXT:    movq %xmm0, %rax
+; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; CHECK-NEXT:    movq %xmm2, %rax
+; CHECK-NEXT:    xorps %xmm2, %xmm2
+; CHECK-NEXT:    cvtsi2sd %rax, %xmm2
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-NEXT:    cvtdq2pd %xmm1, %xmm1
+; CHECK-NEXT:    retq
+BB:
+  %I = insertelement <4 x i64> zeroinitializer, i64 151829, i32 3
+  %Shuff7 = shufflevector <4 x i64> %I, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
+  br label %CF242
+
+CF242:                                            ; preds = %CF242, %BB
+  %FC125 = sitofp <4 x i64> %Shuff7 to <4 x double>
+  ret <4 x double> %FC125
+}