[X86] Don't allow combineSIntToFP to create v2i32 vectors after type legalization.

If we're after type legalization we should only be trying to turn
v2i64 into v2i32. So bitcast to v4i32, shuffle the even elements
together, and then use X86ISD::CVTSI2P. The alternative is to leave
the v2i64 type alone and let it be scalarized. Hopefully keeping
it packed is better.

Fixes PR42905.

llvm-svn: 368091
This commit is contained in:
Craig Topper 2019-08-06 21:43:15 +00:00
parent 906e727972
commit ecc1e5d476
2 changed files with 40 additions and 4 deletions

View File

@ -42848,6 +42848,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
@ -42877,13 +42878,22 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
unsigned BitWidth = InVT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
if (NumSignBits >= (BitWidth - 31)) {
EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32);
EVT TruncVT = MVT::i32;
if (InVT.isVector())
TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT,
InVT.getVectorNumElements());
SDLoc dl(N);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
}
// If we're after legalize and the type is v2i32 we need to shuffle and
// use CVTSI2P.
assert(InVT == MVT::v2i64 && "Unexpected VT!");
SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
{ 0, 2, -1, -1 });
return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);
}
}
@ -44481,7 +44491,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);

View File

@ -0,0 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; Regression test for PR42905: sitofp of a <4 x i64> value (built from a
; constant insertelement followed by a shufflevector) to <4 x double>.
; The expected code converts part of the vector with scalar cvtsi2sd and the
; rest with a pshufd feeding a packed cvtdq2pd.
define <4 x double> @autogen_SD30452(i1 %L230) {
; CHECK-LABEL: autogen_SD30452:
; CHECK: # %bb.0: # %BB
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [151829,151829]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; CHECK-NEXT: movq %xmm2, %rax
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: cvtsi2sd %rax, %xmm2
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK-NEXT: retq
BB:
%I = insertelement <4 x i64> zeroinitializer, i64 151829, i32 3
%Shuff7 = shufflevector <4 x i64> %I, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
br label %CF242
CF242: ; preds = %CF242, %BB
%FC125 = sitofp <4 x i64> %Shuff7 to <4 x double>
ret <4 x double> %FC125
}