forked from OSchip/llvm-project
[x86] use vector instructions to lower more FP->int->FP casts
This is an enhancement to D77895 to avoid another round-trip from XMM->GPR->XMM. This time we handle the case of starting/ending with an f64 and casting to signed i32 as the intermediate value.

It's a bit more involved than I initially assumed because we need to use target-specific opcodes to represent the non-standard cast ops: in the f64 case the 128-bit vectors are v2f64 -> v4i32 -> v2f64, so the FP and integer vector types have different element counts.

Differential Revision: https://reviews.llvm.org/D78362
This commit is contained in:
parent
8c68de2d63
commit
cceb630a07
|
@ -19178,17 +19178,25 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
|
|||
MVT IntVT = CastToInt.getSimpleValueType();
|
||||
SDValue X = CastToInt.getOperand(0);
|
||||
// TODO: Allow size-changing from source to dest (double -> i32 -> float)
|
||||
if (X.getSimpleValueType() != VT ||
|
||||
VT.getSizeInBits() != IntVT.getSizeInBits())
|
||||
if (X.getSimpleValueType() != VT)
|
||||
return SDValue();
|
||||
|
||||
// See if we have a 128-bit vector cast op for this type of cast.
|
||||
unsigned NumEltsInXMM = 128 / VT.getScalarSizeInBits();
|
||||
MVT VecFPVT = MVT::getVectorVT(VT, NumEltsInXMM);
|
||||
MVT VecIntVT = MVT::getVectorVT(IntVT, NumEltsInXMM);
|
||||
if (!useVectorCast(CastToFP.getOpcode(), VecIntVT, VecFPVT, Subtarget))
|
||||
// See if we have 128-bit vector cast instructions for this type of cast.
|
||||
// We need cvttps2dq + cvtdq2ps or cvttpd2dq + cvtdq2pd.
|
||||
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
|
||||
IntVT != MVT::i32)
|
||||
return SDValue();
|
||||
|
||||
unsigned NumFPEltsInXMM = 128 / VT.getScalarSizeInBits();
|
||||
unsigned NumIntEltsInXMM = 128 / IntVT.getScalarSizeInBits();
|
||||
MVT VecFPVT = MVT::getVectorVT(VT, NumFPEltsInXMM);
|
||||
MVT VecIntVT = MVT::getVectorVT(IntVT, NumIntEltsInXMM);
|
||||
|
||||
// We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
|
||||
bool NeedX86Opcodes = VT.getSizeInBits() != IntVT.getSizeInBits();
|
||||
unsigned ToIntOpcode = NeedX86Opcodes ? X86ISD::CVTTP2SI : ISD::FP_TO_SINT;
|
||||
unsigned ToFPOpcode = NeedX86Opcodes ? X86ISD::CVTSI2P : ISD::SINT_TO_FP;
|
||||
|
||||
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
|
||||
//
|
||||
// We are not defining the high elements (for example, zero them) because
|
||||
|
@ -19198,8 +19206,8 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
|
|||
SDLoc DL(CastToFP);
|
||||
SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
|
||||
SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecFPVT, X);
|
||||
SDValue VCastToInt = DAG.getNode(ISD::FP_TO_SINT, DL, VecIntVT, VecX);
|
||||
SDValue VCastToFP = DAG.getNode(ISD::SINT_TO_FP, DL, VecFPVT, VCastToInt);
|
||||
SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
|
||||
SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecFPVT, VCastToInt);
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
|
||||
}
|
||||
|
||||
|
|
|
@ -263,15 +263,14 @@ define float @trunc_signed_f32_nsz(float %x) #0 {
|
|||
define double @trunc_signed32_f64_no_fast_math(double %x) {
|
||||
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %eax
|
||||
; SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: trunc_signed32_f64_no_fast_math:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vcvttsd2si %xmm0, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %eax, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
%i = fptosi double %x to i32
|
||||
%r = sitofp i32 %i to double
|
||||
|
@ -281,9 +280,8 @@ define double @trunc_signed32_f64_no_fast_math(double %x) {
|
|||
define double @trunc_signed32_f64_nsz(double %x) #0 {
|
||||
; SSE2-LABEL: trunc_signed32_f64_nsz:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: cvttsd2si %xmm0, %eax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2sd %eax, %xmm0
|
||||
; SSE2-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: trunc_signed32_f64_nsz:
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
define i32 @isint_return(double %d) nounwind {
|
||||
; CHECK64-LABEL: isint_return:
|
||||
; CHECK64: # %bb.0:
|
||||
; CHECK64-NEXT: cvttsd2si %xmm0, %eax
|
||||
; CHECK64-NEXT: cvtsi2sd %eax, %xmm1
|
||||
; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
|
||||
; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
|
||||
; CHECK64-NEXT: movq %xmm1, %rax
|
||||
; CHECK64-NEXT: andl $1, %eax
|
||||
|
@ -18,8 +18,8 @@ define i32 @isint_return(double %d) nounwind {
|
|||
; CHECK32-LABEL: isint_return:
|
||||
; CHECK32: # %bb.0:
|
||||
; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK32-NEXT: cvttsd2si %xmm0, %eax
|
||||
; CHECK32-NEXT: cvtsi2sd %eax, %xmm1
|
||||
; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
|
||||
; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
|
||||
; CHECK32-NEXT: movd %xmm1, %eax
|
||||
; CHECK32-NEXT: andl $1, %eax
|
||||
|
@ -62,8 +62,8 @@ declare void @foo()
|
|||
define void @isint_branch(double %d) nounwind {
|
||||
; CHECK64-LABEL: isint_branch:
|
||||
; CHECK64: # %bb.0:
|
||||
; CHECK64-NEXT: cvttsd2si %xmm0, %eax
|
||||
; CHECK64-NEXT: cvtsi2sd %eax, %xmm1
|
||||
; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
|
||||
; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; CHECK64-NEXT: ucomisd %xmm1, %xmm0
|
||||
; CHECK64-NEXT: jne .LBB2_2
|
||||
; CHECK64-NEXT: jp .LBB2_2
|
||||
|
@ -77,8 +77,8 @@ define void @isint_branch(double %d) nounwind {
|
|||
; CHECK32-LABEL: isint_branch:
|
||||
; CHECK32: # %bb.0:
|
||||
; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK32-NEXT: cvttsd2si %xmm0, %eax
|
||||
; CHECK32-NEXT: cvtsi2sd %eax, %xmm1
|
||||
; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
|
||||
; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; CHECK32-NEXT: ucomisd %xmm1, %xmm0
|
||||
; CHECK32-NEXT: jne .LBB2_2
|
||||
; CHECK32-NEXT: jp .LBB2_2
|
||||
|
|
|
@ -5,8 +5,8 @@ define zeroext i8 @t(double %x) nounwind readnone {
|
|||
; CHECK-LABEL: t:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvttsd2si %xmm0, %eax
|
||||
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
|
||||
; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
|
||||
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; CHECK-NEXT: cmpeqsd %xmm0, %xmm1
|
||||
; CHECK-NEXT: movd %xmm1, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
|
@ -24,8 +24,8 @@ define zeroext i8 @u(double %x) nounwind readnone {
|
|||
; CHECK-LABEL: u:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: cvttsd2si %xmm0, %eax
|
||||
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
|
||||
; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
|
||||
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; CHECK-NEXT: cmpneqsd %xmm0, %xmm1
|
||||
; CHECK-NEXT: movd %xmm1, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
|
|
Loading…
Reference in New Issue