forked from OSchip/llvm-project
[X86] Don't scalarize v2f32->v2i64 strict_fp_to_sint/uint with avx512dq and not avx512vl.
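We can pad the v2f32 with 0s up to v8f32 and use a v8f32->v8i64 operation. This is what we already end up with for non-strict nodes, except that there we don't pad with 0s since we don't care about exceptions (for strict nodes the zero padding keeps the unused lanes from raising spurious FP exceptions).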
This commit is contained in:
parent
3badd17b69
commit
a135c4a2cf
|
@ -1719,6 +1719,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
|
||||
Subtarget.hasVLX() ? Legal : Custom);
|
||||
|
||||
if (Subtarget.hasDQI()) {
|
||||
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
|
||||
// v2f32 UINT_TO_FP is already custom under SSE2.
|
||||
assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
|
||||
isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
|
||||
"Unexpected operation action!");
|
||||
// v2i64 FP_TO_S/UINT(v2f32) custom conversion.
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
|
||||
}
|
||||
|
||||
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
|
||||
setOperationAction(ISD::SMAX, VT, Legal);
|
||||
setOperationAction(ISD::UMAX, VT, Legal);
|
||||
|
@ -1838,19 +1851,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
|
||||
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
|
||||
|
||||
if (Subtarget.hasDQI()) {
|
||||
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
|
||||
// v2f32 UINT_TO_FP is already custom under SSE2.
|
||||
assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
|
||||
isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
|
||||
"Unexpected operation action!");
|
||||
// v2i64 FP_TO_S/UINT(v2f32) custom conversion.
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
|
||||
}
|
||||
|
||||
if (Subtarget.hasBWI()) {
|
||||
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
|
||||
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
|
||||
|
@ -20717,6 +20717,25 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
|
|||
}
|
||||
|
||||
if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
|
||||
if (!Subtarget.hasVLX()) {
|
||||
// Non-strict nodes without VLX can be widened to v4f32->v4i64 by type
|
||||
// legalizer and then widened again by vector op legalization.
|
||||
if (!IsStrict)
|
||||
return SDValue();
|
||||
|
||||
SDValue Zero = DAG.getConstantFP(0.0, dl, MVT::v2f32);
|
||||
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
|
||||
{Src, Zero, Zero, Zero});
|
||||
Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
|
||||
{Op->getOperand(0), Tmp});
|
||||
SDValue Chain = Tmp.getValue(1);
|
||||
Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
if (IsStrict)
|
||||
return DAG.getMergeValues({Tmp, Chain}, dl);
|
||||
return Tmp;
|
||||
}
|
||||
|
||||
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL");
|
||||
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
|
||||
DAG.getUNDEF(MVT::v2f32));
|
||||
|
|
|
@ -685,26 +685,14 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
|
|||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512VL-64-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
|
||||
; AVX512DQ-32: # %bb.0:
|
||||
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[1],zero,zero,zero
|
||||
; AVX512DQ-32-NEXT: vcvttps2qq %ymm1, %zmm1
|
||||
; AVX512DQ-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX512DQ-32-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; AVX512DQ-32-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX512DQ-32-NEXT: vzeroupper
|
||||
; AVX512DQ-32-NEXT: retl
|
||||
;
|
||||
; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
|
||||
; AVX512DQ-64: # %bb.0:
|
||||
; AVX512DQ-64-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512DQ-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512DQ-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512DQ-64-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512DQ-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512DQ-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512DQ-64-NEXT: retq
|
||||
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
|
@ -1016,26 +1004,14 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
|||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512VL-64-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
|
||||
; AVX512DQ-32: # %bb.0:
|
||||
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[1],zero,zero,zero
|
||||
; AVX512DQ-32-NEXT: vcvttps2uqq %ymm1, %zmm1
|
||||
; AVX512DQ-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX512DQ-32-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; AVX512DQ-32-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX512DQ-32-NEXT: vzeroupper
|
||||
; AVX512DQ-32-NEXT: retl
|
||||
;
|
||||
; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
|
||||
; AVX512DQ-64: # %bb.0:
|
||||
; AVX512DQ-64-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512DQ-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512DQ-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512DQ-64-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512DQ-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512DQ-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512DQ-64-NEXT: retq
|
||||
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
|
|
|
@ -3976,14 +3976,30 @@ define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32() #0 {
|
|||
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptosi_v2i64_v2f32:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX-NEXT: vmovq %rax, %xmm0
|
||||
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX-NEXT: vmovq %rax, %xmm1
|
||||
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f32:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX1-NEXT: vmovq %rax, %xmm0
|
||||
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX1-NEXT: vmovq %rax, %xmm1
|
||||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f32:
|
||||
; AVX512F: # %bb.0: # %entry
|
||||
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f32:
|
||||
; AVX512DQ: # %bb.0: # %entry
|
||||
; AVX512DQ-NEXT: vcvttps2qq {{.*}}(%rip), %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
entry:
|
||||
%result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(
|
||||
<2 x float><float 42.0, float 43.0>,
|
||||
|
@ -4588,14 +4604,21 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
|
|||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constrained_vector_fptoui_v2i64_v2f32:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f32:
|
||||
; AVX512F: # %bb.0: # %entry
|
||||
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f32:
|
||||
; AVX512DQ: # %bb.0: # %entry
|
||||
; AVX512DQ-NEXT: vcvttps2uqq {{.*}}(%rip), %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
entry:
|
||||
%result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(
|
||||
<2 x float><float 42.0, float 43.0>,
|
||||
|
|
Loading…
Reference in New Issue