forked from OSchip/llvm-project
[X86] Custom legalize (v2i32 (setcc (v2f32))) so that we don't end up with a (v4i1 (setcc (v4f32)))
Under VLX, getSetCCResultType returns v2i1/v4i1 for v2f32/v4f32, so default type legalization will end up changing the setcc result type back to vXi1 if it had been extended. The resulting extend gets messed up further by type legalization and is difficult to recombine back to (v4i32 (setcc (v4f32))) after legalization. I went ahead and enabled this for SSE2 and later since it's always the result we want and this helps type legalization get there in fewer steps. llvm-svn: 324822
This commit is contained in:
parent
59aaf1ae33
commit
9121eb575e
|
@ -799,6 +799,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
|
||||
|
||||
// Provide custom widening for v2f32 setcc. This is really for VLX when
|
||||
// setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
|
||||
// type legalization changing the result type to v4i1 during widening.
|
||||
// It works fine for SSE2 and is probably faster so no need to qualify with
|
||||
// VLX support.
|
||||
setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
|
||||
|
||||
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
setOperationAction(ISD::CTPOP, VT, Custom);
|
||||
|
@ -17928,6 +17935,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
|||
assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
|
||||
"Invalid number of packed elements for source and destination!");
|
||||
|
||||
// This is being called by type legalization because v2i32 is marked custom
|
||||
// for result type legalization for v2f32.
|
||||
if (VTOp0 == MVT::v2i32)
|
||||
return SDValue();
|
||||
|
||||
if (VT.is128BitVector() && VTOp0.is256BitVector()) {
|
||||
// On non-AVX512 targets, a vector of MVT::i1 is promoted by the type
|
||||
// legalizer to a wider vector type. In the case of 'vsetcc' nodes, the
|
||||
|
@ -24740,6 +24752,26 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
Results.push_back(Res);
|
||||
return;
|
||||
}
|
||||
case ISD::SETCC: {
|
||||
// Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
|
||||
// setCC result type is v2i1 because type legalization will end up with
|
||||
// a v4i1 setcc plus an extend.
|
||||
assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
|
||||
if (N->getOperand(0).getValueType() != MVT::v2f32)
|
||||
return;
|
||||
SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
|
||||
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
|
||||
N->getOperand(0), UNDEF);
|
||||
SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
|
||||
N->getOperand(1), UNDEF);
|
||||
SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
|
||||
N->getOperand(2));
|
||||
if (!ExperimentalVectorWideningLegalization)
|
||||
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
Results.push_back(Res);
|
||||
return;
|
||||
}
|
||||
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
|
||||
case X86ISD::FMINC:
|
||||
case X86ISD::FMIN:
|
||||
|
|
|
@ -1707,19 +1707,14 @@ define <2 x float> @sbto2f32(<2 x float> %a) {
|
|||
; VLDQ-LABEL: sbto2f32:
|
||||
; VLDQ: # %bb.0:
|
||||
; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0
|
||||
; VLDQ-NEXT: vpmovm2q %k0, %xmm0
|
||||
; VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
|
||||
; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
|
||||
; VLDQ-NEXT: retq
|
||||
;
|
||||
; VLNODQ-LABEL: sbto2f32:
|
||||
; VLNODQ: # %bb.0:
|
||||
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
|
||||
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; VLNODQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; VLNODQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
|
||||
; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
|
||||
; VLNODQ-NEXT: retq
|
||||
%cmpres = fcmp ogt <2 x float> %a, zeroinitializer
|
||||
|
|
|
@ -2536,18 +2536,14 @@ define <2 x float> @sbto2f32(<2 x float> %a) {
|
|||
; GENERIC-LABEL: sbto2f32:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
|
||||
; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sbto2f32:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
|
||||
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
|
||||
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
|
||||
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
%cmpres = fcmp ogt <2 x float> %a, zeroinitializer
|
||||
|
|
|
@ -908,8 +908,8 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
|
|||
; AVX512-LABEL: test46:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlq $63, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test46:
|
||||
|
|
Loading…
Reference in New Issue