[X86] Teach combineExtSetcc to handle ZERO_EXTEND by widening the setcc and then masking. A later DAG combine will convert to a shift.

This helps to avoid a constant pool load needed to zero extend from the mask.

llvm-svn: 324804
This commit is contained in:
Craig Topper 2018-02-10 08:06:49 +00:00
parent 4f0f426d1f
commit d34af6f636
4 changed files with 47 additions and 79 deletions

View File

@ -36218,10 +36218,6 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDLoc dl(N);
// Only handle sext/aext for now.
if (N->getOpcode() != ISD::SIGN_EXTEND && N->getOpcode() != ISD::ANY_EXTEND)
return SDValue();
// Only do this combine with AVX512 for vector extends.
if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC)
return SDValue();
@ -36249,7 +36245,12 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
if (Size != MatchingVecType.getSizeInBits())
return SDValue();
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
if (N->getOpcode() == ISD::ZERO_EXTEND)
Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType().getScalarType());
return Res;
}
static SDValue combineSext(SDNode *N, SelectionDAG &DAG,

View File

@ -1694,17 +1694,11 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
}
define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
; KNL-LABEL: zext_16xi1_to_16xi16:
; KNL: # %bb.0:
; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16xi1_to_16xi16:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
; SKX-NEXT: retq
; ALL-LABEL: zext_16xi1_to_16xi16:
; ALL: # %bb.0:
; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0
; ALL-NEXT: retq
%mask = icmp eq <16 x i16> %x, %y
%1 = zext <16 x i1> %mask to <16 x i16>
ret <16 x i16> %1
@ -1735,46 +1729,28 @@ define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
}
define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
; KNL-LABEL: zext_4xi1_to_4x32:
; KNL: # %bb.0:
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_4xi1_to_4x32:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1
; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT: retq
; ALL-LABEL: zext_4xi1_to_4x32:
; ALL: # %bb.0:
; ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0
; ALL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; ALL-NEXT: vpsrld $31, %xmm0, %xmm0
; ALL-NEXT: retq
%mask = icmp eq <4 x i8> %x, %y
%1 = zext <4 x i1> %mask to <4 x i32>
ret <4 x i32> %1
}
define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
; KNL-LABEL: zext_2xi1_to_2xi64:
; KNL: # %bb.0:
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_2xi1_to_2xi64:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1
; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT: retq
; ALL-LABEL: zext_2xi1_to_2xi64:
; ALL: # %bb.0:
; ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0
; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; ALL-NEXT: vpsrlq $63, %xmm0, %xmm0
; ALL-NEXT: retq
%mask = icmp eq <2 x i8> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
ret <2 x i64> %1

View File

@ -4670,14 +4670,14 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
; GENERIC-LABEL: zext_16xi1_to_16xi16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16xi1_to_16xi16:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp eq <16 x i16> %x, %y
%1 = zext <16 x i1> %mask to <16 x i16>
@ -4708,8 +4708,8 @@ define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4xi1_to_4x32:
@ -4717,8 +4717,8 @@ define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp eq <4 x i8> %x, %y
%1 = zext <4 x i1> %mask to <4 x i32>
@ -4731,8 +4731,8 @@ define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2xi1_to_2xi64:
@ -4740,8 +4740,8 @@ define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp eq <2 x i8> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>

View File

@ -883,23 +883,14 @@ define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
}
define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
; AVX512-LABEL: test45:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; SKX-LABEL: test45:
; SKX: ## %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT: retq
; CHECK-LABEL: test45:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpsrlq $63, %xmm0, %xmm0
; CHECK-NEXT: retq
%mask = icmp eq <2 x i16> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
ret <2 x i64> %1