diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 64577422d151..8dbff9f05650 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36218,10 +36218,6 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); SDLoc dl(N); - // Only handle sext/aext for now. - if (N->getOpcode() != ISD::SIGN_EXTEND && N->getOpcode() != ISD::ANY_EXTEND) - return SDValue(); - // Only do this combine with AVX512 for vector extends. if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC) return SDValue(); @@ -36249,7 +36245,12 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, if (Size != MatchingVecType.getSizeInBits()) return SDValue(); - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + + if (N->getOpcode() == ISD::ZERO_EXTEND) + Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType().getScalarType()); + + return Res; } static SDValue combineSext(SDNode *N, SelectionDAG &DAG, diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll index 64db862132df..d7bc88439a94 100644 --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -1694,17 +1694,11 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { } define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { -; KNL-LABEL: zext_16xi1_to_16xi16: -; KNL: # %bb.0: -; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0 -; KNL-NEXT: retq -; -; SKX-LABEL: zext_16xi1_to_16xi16: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 -; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} -; SKX-NEXT: retq +; ALL-LABEL: zext_16xi1_to_16xi16: +; ALL: # %bb.0: +; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0 +; ALL-NEXT: retq %mask = icmp eq <16 x i16> %x, %y %1 = zext <16 x i1> %mask to <16 x i16> ret <16 x i16> %1 @@ -1735,46 +1729,28 @@ define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { } define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { -; KNL-LABEL: zext_4xi1_to_4x32: -; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpsrld $31, %xmm0, %xmm0 -; KNL-NEXT: retq -; -; SKX-LABEL: zext_4xi1_to_4x32: -; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 -; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} -; SKX-NEXT: retq +; ALL-LABEL: zext_4xi1_to_4x32: +; ALL: # %bb.0: +; ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0 +; ALL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; ALL-NEXT: vpsrld $31, %xmm0, %xmm0 +; ALL-NEXT: retq %mask = icmp eq <4 x i8> %x, %y %1 = zext <4 x i1> %mask to <4 x i32> ret <4 x i32> %1 } define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { -; KNL-LABEL: zext_2xi1_to_2xi64: -; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0 -; KNL-NEXT: retq -; -; SKX-LABEL: zext_2xi1_to_2xi64: -; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 -; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} -; SKX-NEXT: retq +; ALL-LABEL: zext_2xi1_to_2xi64: +; ALL: # %bb.0: +; ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0 +; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; ALL-NEXT: vpsrlq $63, %xmm0, %xmm0 +; ALL-NEXT: retq %mask = icmp eq <2 x i8> %x, %y %1 = zext <2 x i1> %mask to <2 x i64> ret <2 x i64> %1 diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 5cb9dbb5113e..7a2aa56f7cc5 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -4670,14 +4670,14 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { ; GENERIC-LABEL: zext_16xi1_to_16xi16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [4:0.50] +; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16xi1_to_16xi16: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] +; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp eq <16 x i16> %x, %y %1 = zext <16 x i1> %mask to <16 x i16> @@ -4708,8 +4708,8 @@ define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4xi1_to_4x32: @@ -4717,8 +4717,8 @@ define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50] +; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp eq <4 x i8> %x, %y %1 = zext <4 x i1> %mask to <4 x i32> @@ -4731,8 +4731,8 @@ define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50] +; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2xi1_to_2xi64: @@ -4740,8 +4740,8 @@ define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50] +; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp eq <2 x i8> %x, %y %1 = zext <2 x i1> %mask to <2 x i64> diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index dd2b8f417ab5..0c5669dec761 100644 --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -883,23 +883,14 @@ define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 { } define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 { -; AVX512-LABEL: test45: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] -; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] -; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlq $63, %xmm0, %xmm0 -; AVX512-NEXT: retq -; -; SKX-LABEL: test45: -; SKX: ## %bb.0: -; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] -; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 -; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} -; SKX-NEXT: retq +; CHECK-LABEL: test45: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] +; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpsrlq $63, %xmm0, %xmm0 +; CHECK-NEXT: retq %mask = icmp eq <2 x i16> %x, %y %1 = zext <2 x i1> %mask to <2 x i64> ret <2 x i64> %1