[X86] combineCMP - fold cmpEQ/NE(TRUNC(X),0) -> cmpEQ/NE(X,0)

If we are truncating from an i32 source before comparing the result against zero, see if we can instead compare the i32 source value directly against zero.

If the upper (truncated) bits are known to be zero, then we can compare the wider source value against zero instead, hopefully increasing the chances of folding the compare into an EFLAGS result from the source's own operation.

Fixes PR49028.
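
For illustration, a minimal IR sketch of the kind of pattern this targets (hypothetical function name, modelled on the PR49028 test below):

  define i1 @cmp_trunc_srl(i16 zeroext %x) {
    %z = zext i16 %x to i32   ; upper 16 bits of %z are known zero
    %s = lshr i32 %z, 1       ; upper 16 bits of %s remain known zero
    %t = trunc i32 %s to i16
    %c = icmp eq i16 %t, 0    ; can instead compare i32 %s against zero
    ret i1 %c
  }

Here the i16 test of %t is equivalent to an i32 test of %s, so the separate testw can be dropped in favour of the flags already produced by the shift.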

Differential Revision: https://reviews.llvm.org/D100491
commit 9d57a77b81 (parent 22c017f0f9)
Simon Pilgrim 2021-04-15 13:55:32 +01:00
8 changed files with 70 additions and 56 deletions


@@ -48821,15 +48821,28 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
     }
   }
 
-  // Look for a truncate with a single use.
-  if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse())
+  // Look for a truncate.
+  if (Op.getOpcode() != ISD::TRUNCATE)
     return SDValue();
 
+  SDValue Trunc = Op;
   Op = Op.getOperand(0);
 
-  // Arithmetic op can only have one use.
-  if (!Op.hasOneUse())
+  // See if we can compare with zero against the truncation source,
+  // which should help using the Z flag from many ops. Only do this for
+  // i32 truncated op to prevent partial-reg compares of promoted ops.
+  EVT OpVT = Op.getValueType();
+  APInt UpperBits =
+      APInt::getBitsSetFrom(OpVT.getSizeInBits(), VT.getSizeInBits());
+  if (OpVT == MVT::i32 && DAG.MaskedValueIsZero(Op, UpperBits) &&
+      onlyZeroFlagUsed(SDValue(N, 0))) {
+    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+                       DAG.getConstant(0, dl, OpVT));
+  }
+
+  // After this the truncate and arithmetic op must have a single use.
+  if (!Trunc.hasOneUse() || !Op.hasOneUse())
     return SDValue();
 
   unsigned NewOpc;
   switch (Op.getOpcode()) {
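
For reference, a minimal standalone sketch (not part of the patch) of the UpperBits mask computed above, for the common case of an i16 compare of an i32 truncation; assumes LLVM's ADT headers and library are available:

  // upper_bits_demo.cpp - illustrative only, not part of the patch.
  #include "llvm/ADT/APInt.h"
  #include <cassert>

  int main() {
    // For cmp(trunc i32 -> i16, 0): VT = i16 and OpVT = i32, so the mask
    // covers bits [16, 32) - exactly the bits discarded by the truncate.
    llvm::APInt UpperBits = llvm::APInt::getBitsSetFrom(32, 16);
    assert(UpperBits.getZExtValue() == 0xFFFF0000u);
    return 0;
  }

If MaskedValueIsZero then proves those bits are zero (and onlyZeroFlagUsed confirms only ZF is consumed), comparing the i32 source against zero is equivalent to comparing the truncated value against zero.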


@@ -75,7 +75,6 @@ define i16 @and_i16_ri(i16 zeroext %0, i16 zeroext %1) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    andl $-17, %eax
-; X64-NEXT:    testw %ax, %ax
 ; X64-NEXT:    cmovel %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq


@@ -11,12 +11,10 @@ define void @pr34127() {
 ; CHECK-NEXT:    movzwl {{.*}}(%rip), %eax
 ; CHECK-NEXT:    movzwl {{.*}}(%rip), %ecx
 ; CHECK-NEXT:    andl %eax, %ecx
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    andl %ecx, %edx
-; CHECK-NEXT:    movzwl %dx, %edx
-; CHECK-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    testw %cx, %ax
+; CHECK-NEXT:    andl %eax, %ecx
+; CHECK-NEXT:    movzwl %cx, %ecx
+; CHECK-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    sete %dl
 ; CHECK-NEXT:    andl %eax, %edx
 ; CHECK-NEXT:    movq %rdx, {{.*}}(%rip)


@@ -8,7 +8,6 @@ define zeroext i16 @PR49028(i16 zeroext %0, i8* %1) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrl %eax
-; X86-NEXT:    testw %ax, %ax
 ; X86-NEXT:    sete (%ecx)
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
@@ -17,7 +16,6 @@ define zeroext i16 @PR49028(i16 zeroext %0, i8* %1) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrl %eax
-; X64-NEXT:    testw %ax, %ax
 ; X64-NEXT:    sete (%rsi)
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq


@@ -646,12 +646,18 @@ define i1 @or_cmp_ne_i32(i32 %x, i32 %y) {
 }
 
 define i1 @or_cmp_eq_i16(i16 zeroext %x, i16 zeroext %y) {
-; CHECK-LABEL: or_cmp_eq_i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    testw %si, %di
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    retq
+; NOBMI-LABEL: or_cmp_eq_i16:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    notl %edi
+; NOBMI-NEXT:    testl %esi, %edi
+; NOBMI-NEXT:    sete %al
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: or_cmp_eq_i16:
+; BMI:       # %bb.0:
+; BMI-NEXT:    andnl %esi, %edi, %eax
+; BMI-NEXT:    sete %al
+; BMI-NEXT:    retq
   %o = or i16 %x, %y
   %c = icmp eq i16 %x, %o
   ret i1 %c


@@ -887,7 +887,7 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
 ; SSE-NEXT:    cmpneqps %xmm2, %xmm0
 ; SSE-NEXT:    packssdw %xmm1, %xmm0
 ; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
@@ -994,7 +994,7 @@ define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
 ; SSE-NEXT:    pmovmskb %xmm1, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
@@ -1002,7 +1002,7 @@ define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    testw %ax, %ax
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
@@ -1119,7 +1119,7 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm2
 ; SSE-NEXT:    packssdw %xmm3, %xmm2
 ; SSE-NEXT:    pmovmskb %xmm2, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;


@@ -470,7 +470,7 @@ define i1 @test_v2i16(<2 x i16> %a0) {
 ; SSE-NEXT:    psrld $16, %xmm1
 ; SSE-NEXT:    pand %xmm0, %xmm1
 ; SSE-NEXT:    movd %xmm1, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -479,7 +479,7 @@ define i1 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    testw %ax, %ax
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %1 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a0)
@@ -496,7 +496,7 @@ define i1 @test_v4i16(<4 x i16> %a0) {
 ; SSE-NEXT:    psrld $16, %xmm0
 ; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
@@ -507,7 +507,7 @@ define i1 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    testw %ax, %ax
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
   %1 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a0)
@@ -526,7 +526,7 @@ define i1 @test_v8i16(<8 x i16> %a0) {
 ; SSE-NEXT:    psrld $16, %xmm1
 ; SSE-NEXT:    pand %xmm0, %xmm1
 ; SSE-NEXT:    movd %xmm1, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -539,7 +539,7 @@ define i1 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    testw %ax, %ax
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %1 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a0)
@@ -559,7 +559,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
 ; SSE-NEXT:    psrld $16, %xmm1
 ; SSE-NEXT:    pand %xmm0, %xmm1
 ; SSE-NEXT:    movd %xmm1, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
@@ -574,7 +574,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
-; AVX1-NEXT:    testw %ax, %ax
+; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -590,7 +590,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
 ; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
-; AVX2-NEXT:    testw %ax, %ax
+; AVX2-NEXT:    testl %eax, %eax
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -606,7 +606,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
 ; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    testw %ax, %ax
+; AVX512-NEXT:    testl %eax, %eax
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -629,7 +629,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
 ; SSE-NEXT:    psrld $16, %xmm0
 ; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -645,7 +645,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
-; AVX1-NEXT:    testw %ax, %ax
+; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -662,7 +662,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
 ; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
-; AVX2-NEXT:    testw %ax, %ax
+; AVX2-NEXT:    testl %eax, %eax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -680,7 +680,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
 ; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    testw %ax, %ax
+; AVX512-NEXT:    testl %eax, %eax
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -707,7 +707,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
 ; SSE-NEXT:    psrld $16, %xmm0
 ; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
@@ -725,7 +725,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
-; AVX1-NEXT:    testw %ax, %ax
+; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -744,7 +744,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
-; AVX2-NEXT:    testw %ax, %ax
+; AVX2-NEXT:    testl %eax, %eax
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -763,7 +763,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
 ; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    testw %ax, %ax
+; AVX512-NEXT:    testl %eax, %eax
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -810,7 +810,7 @@ define i1 @test_v4i8(<4 x i8> %a0) {
 ; SSE-NEXT:    psrlw $8, %xmm0
 ; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
@@ -821,7 +821,7 @@ define i1 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
   %1 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a0)
@@ -841,7 +841,7 @@ define i1 @test_v8i8(<8 x i8> %a0) {
 ; SSE-NEXT:    psrlw $8, %xmm1
 ; SSE-NEXT:    pand %xmm0, %xmm1
 ; SSE-NEXT:    movd %xmm1, %eax
-; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -854,7 +854,7 @@ define i1 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %1 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a0)
@@ -876,7 +876,7 @@ define i1 @test_v16i8(<16 x i8> %a0) {
 ; SSE-NEXT:    psrlw $8, %xmm0
 ; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
@@ -891,7 +891,7 @@ define i1 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
   %1 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a0)
@@ -914,7 +914,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
 ; SSE-NEXT:    psrlw $8, %xmm0
 ; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -931,7 +931,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
-; AVX1-NEXT:    testb %al, %al
+; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -949,7 +949,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
 ; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
-; AVX2-NEXT:    testb %al, %al
+; AVX2-NEXT:    testl %eax, %eax
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -967,7 +967,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
 ; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    testb %al, %al
+; AVX512-NEXT:    testl %eax, %eax
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -993,7 +993,7 @@ define i1 @test_v64i8(<64 x i8> %a0) {
 ; SSE-NEXT:    psrlw $8, %xmm1
 ; SSE-NEXT:    pand %xmm0, %xmm1
 ; SSE-NEXT:    movd %xmm1, %eax
-; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
@@ -1011,7 +1011,7 @@ define i1 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
-; AVX1-NEXT:    testb %al, %al
+; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1030,7 +1030,7 @@ define i1 @test_v64i8(<64 x i8> %a0) {
 ; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
-; AVX2-NEXT:    testb %al, %al
+; AVX2-NEXT:    testl %eax, %eax
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1080,7 +1080,7 @@ define i1 @test_v128i8(<128 x i8> %a0) {
 ; SSE-NEXT:    psrlw $8, %xmm1
 ; SSE-NEXT:    pand %xmm0, %xmm1
 ; SSE-NEXT:    movd %xmm1, %eax
-; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;


@@ -1139,7 +1139,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
 ; SSE-NEXT:    packssdw %xmm1, %xmm0
 ; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    testw %ax, %ax
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;