forked from OSchip/llvm-project
[Codegen] TargetLowering::SimplifySetCC(): omit urem when possible
Summary: This addresses the regression that is being exposed by D50222 in `test/CodeGen/X86/jump_sign.ll`. The missing fold, at least partially, looks trivial: https://rise4fun.com/Alive/Zsln — i.e., if we are comparing with zero, and comparing the `urem`-by-non-power-of-two, and the `urem` is of something that may have at most a single bit set (or no bits set at all), then the `urem` is not needed. Reviewers: RKSimon, craig.topper, xbolva00, spatel Reviewed By: xbolva00, spatel Subscribers: xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63390 llvm-svn: 364286
This commit is contained in:
parent
0142b9ce31
commit
cdd43eac4f
|
@ -3024,6 +3024,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
|
|||
}
|
||||
}
|
||||
|
||||
// Given:
|
||||
// icmp eq/ne (urem %x, %y), 0
|
||||
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
|
||||
// icmp eq/ne %x, 0
|
||||
if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
|
||||
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
|
||||
KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
|
||||
KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
|
||||
if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
|
||||
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
|
||||
}
|
||||
|
||||
if (SDValue V =
|
||||
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
|
||||
return V;
|
||||
|
|
|
@ -14,14 +14,7 @@
|
|||
define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: p0_scalar_urem_by_const:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: andl $128, %edi
|
||||
; CHECK-NEXT: movl $2863311531, %eax # imm = 0xAAAAAAAB
|
||||
; CHECK-NEXT: imulq %rdi, %rax
|
||||
; CHECK-NEXT: shrq $34, %rax
|
||||
; CHECK-NEXT: addl %eax, %eax
|
||||
; CHECK-NEXT: leal (%rax,%rax,2), %eax
|
||||
; CHECK-NEXT: cmpl %eax, %edi
|
||||
; CHECK-NEXT: testb $-128, %dil
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = and i32 %x, 128 ; clearly a power-of-two or zero
|
||||
|
@ -33,12 +26,7 @@ define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) {
|
|||
define i1 @p1_scalar_urem_by_nonconst(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: p1_scalar_urem_by_nonconst:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: andl $128, %eax
|
||||
; CHECK-NEXT: orl $6, %esi
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: divl %esi
|
||||
; CHECK-NEXT: testl %edx, %edx
|
||||
; CHECK-NEXT: testb $-128, %dil
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = and i32 %x, 128 ; clearly a power-of-two or zero
|
||||
|
|
Loading…
Reference in New Issue