forked from OSchip/llvm-project
DAGCombine: fold "(zext x) == C" into "x == (trunc C)" if the trunc is lossless.
On x86 this allows folding a load into the cmp, greatly reducing register pressure: "movzbl (%rdi), %eax; cmpl $47, %eax" -> "cmpb $47, (%rdi)". This shaves 8k off gcc.o on i386. I'll leave applying the patch in README.txt to Chris :) llvm-svn: 130005
This commit is contained in:
parent
ad45d911bb
commit
341c11da3b
|
@ -1916,6 +1916,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
|
|||
// TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
|
||||
}
|
||||
|
||||
// (zext x) == C --> x == (trunc C)
|
||||
if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
|
||||
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
|
||||
unsigned MinBits = N0.getValueSizeInBits();
|
||||
SDValue PreZExt;
|
||||
if (N0->getOpcode() == ISD::ZERO_EXTEND) {
|
||||
// ZExt
|
||||
MinBits = N0->getOperand(0).getValueSizeInBits();
|
||||
PreZExt = N0->getOperand(0);
|
||||
} else if (N0->getOpcode() == ISD::AND) {
|
||||
// DAGCombine turns costly ZExts into ANDs
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
|
||||
if ((C->getAPIntValue()+1).isPowerOf2()) {
|
||||
MinBits = C->getAPIntValue().countTrailingOnes();
|
||||
PreZExt = N0->getOperand(0);
|
||||
}
|
||||
} else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
|
||||
// ZEXTLOAD
|
||||
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
|
||||
MinBits = LN0->getMemoryVT().getSizeInBits();
|
||||
PreZExt = N0;
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we're not losing bits from the constant.
|
||||
if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
|
||||
EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
|
||||
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
|
||||
// Will get folded away.
|
||||
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
|
||||
SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
|
||||
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the LHS is '(and load, const)', the RHS is 0,
|
||||
// the test is for equality or unsigned, and all 1 bits of the const are
|
||||
// in the same partial word, see if we can shorten the load.
|
||||
|
|
|
@ -2259,34 +2259,6 @@ icmp transform.
|
|||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
These functions:
|
||||
int foo(int *X) {
|
||||
if ((*X & 255) == 47)
|
||||
bar();
|
||||
}
|
||||
int foo2(int X) {
|
||||
if ((X & 255) == 47)
|
||||
bar();
|
||||
}
|
||||
|
||||
codegen to:
|
||||
|
||||
movzbl (%rdi), %eax
|
||||
cmpl $47, %eax
|
||||
jne LBB0_2
|
||||
|
||||
and:
|
||||
movzbl %dil, %eax
|
||||
cmpl $47, %eax
|
||||
jne LBB1_2
|
||||
|
||||
If a dag combine shrunk the compare to a byte compare, then we'd fold the load
|
||||
in the first example, and eliminate the movzbl in the second, saving a register.
|
||||
This can be a target independent dag combine that works on ISD::SETCC, it would
|
||||
catch this before the legalize ops pass.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We should optimize this:
|
||||
|
||||
%tmp = load i16* %arrayidx, align 4, !tbaa !0
|
||||
|
@ -2329,8 +2301,7 @@ Index: InstCombine/InstCombineCompares.cpp
|
|||
{
|
||||
|
||||
|
||||
but we can't do that until the dag combine above is added. Not having this
|
||||
is blocking resolving PR6627.
|
||||
Not having this is blocking resolving PR6627.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||
|
||||
declare void @bar()
|
||||
|
||||
define void @test1(i32* nocapture %X) nounwind {
|
||||
entry:
|
||||
%tmp1 = load i32* %X, align 4
|
||||
%and = and i32 %tmp1, 255
|
||||
%cmp = icmp eq i32 %and, 47
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
; CHECK: test1:
|
||||
; CHECK: cmpb $47, (%rdi)
|
||||
}
|
||||
|
||||
define void @test2(i32 %X) nounwind {
|
||||
entry:
|
||||
%and = and i32 %X, 255
|
||||
%cmp = icmp eq i32 %and, 47
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
; CHECK: test2:
|
||||
; CHECK: cmpb $47, %dil
|
||||
}
|
Loading…
Reference in New Issue