forked from OSchip/llvm-project
Teach instcombine 4 new xforms:
  (add (sext x), cst) --> (sext (add x, cst'))
  (add (sext x), (sext y)) --> (sext (add int x, y))
  (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
  (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))

This generally reduces conversions. For example, MiBench/telecomm-gsm gets these simplifications:

HACK2:
  %tmp67.i142.i.i = sext i16 %tmp6.i141.i.i to i32 ; <i32> [#uses=1]
  %tmp23.i139.i.i = sext i16 %tmp2.i138.i.i to i32 ; <i32> [#uses=1]
  %tmp8.i143.i.i = add i32 %tmp67.i142.i.i, %tmp23.i139.i.i ; <i32> [#uses=3]
HACK2:
  %tmp67.i121.i.i = sext i16 %tmp6.i120.i.i to i32 ; <i32> [#uses=1]
  %tmp23.i118.i.i = sext i16 %tmp2.i117.i.i to i32 ; <i32> [#uses=1]
  %tmp8.i122.i.i = add i32 %tmp67.i121.i.i, %tmp23.i118.i.i ; <i32> [#uses=3]
HACK2:
  %tmp67.i.i190.i = sext i16 %tmp6.i.i189.i to i32 ; <i32> [#uses=1]
  %tmp23.i.i187.i = sext i16 %tmp2.i.i186.i to i32 ; <i32> [#uses=1]
  %tmp8.i.i191.i = add i32 %tmp67.i.i190.i, %tmp23.i.i187.i ; <i32> [#uses=3]
HACK2:
  %tmp67.i173.i.i.i = sext i16 %tmp6.i172.i.i.i to i32 ; <i32> [#uses=1]
  %tmp23.i170.i.i.i = sext i16 %tmp2.i169.i.i.i to i32 ; <i32> [#uses=1]
  %tmp8.i174.i.i.i = add i32 %tmp67.i173.i.i.i, %tmp23.i170.i.i.i ; <i32> [#uses=3]
HACK2:
  %tmp67.i152.i.i.i = sext i16 %tmp6.i151.i.i.i to i32 ; <i32> [#uses=1]
  %tmp23.i149.i.i.i = sext i16 %tmp2.i148.i.i.i to i32 ; <i32> [#uses=1]
  %tmp8.i153.i.i.i = add i32 %tmp67.i152.i.i.i, %tmp23.i149.i.i.i ; <i32> [#uses=3]
HACK2:
  %tmp67.i.i.i.i = sext i16 %tmp6.i.i.i.i to i32 ; <i32> [#uses=1]
  %tmp23.i.i5.i.i = sext i16 %tmp2.i.i.i.i to i32 ; <i32> [#uses=1]
  %tmp8.i.i7.i.i = add i32 %tmp67.i.i.i.i, %tmp23.i.i5.i.i ; <i32> [#uses=3]

This also fixes a bug in ComputeNumSignBits handling select and makes it more aggressive with and/or.

llvm-svn: 51302
This commit is contained in:
parent
d0e48ea682
commit
7ac943fffd
|
@ -241,6 +241,7 @@ namespace {
|
||||||
Instruction *transformCallThroughTrampoline(CallSite CS);
|
Instruction *transformCallThroughTrampoline(CallSite CS);
|
||||||
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
|
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
|
||||||
bool DoXform = true);
|
bool DoXform = true);
|
||||||
|
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// InsertNewInstBefore - insert an instruction New before instruction Old
|
// InsertNewInstBefore - insert an instruction New before instruction Old
|
||||||
|
@ -2100,7 +2101,48 @@ unsigned InstCombiner::ComputeNumSignBits(Value *V, unsigned Depth) const{
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Instruction::And:
|
case Instruction::And:
|
||||||
|
// Logical binary ops preserve the number of sign bits at the worst.
|
||||||
|
Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
|
||||||
|
if (Tmp != 1) {
|
||||||
|
Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth+1);
|
||||||
|
Tmp = std::min(Tmp, Tmp2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// X & C has sign bits equal to C if C's top bits are zeros.
|
||||||
|
if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
|
||||||
|
// See what bits are known to be zero on the output.
|
||||||
|
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
|
||||||
|
APInt Mask = APInt::getAllOnesValue(TyBits);
|
||||||
|
ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
|
||||||
|
|
||||||
|
KnownZero |= ~C->getValue();
|
||||||
|
// If we know that we have leading zeros, we know we have at least that
|
||||||
|
// many sign bits.
|
||||||
|
Tmp = std::max(Tmp, KnownZero.countLeadingOnes());
|
||||||
|
}
|
||||||
|
return Tmp;
|
||||||
|
|
||||||
case Instruction::Or:
|
case Instruction::Or:
|
||||||
|
// Logical binary ops preserve the number of sign bits at the worst.
|
||||||
|
Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
|
||||||
|
if (Tmp != 1) {
|
||||||
|
Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth+1);
|
||||||
|
Tmp = std::min(Tmp, Tmp2);
|
||||||
|
}
|
||||||
|
// X | C has sign bits equal to C if C's top bits are ones.
|
||||||
|
if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
|
||||||
|
// See what bits are known to be one on the output.
|
||||||
|
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
|
||||||
|
APInt Mask = APInt::getAllOnesValue(TyBits);
|
||||||
|
ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
|
||||||
|
|
||||||
|
KnownOne |= C->getValue();
|
||||||
|
// If we know that we have leading ones, we know we have at least that
|
||||||
|
// many sign bits.
|
||||||
|
Tmp = std::max(Tmp, KnownOne.countLeadingOnes());
|
||||||
|
}
|
||||||
|
return Tmp;
|
||||||
|
|
||||||
case Instruction::Xor: // NOT is handled here.
|
case Instruction::Xor: // NOT is handled here.
|
||||||
// Logical binary ops preserve the number of sign bits.
|
// Logical binary ops preserve the number of sign bits.
|
||||||
Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
|
Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
|
||||||
|
@ -2109,9 +2151,9 @@ unsigned InstCombiner::ComputeNumSignBits(Value *V, unsigned Depth) const{
|
||||||
return std::min(Tmp, Tmp2);
|
return std::min(Tmp, Tmp2);
|
||||||
|
|
||||||
case Instruction::Select:
|
case Instruction::Select:
|
||||||
Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
|
Tmp = ComputeNumSignBits(U->getOperand(1), Depth+1);
|
||||||
if (Tmp == 1) return 1; // Early out.
|
if (Tmp == 1) return 1; // Early out.
|
||||||
Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth+1);
|
Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth+1);
|
||||||
return std::min(Tmp, Tmp2);
|
return std::min(Tmp, Tmp2);
|
||||||
|
|
||||||
case Instruction::Add:
|
case Instruction::Add:
|
||||||
|
@ -2506,6 +2548,32 @@ static bool CannotBeNegativeZero(const Value *V) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// WillNotOverflowSignedAdd - Return true if we can prove that:
|
||||||
|
/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
|
||||||
|
/// This basically requires proving that the add in the original type would not
|
||||||
|
/// overflow to change the sign bit or have a carry out.
|
||||||
|
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
|
||||||
|
// There are different heuristics we can use for this. Here are some simple
|
||||||
|
// ones.
|
||||||
|
|
||||||
|
// Add has the property that adding any two 2's complement numbers can only
|
||||||
|
// have one carry bit which can change a sign. As such, if LHS and RHS each
|
||||||
|
// have at least two sign bits, we know that the addition of the two values will
|
||||||
|
// sign extend fine.
|
||||||
|
if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
|
||||||
|
// If one of the operands only has one non-zero bit, and if the other operand
|
||||||
|
// has a known-zero bit in a more significant place than it (not including the
|
||||||
|
// sign bit) the ripple may go up to and fill the zero, but won't change the
|
||||||
|
// sign. For example, (X & ~4) + 1.
|
||||||
|
|
||||||
|
// TODO: Implement.
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
|
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
|
||||||
bool Changed = SimplifyCommutative(I);
|
bool Changed = SimplifyCommutative(I);
|
||||||
|
@ -2781,6 +2849,84 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
|
||||||
if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
|
if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
|
||||||
return ReplaceInstUsesWith(I, LHS);
|
return ReplaceInstUsesWith(I, LHS);
|
||||||
|
|
||||||
|
// Check for (add (sext x), y), see if we can merge this into an
|
||||||
|
// integer add followed by a sext.
|
||||||
|
if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
|
||||||
|
// (add (sext x), cst) --> (sext (add x, cst'))
|
||||||
|
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
|
||||||
|
Constant *CI =
|
||||||
|
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
|
||||||
|
if (LHSConv->hasOneUse() &&
|
||||||
|
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
|
||||||
|
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
|
||||||
|
// Insert the new, smaller add.
|
||||||
|
Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
|
||||||
|
CI, "addconv");
|
||||||
|
InsertNewInstBefore(NewAdd, I);
|
||||||
|
return new SExtInst(NewAdd, I.getType());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// (add (sext x), (sext y)) --> (sext (add int x, y))
|
||||||
|
if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
|
||||||
|
// Only do this if x/y have the same type, if at least one of them has a
|
||||||
|
// single use (so we don't increase the number of sexts), and if the
|
||||||
|
// integer add will not overflow.
|
||||||
|
if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
|
||||||
|
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
|
||||||
|
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
|
||||||
|
RHSConv->getOperand(0))) {
|
||||||
|
// Insert the new integer add.
|
||||||
|
Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
|
||||||
|
RHSConv->getOperand(0),
|
||||||
|
"addconv");
|
||||||
|
InsertNewInstBefore(NewAdd, I);
|
||||||
|
return new SExtInst(NewAdd, I.getType());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for (add double (sitofp x), y), see if we can merge this into an
|
||||||
|
// integer add followed by a promotion.
|
||||||
|
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
|
||||||
|
// (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
|
||||||
|
// ... if the constant fits in the integer value. This is useful for things
|
||||||
|
// like (double)(x & 1234) + 4.0 -> (double)((x & 1234)+4) which no longer
|
||||||
|
// requires a constant pool load, and generally allows the add to be better
|
||||||
|
// instcombined.
|
||||||
|
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
|
||||||
|
Constant *CI =
|
||||||
|
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
|
||||||
|
if (LHSConv->hasOneUse() &&
|
||||||
|
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
|
||||||
|
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
|
||||||
|
// Insert the new integer add.
|
||||||
|
Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
|
||||||
|
CI, "addconv");
|
||||||
|
InsertNewInstBefore(NewAdd, I);
|
||||||
|
return new SIToFPInst(NewAdd, I.getType());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
|
||||||
|
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
|
||||||
|
// Only do this if x/y have the same type, if at least one of them has a
|
||||||
|
// single use (so we don't increase the number of int->fp conversions),
|
||||||
|
// and if the integer add will not overflow.
|
||||||
|
if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
|
||||||
|
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
|
||||||
|
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
|
||||||
|
RHSConv->getOperand(0))) {
|
||||||
|
// Insert the new integer add.
|
||||||
|
Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
|
||||||
|
RHSConv->getOperand(0),
|
||||||
|
"addconv");
|
||||||
|
InsertNewInstBefore(NewAdd, I);
|
||||||
|
return new SIToFPInst(NewAdd, I.getType());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return Changed ? &I : 0;
|
return Changed ? &I : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {add i32}
|
||||||
|
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep sext | count 1
|
||||||
|
|
||||||
|
; Should only have one sext and the add should be i32 instead of i64.
|
||||||
|
|
||||||
|
define i64 @test1(i32 %A) {
|
||||||
|
%B = ashr i32 %A, 7 ; <i32> [#uses=1]
|
||||||
|
%C = ashr i32 %A, 9 ; <i32> [#uses=1]
|
||||||
|
%D = sext i32 %B to i64 ; <i64> [#uses=1]
|
||||||
|
%E = sext i32 %C to i64 ; <i64> [#uses=1]
|
||||||
|
%F = add i64 %D, %E ; <i64> [#uses=1]
|
||||||
|
ret i64 %F
|
||||||
|
}
|
||||||
|
|
|
@ -31,3 +31,13 @@ define i32 @test5(i32 %A) {
|
||||||
ret i32 %E
|
ret i32 %E
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define i32 @test6(i32 %A) {
|
||||||
|
%B = and i32 %A, 7 ; <i32> [#uses=1]
|
||||||
|
%C = and i32 %A, 32 ; <i32> [#uses=1]
|
||||||
|
%D = sitofp i32 %B to double ; <double> [#uses=1]
|
||||||
|
%E = sitofp i32 %C to double ; <double> [#uses=1]
|
||||||
|
%F = add double %D, %E ; <double> [#uses=1]
|
||||||
|
%G = fptosi double %F to i32 ; <i32> [#uses=1]
|
||||||
|
ret i32 %G
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue