//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions. This pass does not modify the CFG. This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
// %Y = add i32 %X, 1
// %Z = add i32 %Y, 1
// into:
// %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
// 1. If a binary operator has a constant operand, it is moved to the RHS
// 2. Bitwise operators with constant operands are always grouped so that
// shifts are performed first, then or's, then and's, then xor's.
// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
// 4. All cmp instructions on boolean values are replaced with logical ops
// 5. add X, X is represented as (X*2) => (X << 1)
// 6. Multiplies with a power-of-two constant argument are transformed into
// shifts.
// ... etc.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <climits>
using namespace llvm;
using namespace llvm::PatternMatch;
STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
char InstCombiner::ID = 0;
static RegisterPass<InstCombiner>
X("instcombine", "Combine redundant instructions");
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(LCSSAID);
AU.setPreservesCFG();
}
// isOnlyUse - Return true if this instruction will be deleted if we stop using
// it.
static bool isOnlyUse(Value *V) {
return V->hasOneUse() || isa<Constant>(V);
}
// getPromotedType - Return the specified type promoted as it would be to pass
// through a va_arg area.
static const Type *getPromotedType(const Type *Ty) {
if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
if (ITy->getBitWidth() < 32)
return Type::getInt32Ty(Ty->getContext());
}
return Ty;
}
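// For illustration: under this promotion rule, i1/i8/i16 arguments are
// widened to i32, while i32 and wider integer types (and all non-integer
// types) are returned unchanged.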
/// ShouldChangeType - Return true if it is desirable to convert a computation
/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
/// type for example, or from a smaller to a larger illegal type.
bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
assert(isa<IntegerType>(From) && isa<IntegerType>(To));
// If we don't have TD, we don't know if the source/dest are legal.
if (!TD) return false;
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
bool FromLegal = TD->isLegalInteger(FromWidth);
bool ToLegal = TD->isLegalInteger(ToWidth);
// If this is a legal integer from type, and the result would be an illegal
// type, don't do the transformation.
if (FromLegal && !ToLegal)
return false;
// Otherwise, if both are illegal, do not increase the size of the result. We
// do allow things like i160 -> i64, but not i64 -> i160.
if (!FromLegal && !ToLegal && ToWidth > FromWidth)
return false;
return true;
}
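// For illustration, assuming a target whose legal integer widths are 32 and
// 64 bits:
//   ShouldChangeType(i64,  i160) == false   (legal -> illegal)
//   ShouldChangeType(i160, i64)  == true    (shrinking an illegal type)
//   ShouldChangeType(i128, i256) == false   (both illegal, result grows)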
/// getBitCastOperand - If the specified operand is a CastInst, a constant
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
static Value *getBitCastOperand(Value *V) {
if (Operator *O = dyn_cast<Operator>(V)) {
if (O->getOpcode() == Instruction::BitCast)
return O->getOperand(0);
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (GEP->hasAllZeroIndices())
return GEP->getPointerOperand();
}
return 0;
}
// SimplifyCommutative - This performs a few simplifications for commutative
// operators:
//
// 1. Order operands such that they are listed from right (least complex) to
// left (most complex). This puts constants before unary operators before
// binary operators.
//
// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
//
bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
bool Changed = false;
if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
Changed = !I.swapOperands();
if (!I.isAssociative()) return Changed;
Instruction::BinaryOps Opcode = I.getOpcode();
if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
if (isa<Constant>(I.getOperand(1))) {
Constant *Folded = ConstantExpr::get(I.getOpcode(),
cast<Constant>(I.getOperand(1)),
cast<Constant>(Op->getOperand(1)));
I.setOperand(0, Op->getOperand(0));
I.setOperand(1, Folded);
return true;
} else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1)))
if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
isOnlyUse(Op) && isOnlyUse(Op1)) {
Constant *C1 = cast<Constant>(Op->getOperand(1));
Constant *C2 = cast<Constant>(Op1->getOperand(1));
// Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
Op1->getOperand(0),
Op1->getName(), &I);
Worklist.Add(New);
I.setOperand(0, New);
I.setOperand(1, Folded);
return true;
}
}
return Changed;
}
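// For illustration, rule 2 above reassociates constant operands so they can
// be folded, e.g.:
//   %a = add i32 %x, 1
//   %b = add i32 %a, 2
// becomes
//   %b = add i32 %x, 3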
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
//
Value *InstCombiner::dyn_castNegVal(Value *V) const {
if (BinaryOperator::isNeg(V))
return BinaryOperator::getNegArgument(V);
// Constants can be considered to be negated values if they can be folded.
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantExpr::getNeg(C);
if (ConstantVector *C = dyn_cast<ConstantVector>(V))
if (C->getType()->getElementType()->isInteger())
return ConstantExpr::getNeg(C);
return 0;
}
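// For illustration: given %n = sub i32 0, %x, dyn_castNegVal(%n) returns %x;
// given the constant i32 7 it returns i32 -7 (7 is the negation of -7).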
// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
// instruction if the LHS is a constant negative zero (which is the 'negate'
// form).
//
Value *InstCombiner::dyn_castFNegVal(Value *V) const {
if (BinaryOperator::isFNeg(V))
return BinaryOperator::getFNegArgument(V);
// Constants can be considered to be negated values if they can be folded.
if (ConstantFP *C = dyn_cast<ConstantFP>(V))
return ConstantExpr::getFNeg(C);
if (ConstantVector *C = dyn_cast<ConstantVector>(V))
if (C->getType()->getElementType()->isFloatingPoint())
return ConstantExpr::getFNeg(C);
return 0;
}
/// isFreeToInvert - Return true if the specified value is free to invert (apply
/// ~ to). This happens in cases where the ~ can be eliminated.
static inline bool isFreeToInvert(Value *V) {
// ~(~(X)) -> X.
if (BinaryOperator::isNot(V))
return true;
// Constants can be considered to be not'ed values.
if (isa<ConstantInt>(V))
return true;
// Compares can be inverted if they have a single use.
if (CmpInst *CI = dyn_cast<CmpInst>(V))
return CI->hasOneUse();
return false;
}
static inline Value *dyn_castNotVal(Value *V) {
// If this is not(not(x)) don't return that this is a not: we want the two
// not's to be folded first.
if (BinaryOperator::isNot(V)) {
Value *Operand = BinaryOperator::getNotArgument(V);
if (!isFreeToInvert(Operand))
return Operand;
}
// Constants can be considered to be not'ed values...
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantInt::get(C->getType(), ~C->getValue());
return 0;
}
// dyn_castFoldableMul - If this value is a multiply that can be folded into
// other computations (because it has a constant operand), return the
// non-constant operand of the multiply, and set CST to point to the multiplier.
// Otherwise, return null.
//
static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
if (V->hasOneUse() && V->getType()->isInteger())
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (I->getOpcode() == Instruction::Mul)
if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
return I->getOperand(0);
if (I->getOpcode() == Instruction::Shl)
if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
// The multiplier is really 1 << CST.
uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
uint32_t CSTVal = CST->getLimitedValue(BitWidth);
CST = ConstantInt::get(V->getType()->getContext(),
APInt(BitWidth, 1).shl(CSTVal));
return I->getOperand(0);
}
}
return 0;
}
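// For illustration: given a single-use %v = shl i32 %x, 3,
// dyn_castFoldableMul(%v, CST) returns %x and sets CST to 8 (i.e. 1 << 3),
// so callers can treat %v as the multiply %x * 8.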
/// AddOne - Add one to a ConstantInt.
static Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
return ConstantInt::get(C->getContext(), C->getValue()-1);
}
static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner *IC) {
if (CastInst *CI = dyn_cast<CastInst>(&I))
return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
// Figure out if the constant is the left or the right argument.
bool ConstIsRHS = isa<Constant>(I.getOperand(1));
Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
if (Constant *SOC = dyn_cast<Constant>(SO)) {
if (ConstIsRHS)
return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
}
Value *Op0 = SO, *Op1 = ConstOperand;
if (!ConstIsRHS)
std::swap(Op0, Op1);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
SO->getName()+".op");
if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
SO->getName()+".cmp");
if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
SO->getName()+".cmp");
llvm_unreachable("Unknown binary instruction type!");
}
// FoldOpIntoSelect - Given an instruction with a select as one operand and a
// constant as the other operand, try to fold the binary operator into the
// select arguments. This also works for Cast instructions, which obviously do
// not have a second operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
// Don't modify shared select instructions
if (!SI->hasOneUse()) return 0;
Value *TV = SI->getOperand(1);
Value *FV = SI->getOperand(2);
if (isa<Constant>(TV) || isa<Constant>(FV)) {
// Bool selects with constant operands can be folded to logical ops.
if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0;
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
return SelectInst::Create(SI->getCondition(), SelectTrueVal,
SelectFalseVal);
}
return 0;
}
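// For illustration, assuming the select has a single use:
//   %s = select i1 %c, i32 4, i32 8
//   %r = add i32 %s, 1
// folds to
//   %r = select i1 %c, i32 5, i32 9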
/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
///
/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
/// that would normally be unprofitable because they strongly encourage jump
/// threading.
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
bool AllowAggressive) {
AllowAggressive = false;
PHINode *PN = cast<PHINode>(I.getOperand(0));
unsigned NumPHIValues = PN->getNumIncomingValues();
if (NumPHIValues == 0 ||
// We normally only transform phis with a single use, unless we're trying
// hard to make jump threading happen.
(!PN->hasOneUse() && !AllowAggressive))
return 0;
// Check to see if all of the operands of the PHI are simple constants
// (constantint/constantfp/undef). If there is one non-constant value,
// remember the BB it is in. If there is more than one or if *it* is a PHI,
// bail out. We don't do arbitrary constant expressions here because moving
// their computation can be expensive without a cost model.
BasicBlock *NonConstBB = 0;
for (unsigned i = 0; i != NumPHIValues; ++i)
if (!isa<Constant>(PN->getIncomingValue(i)) ||
isa<ConstantExpr>(PN->getIncomingValue(i))) {
if (NonConstBB) return 0; // More than one non-const value.
if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi.
NonConstBB = PN->getIncomingBlock(i);
// If the incoming non-constant value is in I's block, we have an infinite
// loop.
if (NonConstBB == I.getParent())
return 0;
}
// If there is exactly one non-constant value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would be
// inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
if (NonConstBB != 0 && !AllowAggressive) {
BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
if (!BI || !BI->isUnconditional()) return 0;
}
// Okay, we can do the transformation: create the new PHI node.
PHINode *NewPN = PHINode::Create(I.getType(), "");
NewPN->reserveOperandSpace(PN->getNumOperands()/2);
InsertNewInstBefore(NewPN, *PN);
NewPN->takeName(PN);
// Next, add all of the operands to the PHI.
if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
// We only currently try to fold the condition of a select when it is a phi,
// not the true/false values.
Value *TrueV = SI->getTrueValue();
Value *FalseV = SI->getFalseValue();
BasicBlock *PhiTransBB = PN->getParent();
for (unsigned i = 0; i != NumPHIValues; ++i) {
BasicBlock *ThisBB = PN->getIncomingBlock(i);
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
Value *InV = 0;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
FalseVInPred,
"phitmp", NonConstBB->getTerminator());
Worklist.Add(cast<Instruction>(InV));
}
NewPN->addIncoming(InV, ThisBB);
}
} else if (I.getNumOperands() == 2) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV = 0;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
if (CmpInst *CI = dyn_cast<CmpInst>(&I))
InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
else
InV = ConstantExpr::get(I.getOpcode(), InC, C);
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
InV = BinaryOperator::Create(BO->getOpcode(),
PN->getIncomingValue(i), C, "phitmp",
NonConstBB->getTerminator());
else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
InV = CmpInst::Create(CI->getOpcode(),
CI->getPredicate(),
PN->getIncomingValue(i), C, "phitmp",
NonConstBB->getTerminator());
else
llvm_unreachable("Unknown binop!");
Worklist.Add(cast<Instruction>(InV));
}
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
} else {
CastInst *CI = cast<CastInst>(&I);
const Type *RetTy = CI->getType();
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
I.getType(), "phitmp",
NonConstBB->getTerminator());
Worklist.Add(cast<Instruction>(InV));
}
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
}
return ReplaceInstUsesWith(I, NewPN);
}
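// For illustration, assuming %p has a single use and all-constant incoming
// values:
//   %p = phi i32 [ 1, %bb0 ], [ 2, %bb1 ]
//   %r = add i32 %p, 4
// is replaced by a phi of the folded constants:
//   phi i32 [ 5, %bb0 ], [ 6, %bb1 ]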
/// WillNotOverflowSignedAdd - Return true if we can prove that:
/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
// There are different heuristics we can use for this. Here are some simple
// ones.
// Add has the property that adding any two 2's complement numbers can only
// have one carry bit which can change a sign. As such, if LHS and RHS each
// have at least two sign bits, we know that the addition of the two values
// will sign extend fine.
if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
return true;
// If one of the operands only has one non-zero bit, and if the other operand
// has a known-zero bit in a more significant place than it (not including the
// sign bit) the ripple may go up to and fill the zero, but won't change the
// sign. For example, (X & ~4) + 1.
// TODO: Implement.
return false;
}
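// For illustration: if %x and %y are both i16 values produced by
// 'sext i8 ... to i16', each has at least 9 sign bits, so an i16 add of the
// two cannot change the sign bit and WillNotOverflowSignedAdd returns true.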
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), TD))
return ReplaceInstUsesWith(I, V);
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
// X + (signbit) --> X ^ signbit
const APInt& Val = CI->getValue();
uint32_t BitWidth = Val.getBitWidth();
if (Val == APInt::getSignBit(BitWidth))
return BinaryOperator::CreateXor(LHS, RHS);
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
if (SimplifyDemandedInstructionBits(I))
return &I;
// zext(bool) + C -> bool ? C + 1 : C
if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
}
if (isa<PHINode>(LHS))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
ConstantInt *XorRHS = 0;
Value *XorLHS = 0;
if (isa<ConstantInt>(RHSC) &&
match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
uint32_t Size = TySizeBits / 2;
APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
APInt CFF80Val(-C0080Val);
do {
if (TySizeBits > Size) {
// If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
// If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
(RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
// This is a sign extend if the top bits are known zero.
if (!MaskedValueIsZero(XorLHS,
APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
Size = 0; // Not a sign ext, but can't be any others either.
break;
}
}
Size >>= 1;
C0080Val = APIntOps::lshr(C0080Val, Size);
CFF80Val = APIntOps::ashr(CFF80Val, Size);
} while (Size >= 1);
// FIXME: This shouldn't be necessary. When the backends can handle types
// with funny bit widths then this switch statement should be removed. It
// is just here to get the size of the "middle" type back up to something
// that the back ends can handle.
const Type *MiddleType = 0;
switch (Size) {
default: break;
case 32:
case 16:
case 8: MiddleType = IntegerType::get(I.getContext(), Size); break;
}
if (MiddleType) {
Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
return new SExtInst(NewTrunc, I.getType(), I.getName());
}
}
}
if (I.getType() == Type::getInt1Ty(I.getContext()))
return BinaryOperator::CreateXor(LHS, RHS);
if (I.getType()->isInteger()) {
// X + X --> X << 1
if (LHS == RHS)
return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
if (RHSI->getOpcode() == Instruction::Sub)
if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B
return ReplaceInstUsesWith(I, RHSI->getOperand(0));
}
if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
if (LHSI->getOpcode() == Instruction::Sub)
if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B
return ReplaceInstUsesWith(I, LHSI->getOperand(0));
}
}
// -A + B --> B - A
// -A + -B --> -(A + B)
if (Value *LHSV = dyn_castNegVal(LHS)) {
if (LHS->getType()->isIntOrIntVector()) {
if (Value *RHSV = dyn_castNegVal(RHS)) {
Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
return BinaryOperator::CreateNeg(NewAdd);
}
}
return BinaryOperator::CreateSub(RHS, LHSV);
}
// A + -B --> A - B
if (!isa<Constant>(RHS))
if (Value *V = dyn_castNegVal(RHS))
return BinaryOperator::CreateSub(LHS, V);
ConstantInt *C2;
if (Value *X = dyn_castFoldableMul(LHS, C2)) {
if (X == RHS) // X*C + X --> X * (C+1)
return BinaryOperator::CreateMul(RHS, AddOne(C2));
// X*C1 + X*C2 --> X * (C1+C2)
ConstantInt *C1;
if (X == dyn_castFoldableMul(RHS, C1))
return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
}
// X + X*C --> X * (C+1)
if (dyn_castFoldableMul(RHS, C2) == LHS)
return BinaryOperator::CreateMul(LHS, AddOne(C2));
// X + ~X --> -1 since ~X = -X-1
if (dyn_castNotVal(LHS) == RHS ||
dyn_castNotVal(RHS) == LHS)
return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
// A+B --> A|B iff A and B have no bits set in common.
if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
if (LHSKnownZero != 0) {
APInt RHSKnownOne(IT->getBitWidth(), 0);
APInt RHSKnownZero(IT->getBitWidth(), 0);
ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
// No bits in common -> bitwise or.
if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
return BinaryOperator::CreateOr(LHS, RHS);
}
}
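// For illustration of the A+B --> A|B fold above: if %x = and i32 %a, -16
// (low four bits known zero) and %y = and i32 %b, 15 (all other bits known
// zero), the add can generate no carries and becomes 'or i32 %x, %y'.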
// W*X + Y*Z --> W * (X+Z) iff W == Y
if (I.getType()->isIntOrIntVector()) {
Value *W, *X, *Y, *Z;
if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
if (W != Y) {
if (W == Z) {
std::swap(Y, Z);
} else if (Y == X) {
std::swap(W, X);
} else if (X == Z) {
std::swap(Y, Z);
std::swap(W, X);
}
}
if (W == Y) {
Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
return BinaryOperator::CreateMul(W, NewAdd);
}
}
}
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
Value *X = 0;
if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
return BinaryOperator::CreateSub(SubOne(CRHS), X);
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() &&
match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
if (Anded == CRHS) {
// See if all bits from the first bit set in the Add RHS up are included
// in the mask. First, get the rightmost bit.
const APInt &AddRHSV = CRHS->getValue();
// Form a mask of all bits from the lowest bit added through the top.
APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
// See if the and mask includes all of these bits.
APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
if (AddRHSHighBits == AddRHSHighBitsAnd) {
// Okay, the xform is safe. Insert the new add pronto.
Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
return BinaryOperator::CreateAnd(NewAdd, C2);
}
}
}
// Try to fold constant add into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
}
// add (select X 0 (sub n A)) A --> select X A n
{
SelectInst *SI = dyn_cast<SelectInst>(LHS);
Value *A = RHS;
if (!SI) {
SI = dyn_cast<SelectInst>(RHS);
A = LHS;
}
if (SI && SI->hasOneUse()) {
Value *TV = SI->getTrueValue();
Value *FV = SI->getFalseValue();
Value *N;
// Can we fold the add into the argument of the select?
// We check both true and false select arguments for a matching subtract.
if (match(FV, m_Zero()) &&
match(TV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the true select value.
return SelectInst::Create(SI->getCondition(), N, A);
if (match(TV, m_Zero()) &&
match(FV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the false select value.
return SelectInst::Create(SI->getCondition(), A, N);
}
}
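// For illustration of the select fold above, assuming %s has a single use:
//   %t = sub i32 %n, %a
//   %s = select i1 %x, i32 0, i32 %t
//   %r = add i32 %s, %a
// becomes
//   %r = select i1 %x, i32 %a, i32 %n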
// Check for (add (sext x), y), see if we can merge this into an
// integer add followed by a sext.
if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
// (add (sext x), cst) --> (sext (add x, cst'))
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
Constant *CI =
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
// Insert the new, smaller add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
// (add (sext x), (sext y)) --> (sext (add int x, y))
if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of sexts), and if the
// integer add will not overflow.
if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
}
return Changed ? &I : 0;
}
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// X + 0 --> X
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
if (CFP->isExactlyValue(ConstantFP::getNegativeZero
(I.getType())->getValueAPF()))
return ReplaceInstUsesWith(I, LHS);
}
if (isa<PHINode>(LHS))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
}
// -A + B --> B - A
// -A + -B --> -(A + B)
if (Value *LHSV = dyn_castFNegVal(LHS))
return BinaryOperator::CreateFSub(RHS, LHSV);
// A + -B --> A - B
if (!isa<Constant>(RHS))
if (Value *V = dyn_castFNegVal(RHS))
return BinaryOperator::CreateFSub(LHS, V);
// Check for X+0.0. Simplify it to X if we know X is not -0.0.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
return ReplaceInstUsesWith(I, LHS);
// Check for (add double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
// (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
// ... if the constant fits in the integer value. This is useful for things
// like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
// requires a constant pool load, and generally allows the add to be better
// instcombined.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
Constant *CI =
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
// (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
}
return Changed ? &I : 0;
}
/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
Value *InstCombiner::EmitGEPOffset(User *GEP) {
TargetData &TD = *getTargetData();
gep_type_iterator GTI = gep_type_begin(GEP);
const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
Value *Result = Constant::getNullValue(IntPtrTy);
// Build a mask for high order bits.
unsigned IntPtrWidth = TD.getPointerSizeInBits();
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
++i, ++GTI) {
Value *Op = *i;
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
if (OpC->isZero()) continue;
// Handle a struct index, which adds its field offset to the pointer.
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
Result = Builder->CreateAdd(Result,
ConstantInt::get(IntPtrTy, Size),
GEP->getName()+".offs");
continue;
}
Constant *Scale = ConstantInt::get(IntPtrTy, Size);
Constant *OC =
ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
Scale = ConstantExpr::getMul(OC, Scale);
// Emit an add instruction.
Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
continue;
}
// Convert to correct type.
if (Op->getType() != IntPtrTy)
Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
if (Size != 1) {
Constant *Scale = ConstantInt::get(IntPtrTy, Size);
// We'll let instcombine(mul) convert this to a shl if possible.
Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
}
// Emit an add instruction.
Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
}
return Result;
}
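// For illustration, assuming a 64-bit target where i32 has a 4-byte
// allocation size: for 'getelementptr i32* %p, i64 %i' this emits a multiply
// of %i by 4 (the element size) and returns the byte offset from %p, without
// adding in the base pointer itself.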
/// Optimize pointer differences in the same array into a size. Consider:
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
const Type *Ty) {
assert(TD && "Must have target data info for this");
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
GetElementPtrInst *GEP = 0;
ConstantExpr *CstGEP = 0;
// TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
// For now we require one side to be the base pointer "A" or a constant
// expression derived from it.
if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
// (gep X, ...) - X
if (LHSGEP->getOperand(0) == RHS) {
GEP = LHSGEP;
Swapped = false;
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
// (gep X, ...) - (ce_gep X, ...)
if (CE->getOpcode() == Instruction::GetElementPtr &&
LHSGEP->getOperand(0) == CE->getOperand(0)) {
CstGEP = CE;
GEP = LHSGEP;
Swapped = false;
}
}
}
if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
// X - (gep X, ...)
if (RHSGEP->getOperand(0) == LHS) {
GEP = RHSGEP;
Swapped = true;
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
// (ce_gep X, ...) - (gep X, ...)
if (CE->getOpcode() == Instruction::GetElementPtr &&
RHSGEP->getOperand(0) == CE->getOperand(0)) {
CstGEP = CE;
GEP = RHSGEP;
Swapped = true;
}
}
}
if (GEP == 0)
return 0;
// Emit the offset of the GEP as an intptr_t.
Value *Result = EmitGEPOffset(GEP);
// If we had a constant expression GEP on the other side offsetting the
// pointer, subtract it from the offset we have.
if (CstGEP) {
Value *CstOffset = EmitGEPOffset(CstGEP);
Result = Builder->CreateSub(Result, CstOffset);
}
// If we have p - gep(p, ...) then we have to negate the result.
if (Swapped)
Result = Builder->CreateNeg(Result, "diff.neg");
return Builder->CreateIntCast(Result, Ty, true);
}
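// For illustration, assuming a target with 4-byte i32:
//   %pa = getelementptr i32* %p, i64 10
//   %ia = ptrtoint i32* %pa to i64
//   %ib = ptrtoint i32* %p to i64
//   %d  = sub i64 %ia, %ib
// is folded to the GEP's byte offset (40 here), leaving no reference to %p
// in the computation of %d.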
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Op0 == Op1) // sub X, X -> 0
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
if (Value *V = dyn_castNegVal(Op1)) {
BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
Res->setHasNoSignedWrap(I.hasNoSignedWrap());
Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
return Res;
}
if (isa<UndefValue>(Op0))
return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
if (isa<UndefValue>(Op1))
return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
if (I.getType() == Type::getInt1Ty(I.getContext()))
return BinaryOperator::CreateXor(Op0, Op1);
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
// Replace (-1 - A) with (~A).
if (C->isAllOnesValue())
return BinaryOperator::CreateNot(Op1);
// C - ~X == X + (1+C)
Value *X = 0;
if (match(Op1, m_Not(m_Value(X))))
return BinaryOperator::CreateAdd(X, AddOne(C));
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
if (C->isZero()) {
if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
if (SI->getOpcode() == Instruction::LShr) {
if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
// Check to see if we are shifting out everything but the sign bit.
if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
SI->getType()->getPrimitiveSizeInBits()-1) {
// Ok, the transformation is safe. Insert AShr.
return BinaryOperator::Create(Instruction::AShr,
SI->getOperand(0), CU, SI->getName());
}
}
} else if (SI->getOpcode() == Instruction::AShr) {
if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
// Check to see if we are shifting out everything but the sign bit.
if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
SI->getType()->getPrimitiveSizeInBits()-1) {
// Ok, the transformation is safe. Insert LShr.
return BinaryOperator::CreateLShr(
SI->getOperand(0), CU, SI->getName());
}
}
}
}
}
// Try to fold constant sub into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
// C - zext(bool) -> bool ? C - 1 : C
if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
}
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op1I->getOpcode() == Instruction::Add) {
if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
return BinaryOperator::CreateNeg(Op1I->getOperand(1),
I.getName());
else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
return BinaryOperator::CreateNeg(Op1I->getOperand(0),
I.getName());
else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
// C1-(X+C2) --> (C1-C2)-X
return BinaryOperator::CreateSub(
ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
}
}
if (Op1I->hasOneUse()) {
// Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
// is not used by anyone else...
//
if (Op1I->getOpcode() == Instruction::Sub) {
// Swap the two operands of the subexpr...
Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
Op1I->setOperand(0, IIOp1);
Op1I->setOperand(1, IIOp0);
// Create the new top level add instruction...
return BinaryOperator::CreateAdd(Op0, Op1);
}
// Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
//
if (Op1I->getOpcode() == Instruction::And &&
(Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
return BinaryOperator::CreateAnd(Op0, NewNot);
}
// 0 - (X sdiv C) -> (X sdiv -C)
if (Op1I->getOpcode() == Instruction::SDiv)
if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
if (CSI->isZero())
if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
ConstantExpr::getNeg(DivRHS));
// X - X*C --> X * (1-C)
ConstantInt *C2 = 0;
if (dyn_castFoldableMul(Op1I, C2) == Op0) {
Constant *CP1 =
ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
C2);
return BinaryOperator::CreateMul(Op0, CP1);
}
}
}
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
if (Op0I->getOpcode() == Instruction::Add) {
if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
return ReplaceInstUsesWith(I, Op0I->getOperand(1));
else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
return ReplaceInstUsesWith(I, Op0I->getOperand(0));
} else if (Op0I->getOpcode() == Instruction::Sub) {
if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
return BinaryOperator::CreateNeg(Op0I->getOperand(1),
I.getName());
}
}
ConstantInt *C1;
if (Value *X = dyn_castFoldableMul(Op0, C1)) {
if (X == Op1) // X*C - X --> X * (C-1)
return BinaryOperator::CreateMul(Op1, SubOne(C1));
ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
if (X == dyn_castFoldableMul(Op1, C2))
return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
if (TD) {
Value *LHSOp, *RHSOp;
if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
match(Op1, m_PtrToInt(m_Value(RHSOp))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);
// trunc(p)-trunc(q) -> trunc(p-q)
if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);
}
return 0;
}
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// If this is a 'B = x-(-A)', change to B = x+A...
if (Value *V = dyn_castFNegVal(Op1))
return BinaryOperator::CreateFAdd(Op0, V);
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op1I->getOpcode() == Instruction::FAdd) {
if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
return BinaryOperator::CreateFNeg(Op1I->getOperand(1),
I.getName());
else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
return BinaryOperator::CreateFNeg(Op1I->getOperand(0),
I.getName());
}
}
return 0;
}
/// getICmpCode - Encode an icmp predicate into a three bit mask. These bits
/// are carefully arranged to allow folding of expressions such as:
///
/// (A < B) | (A > B) --> (A != B)
///
/// Note that this is only valid if the first and second predicates have the
/// same sign. It is illegal to do: (A u< B) | (A s> B)
///
/// Three bits are used to represent the condition, as follows:
/// 0 A > B
/// 1 A == B
/// 2 A < B
///
/// <=> Value Definition
/// 000 0 Always false
/// 001 1 A > B
/// 010 2 A == B
/// 011 3 A >= B
/// 100 4 A < B
/// 101 5 A != B
/// 110 6 A <= B
/// 111 7 Always true
///
static unsigned getICmpCode(const ICmpInst *ICI) {
switch (ICI->getPredicate()) {
// False -> 0
case ICmpInst::ICMP_UGT: return 1; // 001
case ICmpInst::ICMP_SGT: return 1; // 001
case ICmpInst::ICMP_EQ: return 2; // 010
case ICmpInst::ICMP_UGE: return 3; // 011
case ICmpInst::ICMP_SGE: return 3; // 011
case ICmpInst::ICMP_ULT: return 4; // 100
case ICmpInst::ICMP_SLT: return 4; // 100
case ICmpInst::ICMP_NE: return 5; // 101
case ICmpInst::ICMP_ULE: return 6; // 110
case ICmpInst::ICMP_SLE: return 6; // 110
// True -> 7
default:
llvm_unreachable("Invalid ICmp predicate!");
return 0;
}
}
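// Illustrative use of the encoding above (a sketch of what the icmp folding
// routines below do once both compares have the same operands):
//   unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); // e.g. 4 | 1 == 5
//   Value *V = getICmpValue(isSigned, Code, A, B);       // code 5 is A != B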
/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes an fcmp
/// predicate into a three bit mask. It also returns whether it is an ordered
/// predicate by reference.
static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
isOrdered = false;
switch (CC) {
case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
case FCmpInst::FCMP_UNO: return 0; // 000
case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
case FCmpInst::FCMP_UGT: return 1; // 001
case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
case FCmpInst::FCMP_UEQ: return 2; // 010
case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
case FCmpInst::FCMP_UGE: return 3; // 011
case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
case FCmpInst::FCMP_ULT: return 4; // 100
case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
case FCmpInst::FCMP_UNE: return 5; // 101
case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
case FCmpInst::FCMP_ULE: return 6; // 110
// True -> 7
default:
    // Not expecting FCMP_FALSE or FCMP_TRUE here.
llvm_unreachable("Unexpected FCmp predicate!");
return 0;
}
}
/// getICmpValue - This is the complement of getICmpCode, which turns an
/// opcode and two operands into either a constant true or false, or a brand
/// new ICmp instruction. The sign is passed in to determine which kind
/// of predicate to use in the new icmp instruction.
static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) {
switch (Code) {
default: assert(0 && "Illegal ICmp code!");
case 0:
return ConstantInt::getFalse(LHS->getContext());
case 1:
if (Sign)
return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
case 2:
return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS);
case 3:
if (Sign)
return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
case 4:
if (Sign)
return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
case 5:
return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS);
case 6:
if (Sign)
return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
case 7:
return ConstantInt::getTrue(LHS->getContext());
}
}
/// getFCmpValue - This is the complement of getFCmpCode, which turns a
/// three-bit code and two operands into either a constant true or a new FCmp
/// instruction. isordered is passed in to determine which kind of predicate
/// to use in the new fcmp instruction.
static Value *getFCmpValue(bool isordered, unsigned code,
Value *LHS, Value *RHS) {
switch (code) {
default: llvm_unreachable("Illegal FCmp code!");
case 0:
if (isordered)
return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
else
return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
case 1:
if (isordered)
return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
else
return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
case 2:
if (isordered)
return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
else
return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
case 3:
if (isordered)
return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
else
return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
case 4:
if (isordered)
return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
else
return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
case 5:
if (isordered)
return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
else
return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
case 6:
if (isordered)
return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
else
return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
case 7: return ConstantInt::getTrue(LHS->getContext());
}
}
/// PredicatesFoldable - Return true if both predicates match sign or if at
/// least one of them is an equality comparison (which is signless).
static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
(CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
(CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
}
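// For example, (A s< B) and (A == B) are foldable because equality ignores
// sign, while (A u< B) and (A s> B) are not: their three-bit codes would mix
// signed and unsigned orderings, so the callers below simply bail out.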
// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where
// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
// guaranteed to be a binary operator.
Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *OpRHS,
ConstantInt *AndRHS,
BinaryOperator &TheAnd) {
Value *X = Op->getOperand(0);
Constant *Together = 0;
if (!Op->isShift())
Together = ConstantExpr::getAnd(AndRHS, OpRHS);
switch (Op->getOpcode()) {
case Instruction::Xor:
if (Op->hasOneUse()) {
// (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
Value *And = Builder->CreateAnd(X, AndRHS);
And->takeName(Op);
return BinaryOperator::CreateXor(And, Together);
}
break;
case Instruction::Or:
if (Together == AndRHS) // (X | C) & C --> C
return ReplaceInstUsesWith(TheAnd, AndRHS);
if (Op->hasOneUse() && Together != OpRHS) {
// (X | C1) & C2 --> (X | (C1&C2)) & C2
Value *Or = Builder->CreateOr(X, Together);
Or->takeName(Op);
return BinaryOperator::CreateAnd(Or, AndRHS);
}
break;
case Instruction::Add:
if (Op->hasOneUse()) {
// Adding a one to a single bit bit-field should be turned into an XOR
// of the bit. First thing to check is to see if this AND is with a
// single bit constant.
const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
// If there is only one bit set.
if (AndRHSV.isPowerOf2()) {
// Ok, at this point, we know that we are masking the result of the
// ADD down to exactly one bit. If the constant we are adding has
// no bits set below this bit, then we can eliminate the ADD.
const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
// Check to see if any bits below the one bit set in AndRHSV are set.
if ((AddRHS & (AndRHSV-1)) == 0) {
          // If not, the only thing that can affect the output of the AND is
// the bit specified by AndRHSV. If that bit is set, the effect of
// the XOR is to toggle the bit. If it is clear, then the ADD has
// no effect.
if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
TheAnd.setOperand(0, X);
return &TheAnd;
} else {
// Pull the XOR out of the AND.
Value *NewAnd = Builder->CreateAnd(X, AndRHS);
NewAnd->takeName(Op);
return BinaryOperator::CreateXor(NewAnd, AndRHS);
}
}
}
}
break;
case Instruction::Shl: {
// We know that the AND will not produce any of the bits shifted in, so if
// the anded constant includes them, clear them now!
//
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
AndRHS->getValue() & ShlMask);
if (CI->getValue() == ShlMask) {
// Masking out bits that the shift already masks
return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
} else if (CI != AndRHS) { // Reducing bits set in and.
TheAnd.setOperand(1, CI);
return &TheAnd;
}
break;
}
case Instruction::LShr: {
// We know that the AND will not produce any of the bits shifted in, so if
// the anded constant includes them, clear them now! This only applies to
// unsigned shifts, because a signed shr may bring in set bits!
//
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
ConstantInt *CI = ConstantInt::get(Op->getContext(),
AndRHS->getValue() & ShrMask);
if (CI->getValue() == ShrMask) {
// Masking out bits that the shift already masks.
return ReplaceInstUsesWith(TheAnd, Op);
} else if (CI != AndRHS) {
TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
return &TheAnd;
}
break;
}
case Instruction::AShr:
// Signed shr.
// See if this is shifting in some sign extension, then masking it out
// with an and.
if (Op->hasOneUse()) {
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
Constant *C = ConstantInt::get(Op->getContext(),
AndRHS->getValue() & ShrMask);
if (C == AndRHS) { // Masking out bits shifted in.
// (Val ashr C1) & C2 -> (Val lshr C1) & C2
// Make the argument unsigned.
Value *ShVal = Op->getOperand(0);
ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
}
}
break;
}
return 0;
}
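// A few illustrative instances of the folds above, for i32 values:
//   ((X + 8) & 8)           --> ((X & 8) ^ 8) ; the add just toggles the bit
//   ((X + 16) & 8)          --> (X & 8)       ; the add can't touch the bit
//   ((X << 8) & 0xFFFFFF00) --> (X << 8)      ; the and keeps only bits the
//                                               shift can produce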
/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat V, Lo and Hi as signed or not. IB is the location to
/// insert new instructions.
Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
bool isSigned, bool Inside,
Instruction &IB) {
assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
"Lo is not <= Hi in range emission code!");
if (Inside) {
if (Lo == Hi) // Trivially false.
return new ICmpInst(ICmpInst::ICMP_NE, V, V);
// V >= Min && V < Hi --> V < Hi
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
ICmpInst::Predicate pred = (isSigned ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
return new ICmpInst(pred, V, Hi);
}
// Emit V-Lo <u Hi-Lo
Constant *NegLo = ConstantExpr::getNeg(Lo);
Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
}
if (Lo == Hi) // Trivially true.
return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
// V < Min || V >= Hi -> V > Hi-1
Hi = SubOne(cast<ConstantInt>(Hi));
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
ICmpInst::Predicate pred = (isSigned ?
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
return new ICmpInst(pred, V, Hi);
}
// Emit V-Lo >u Hi-1-Lo
// Note that Hi has already had one subtracted from it, above.
ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
}
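// For example, with Lo == 5, Hi == 10, isSigned == false and Inside == true,
// InsertRangeTest emits the single unsigned check
//   %off = add i32 %V, -5
//   %cmp = icmp ult i32 %off, 5
// rather than the two comparisons (V u>= 5 && V u< 10).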
// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
// any number of 0s on either side. The 1s are allowed to wrap from LSB to
// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
// not, since all 1s are not contiguous.
static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
const APInt& V = Val->getValue();
uint32_t BitWidth = Val->getType()->getBitWidth();
if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
// look for the first zero bit after the run of ones
MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
// look for the first non-zero bit
ME = V.getActiveBits();
return true;
}
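// Worked example: for Val == 0x0FF0 with a 16-bit type, the run of ones covers
// bits 4..11 (counting from 0), and isRunOfOnes computes
//   MB == 16 - countLeadingZeros((V-1)^V) == 16 - 11 == 5
//   ME == V.getActiveBits()               == 12
// so MB and ME name the first and last bit of the run, counted from 1.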
/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
/// where isSub determines whether the operator is a sub. If we can fold one of
/// the following xforms:
///
/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
///
/// return (A +/- B).
///
Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
ConstantInt *Mask, bool isSub,
Instruction &I) {
Instruction *LHSI = dyn_cast<Instruction>(LHS);
if (!LHSI || LHSI->getNumOperands() != 2 ||
!isa<ConstantInt>(LHSI->getOperand(1))) return 0;
ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
switch (LHSI->getOpcode()) {
default: return 0;
case Instruction::And:
if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) ==
Mask->getValue().getBitWidth())
break;
// Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
// part, we don't need any explicit masks to take them out of A. If that
// is all N is, ignore it.
uint32_t MB = 0, ME = 0;
if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
if (MaskedValueIsZero(RHS, Mask))
break;
}
}
return 0;
case Instruction::Or:
case Instruction::Xor:
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
&& ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
return 0;
}
if (isSub)
return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
}
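// For example, with Mask == 0xFF (a low-bit run), the first transform above
// applies:
//   ((A & 0xFFF) + B) & 0xFF  -->  (A + B) & 0xFF
// since 0xFFF & 0xFF == 0xFF, and dropping the inner mask only changes bits
// above the outer mask, which cannot carry down into the kept low bits.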
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
if (PredicatesFoldable(LHSCC, RHSCC)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
if (LHS->getOperand(0) == RHS->getOperand(0) &&
LHS->getOperand(1) == RHS->getOperand(1)) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
if (Instruction *I = dyn_cast<Instruction>(RV))
return I;
// Otherwise, it's a constant boolean value.
return ReplaceInstUsesWith(I, RV);
}
}
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
if (LHSCst == 0 || RHSCst == 0) return 0;
if (LHSCst == RHSCst && LHSCC == RHSCC) {
// (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
// where C is a power of 2
if (LHSCC == ICmpInst::ICMP_ULT &&
LHSCst->getValue().isPowerOf2()) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return new ICmpInst(LHSCC, NewOr, LHSCst);
}
// (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return new ICmpInst(LHSCC, NewOr, LHSCst);
}
}
// From here on, we only handle:
// (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
if (Val != Val2) return 0;
// ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
return 0;
// We can't fold (ugt x, C) & (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
return 0;
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
if (CmpInst::isSigned(LHSCC) ||
(ICmpInst::isEquality(LHSCC) &&
CmpInst::isSigned(RHSCC)))
ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
else
ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
if (ShouldSwap) {
std::swap(LHS, RHS);
std::swap(LHSCst, RHSCst);
std::swap(LHSCC, RHSCC);
}
  // At this point, we know we have two icmp instructions
  // comparing a value against two constants and and'ing the result
  // together. Because of the above check, we know that we only have
  // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here. We also know
  // (from the icmp folding check above) that the two constants
  // are not equal and that the larger constant is on the RHS.
assert(LHSCst != RHSCst && "Compares not folded above?");
switch (LHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false
case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false
case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
return ReplaceInstUsesWith(I, LHS);
}
case ICmpInst::ICMP_NE:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_ULT:
if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
break; // (X != 13 & X u< 15) -> no change
case ICmpInst::ICMP_SLT:
if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
break; // (X != 13 & X s< 15) -> no change
case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_NE:
if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
return new ICmpInst(ICmpInst::ICMP_UGT, Add,
ConstantInt::get(Add->getType(), 1));
}
break; // (X != 13 & X != 15) -> no change
}
break;
case ICmpInst::ICMP_ULT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
return ReplaceInstUsesWith(I, LHS);
case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
break;
}
break;
case ICmpInst::ICMP_SLT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false
case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
return ReplaceInstUsesWith(I, LHS);
case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
break;
}
break;
case ICmpInst::ICMP_UGT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
break;
case ICmpInst::ICMP_NE:
if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
return new ICmpInst(LHSCC, Val, RHSCst);
break; // (X u> 13 & X != 15) -> no change
case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
return InsertRangeTest(Val, AddOne(LHSCst),
RHSCst, false, true, I);
case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
break;
}
break;
case ICmpInst::ICMP_SGT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
break;
case ICmpInst::ICMP_NE:
if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
return new ICmpInst(LHSCC, Val, RHSCst);
break; // (X s> 13 & X != 15) -> no change
case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
return InsertRangeTest(Val, AddOne(LHSCst),
RHSCst, true, true, I);
case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
break;
}
break;
}
return 0;
}
Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
// (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
// If either of the constants are nans, then the whole thing returns
// false.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
return new FCmpInst(FCmpInst::FCMP_ORD,
LHS->getOperand(0), RHS->getOperand(0));
}
// Handle vector zeros. This occurs because the canonical form of
// "fcmp ord x,x" is "fcmp ord x, 0".
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
return new FCmpInst(FCmpInst::FCMP_ORD,
LHS->getOperand(0), RHS->getOperand(0));
return 0;
}
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
// Swap RHS operands to match LHS.
Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
std::swap(Op1LHS, Op1RHS);
}
if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
// Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
if (Op0CC == Op1CC)
return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
if (Op0CC == FCmpInst::FCMP_TRUE)
return ReplaceInstUsesWith(I, RHS);
if (Op1CC == FCmpInst::FCMP_TRUE)
return ReplaceInstUsesWith(I, LHS);
bool Op0Ordered;
bool Op1Ordered;
unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
if (Op1Pred == 0) {
std::swap(LHS, RHS);
std::swap(Op0Pred, Op1Pred);
std::swap(Op0Ordered, Op1Ordered);
}
if (Op0Pred == 0) {
// uno && ueq -> uno && (uno || eq) -> ueq
// ord && olt -> ord && (ord && lt) -> olt
if (Op0Ordered == Op1Ordered)
return ReplaceInstUsesWith(I, RHS);
// uno && oeq -> uno && (ord && eq) -> false
// uno && ord -> false
if (!Op0Ordered)
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
// ord && ueq -> ord && (uno || eq) -> oeq
return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS));
}
}
return 0;
}
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyAndInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
APInt NotAndRHS(~AndRHSMask);
// Optimize a variety of ((val OP C1) & C2) combinations...
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
Value *Op0LHS = Op0I->getOperand(0);
Value *Op0RHS = Op0I->getOperand(1);
switch (Op0I->getOpcode()) {
default: break;
case Instruction::Xor:
case Instruction::Or:
// If the mask is only needed on one incoming arm, push it up.
if (!Op0I->hasOneUse()) break;
if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
// Not masking anything out for the LHS, move to RHS.
Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
Op0RHS->getName()+".masked");
return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
}
if (!isa<Constant>(Op0RHS) &&
MaskedValueIsZero(Op0RHS, NotAndRHS)) {
// Not masking anything out for the RHS, move to LHS.
Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
Op0LHS->getName()+".masked");
return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
}
break;
case Instruction::Add:
// ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
// ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
// ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
return BinaryOperator::CreateAnd(V, AndRHS);
if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
break;
case Instruction::Sub:
// ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
// ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
// ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
return BinaryOperator::CreateAnd(V, AndRHS);
// (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
// has 1's for all bits that the subtraction with A might affect.
if (Op0I->hasOneUse()) {
uint32_t BitWidth = AndRHSMask.getBitWidth();
uint32_t Zeros = AndRHSMask.countLeadingZeros();
APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
if (!(A && A->isZero()) && // avoid infinite recursion.
MaskedValueIsZero(Op0LHS, Mask)) {
Value *NewNeg = Builder->CreateNeg(Op0RHS);
return BinaryOperator::CreateAnd(NewNeg, AndRHS);
}
}
break;
case Instruction::Shl:
case Instruction::LShr:
// (1 << x) & 1 --> zext(x == 0)
// (1 >> x) & 1 --> zext(x == 0)
if (AndRHSMask == 1 && Op0LHS == AndRHS) {
Value *NewICmp =
Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
return new ZExtInst(NewICmp, I.getType());
}
break;
}
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
return Res;
} else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
// If this is an integer truncation or change from signed-to-unsigned, and
// if the source is an and/or with immediate, transform it. This
// frequently occurs for bitfield accesses.
if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
CastOp->getNumOperands() == 2)
if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
if (CastOp->getOpcode() == Instruction::And) {
// Change: and (cast (and X, C1) to T), C2
// into : and (cast X to T), trunc_or_bitcast(C1)&C2
// This will fold the two constants together, which may allow
// other simplifications.
Value *NewCast = Builder->CreateTruncOrBitCast(
CastOp->getOperand(0), I.getType(),
CastOp->getName()+".shrunk");
// trunc_or_bitcast(C1)&C2
Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
C3 = ConstantExpr::getAnd(C3, AndRHS);
return BinaryOperator::CreateAnd(NewCast, C3);
} else if (CastOp->getOpcode() == Instruction::Or) {
// Change: and (cast (or X, C1) to T), C2
// into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
// trunc(C1)&C2
return ReplaceInstUsesWith(I, AndRHS);
}
}
}
}
// Try to fold constant and into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
}
// (~A & ~B) == (~(A | B)) - De Morgan's Law
if (Value *Op0NotVal = dyn_castNotVal(Op0))
if (Value *Op1NotVal = dyn_castNotVal(Op1))
if (Op0->hasOneUse() && Op1->hasOneUse()) {
Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
I.getName()+".demorgan");
return BinaryOperator::CreateNot(Or);
}
{
Value *A = 0, *B = 0, *C = 0, *D = 0;
// (A|B) & ~(A&B) -> A^B
if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
((A == C && B == D) || (A == D && B == C)))
return BinaryOperator::CreateXor(A, B);
// ~(A&B) & (A|B) -> A^B
if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
((A == C && B == D) || (A == D && B == C)))
return BinaryOperator::CreateXor(A, B);
if (Op0->hasOneUse() &&
match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
if (A == Op1) { // (A^B)&A -> A&(A^B)
I.swapOperands(); // Simplify below
std::swap(Op0, Op1);
} else if (B == Op1) { // (A^B)&B -> B&(B^A)
cast<BinaryOperator>(Op0)->swapOperands();
I.swapOperands(); // Simplify below
std::swap(Op0, Op1);
}
}
if (Op1->hasOneUse() &&
match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
if (B == Op0) { // B&(A^B) -> B&(B^A)
cast<BinaryOperator>(Op1)->swapOperands();
std::swap(A, B);
}
if (A == Op0) // A&(A^B) -> A & ~B
return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
}
// (A&((~A)|B)) -> A&B
if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
return BinaryOperator::CreateAnd(A, Op1);
if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
return BinaryOperator::CreateAnd(A, Op0);
}
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
return Res;
// fold (and (cast A), (cast B)) -> (cast (and A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVector() &&
// Only do this if the casts both really cause code to be generated.
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType()) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType())) {
Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
// (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
Value *NewOp =
Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
SI0->getName());
return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
}
// If and'ing two fcmp, try combine them into one.
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
return Res;
}
return Changed ? &I : 0;
}
/// CollectBSwapParts - Analyze the specified subexpression and see if it is
/// capable of providing pieces of a bswap. The subexpression provides pieces
/// of a bswap if it is proven that each of the non-zero bytes in the output of
/// the expression came from the corresponding "byte swapped" byte in some other
/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
/// we know that the expression deposits the low byte of %X into the high byte
/// of the bswap result and that all other bytes are zero. This expression is
/// accepted, and the high byte of ByteValues is set to %X to indicate a
/// correct match.
///
/// This function returns true if the match was unsuccessful and false if it
/// succeeded.
/// On entry to the function the "OverallLeftShift" is a signed integer value
/// indicating the number of bytes that the subexpression is later shifted. For
/// example, if the expression is later right shifted by 16 bits, the
/// OverallLeftShift value would be -2 on entry. This is used to specify which
/// byte of ByteValues is actually being set.
///
/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
/// byte is masked to zero by a user. For example, in (X & 255), X will be
/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
/// this function to working on up to 32-byte (256 bit) values. ByteMask is
/// always in the local (OverallLeftShift) coordinate space.
///
static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
SmallVector<Value*, 8> &ByteValues) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If this is an or instruction, it may be an inner node of the bswap.
if (I->getOpcode() == Instruction::Or) {
return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
ByteValues) ||
CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
ByteValues);
}
// If this is a logical shift by a constant multiple of 8, recurse with
// OverallLeftShift and ByteMask adjusted.
if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
unsigned ShAmt =
cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
// Ensure the shift amount is defined and of a byte value.
if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
return true;
unsigned ByteShift = ShAmt >> 3;
if (I->getOpcode() == Instruction::Shl) {
// X << 2 -> collect(X, +2)
OverallLeftShift += ByteShift;
ByteMask >>= ByteShift;
} else {
// X >>u 2 -> collect(X, -2)
OverallLeftShift -= ByteShift;
ByteMask <<= ByteShift;
ByteMask &= (~0U >> (32-ByteValues.size()));
}
if (OverallLeftShift >= (int)ByteValues.size()) return true;
if (OverallLeftShift <= -(int)ByteValues.size()) return true;
return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
ByteValues);
}
// If this is a logical 'and' with a mask that clears bytes, clear the
// corresponding bytes in ByteMask.
if (I->getOpcode() == Instruction::And &&
isa<ConstantInt>(I->getOperand(1))) {
// Scan every byte of the and mask, seeing if the byte is either 0 or 255.
unsigned NumBytes = ByteValues.size();
APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
// If this byte is masked out by a later operation, we don't care what
// the and mask is.
if ((ByteMask & (1 << i)) == 0)
continue;
// If the AndMask is all zeros for this byte, clear the bit.
APInt MaskB = AndMask & Byte;
if (MaskB == 0) {
ByteMask &= ~(1U << i);
continue;
}
// If the AndMask is not all ones for this byte, it's not a bytezap.
if (MaskB != Byte)
return true;
// Otherwise, this byte is kept.
}
return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
ByteValues);
}
}
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
// the input value to the bswap. Some observations: 1) if more than one byte
// is demanded from this input, then it could not be successfully assembled
// into a byteswap. At least one of the two bytes would not be aligned with
// their ultimate destination.
if (!isPowerOf2_32(ByteMask)) return true;
unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
// 2) The input and ultimate destinations must line up: if byte 3 of an i32
// is demanded, it needs to go into byte 0 of the result. This means that the
// byte needs to be shifted until it lands in the right byte bucket. The
// shift amount depends on the position: if the byte is coming from the high
// part of the value (e.g. byte 3) then it must be shifted right. If from the
// low part, it must be shifted left.
unsigned DestByteNo = InputByteNo + OverallLeftShift;
if (InputByteNo < ByteValues.size()/2) {
if (ByteValues.size()-1-DestByteNo != InputByteNo)
return true;
} else {
if (ByteValues.size()-1-DestByteNo != InputByteNo)
return true;
}
// If the destination byte value is already defined, the values are or'd
// together, which isn't a bswap (unless it's an or of the same bits).
if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
return true;
ByteValues[DestByteNo] = V;
return false;
}
/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
/// If so, insert the new bswap intrinsic and return it.
Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
if (!ITy || ITy->getBitWidth() % 16 ||
// ByteMask only allows up to 32-byte values.
ITy->getBitWidth() > 32*8)
return 0; // Can only bswap pairs of bytes. Can't do vectors.
/// ByteValues - For each byte of the result, we keep track of which value
/// defines each byte.
SmallVector<Value*, 8> ByteValues;
ByteValues.resize(ITy->getBitWidth()/8);
// Try to find all the pieces corresponding to the bswap.
uint32_t ByteMask = ~0U >> (32-ByteValues.size());
if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
return 0;
// Check to see if all of the bytes come from the same value.
Value *V = ByteValues[0];
if (V == 0) return 0; // Didn't find a byte? Must be zero.
// Check to make sure that all of the bytes come from the same value.
for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
if (ByteValues[i] != V)
return 0;
const Type *Tys[] = { ITy };
Module *M = I.getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
return CallInst::Create(F, V);
}
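// For illustration, the classic i32 byte-swap idiom matched here is
//   (X << 24) | ((X << 8) & 0x00FF0000) | ((X >> 8) & 0x0000FF00) | (X >> 24)
// (with logical shifts), which gets replaced by a single call to
// llvm.bswap.i32.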
/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then
/// we can simplify this expression to "cond ? C : D" (or "cond ? C : B").
static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
Value *C, Value *D) {
// If A is not a select of -1/0, this cannot match.
Value *Cond = 0;
if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
return 0;
// ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
return SelectInst::Create(Cond, C, B);
if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
return SelectInst::Create(Cond, C, B);
// ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
return SelectInst::Create(Cond, C, D);
if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
return SelectInst::Create(Cond, C, D);
return 0;
}
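// For illustration: if %A is "select i1 %cond, i32 -1, i32 0" and %B is the
// complementary "select i1 %cond, i32 0, i32 -1", MatchSelectFromAndOr turns
//   (%A & %C) | (%B & %D)
// into
//   select i1 %cond, i32 %C, i32 %D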
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (PredicatesFoldable(LHSCC, RHSCC)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
if (LHS->getOperand(0) == RHS->getOperand(0) &&
LHS->getOperand(1) == RHS->getOperand(1)) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
if (Instruction *I = dyn_cast<Instruction>(RV))
return I;
// Otherwise, it's a constant boolean value.
return ReplaceInstUsesWith(I, RV);
}
}
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
if (LHSCst == 0 || RHSCst == 0) return 0;
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
if (LHSCst == RHSCst && LHSCC == RHSCC &&
LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return new ICmpInst(LHSCC, NewOr, LHSCst);
}
// From here on, we only handle:
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
if (Val != Val2) return 0;
// ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
return 0;
// We can't fold (ugt x, C) | (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
return 0;
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
if (CmpInst::isSigned(LHSCC) ||
(ICmpInst::isEquality(LHSCC) &&
CmpInst::isSigned(RHSCC)))
ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
else
ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
if (ShouldSwap) {
std::swap(LHS, RHS);
std::swap(LHSCst, RHSCst);
std::swap(LHSCC, RHSCC);
}
  // At this point, we know we have two icmp instructions
// comparing a value against two constants and or'ing the result
// together. Because of the above check, we know that we only have
// ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
// icmp folding check above), that the two constants are not
// equal.
assert(LHSCst != RHSCst && "Compares not folded above?");
switch (LHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
if (LHSCst == SubOne(RHSCst)) {
// (X == 13 | X == 14) -> X-13 <u 2
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
}
break; // (X == 13 | X == 15) -> no change
case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
break;
case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
return ReplaceInstUsesWith(I, RHS);
}
break;
case ICmpInst::ICMP_NE:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
return ReplaceInstUsesWith(I, LHS);
case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
}
break;
case ICmpInst::ICMP_ULT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
break;
case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
// If RHSCst is [us]MAXINT, it is always false. Not handling
// this can cause overflow.
if (RHSCst->isMaxValue(false))
return ReplaceInstUsesWith(I, LHS);
return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
false, false, I);
case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
break;
}
break;
case ICmpInst::ICMP_SLT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
break;
case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
// If RHSCst is [us]MAXINT, it is always false. Not handling
// this can cause overflow.
if (RHSCst->isMaxValue(true))
return ReplaceInstUsesWith(I, LHS);
return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
true, false, I);
case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
break;
}
break;
case ICmpInst::ICMP_UGT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
return ReplaceInstUsesWith(I, LHS);
case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
break;
}
break;
case ICmpInst::ICMP_SGT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
return ReplaceInstUsesWith(I, LHS);
case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
break;
}
break;
}
return 0;
}
Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
RHS->getPredicate() == FCmpInst::FCMP_UNO &&
LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
// If either of the constants are nans, then the whole thing returns
// true.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
// Otherwise, no need to compare the two constants, compare the
// rest.
return new FCmpInst(FCmpInst::FCMP_UNO,
LHS->getOperand(0), RHS->getOperand(0));
}
// Handle vector zeros. This occurs because the canonical form of
// "fcmp uno x,x" is "fcmp uno x, 0".
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
return new FCmpInst(FCmpInst::FCMP_UNO,
LHS->getOperand(0), RHS->getOperand(0));
return 0;
}
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
// Swap RHS operands to match LHS.
Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
std::swap(Op1LHS, Op1RHS);
}
if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
// Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
if (Op0CC == Op1CC)
return new FCmpInst((FCmpInst::Predicate)Op0CC,
Op0LHS, Op0RHS);
if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
if (Op0CC == FCmpInst::FCMP_FALSE)
return ReplaceInstUsesWith(I, RHS);
if (Op1CC == FCmpInst::FCMP_FALSE)
return ReplaceInstUsesWith(I, LHS);
bool Op0Ordered;
bool Op1Ordered;
unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
if (Op0Ordered == Op1Ordered) {
// If both are ordered or unordered, return a new fcmp with
// or'ed predicates.
Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS);
if (Instruction *I = dyn_cast<Instruction>(RV))
return I;
// Otherwise, it's a constant boolean value...
return ReplaceInstUsesWith(I, RV);
}
}
return 0;
}
/// FoldOrWithConstants - This helper function folds:
///
/// ((A | B) & C1) | (B & C2)
///
/// into:
///
/// (A & C1) | B
///
/// when the XOR of the two constants is "all ones" (-1).
Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
Value *A, Value *B, Value *C) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
if (!CI1) return 0;
Value *V1 = 0;
ConstantInt *CI2 = 0;
if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
APInt Xor = CI1->getValue() ^ CI2->getValue();
if (!Xor.isAllOnesValue()) return 0;
if (V1 == A || V1 == B) {
Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
return BinaryOperator::CreateOr(NewOp, V1);
}
return 0;
}
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyOrInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
isOnlyUse(Op0)) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
return BinaryOperator::CreateAnd(Or,
ConstantInt::get(I.getContext(),
RHS->getValue() | C1->getValue()));
}
// (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
isOnlyUse(Op0)) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
return BinaryOperator::CreateXor(Or,
ConstantInt::get(I.getContext(),
C1->getValue() & ~RHS->getValue()));
}
// Try to fold constant and into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
}
Value *A = 0, *B = 0;
ConstantInt *C1 = 0, *C2 = 0;
// (A | B) | C and A | (B | C) -> bswap if possible.
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
if (match(Op0, m_Or(m_Value(), m_Value())) ||
match(Op1, m_Or(m_Value(), m_Value())) ||
(match(Op0, m_Shift(m_Value(), m_Value())) &&
match(Op1, m_Shift(m_Value(), m_Value())))) {
if (Instruction *BSwap = MatchBSwap(I))
return BSwap;
}
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
if (Op0->hasOneUse() &&
match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
MaskedValueIsZero(Op1, C1->getValue())) {
Value *NOr = Builder->CreateOr(A, Op1);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr, C1);
}
// Y|(X^C) -> (X|Y)^C iff Y&C == 0
if (Op1->hasOneUse() &&
match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
MaskedValueIsZero(Op0, C1->getValue())) {
Value *NOr = Builder->CreateOr(A, Op0);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr, C1);
}
// (A & C)|(B & D)
Value *C = 0, *D = 0;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
Value *V1 = 0, *V2 = 0, *V3 = 0;
C1 = dyn_cast<ConstantInt>(C);
C2 = dyn_cast<ConstantInt>(D);
if (C1 && C2) { // (A & C1)|(B & C2)
// If we have: ((V + N) & C1) | (V & C2)
// .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
// replace with V+N.
if (C1->getValue() == ~C2->getValue()) {
if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
match(A, m_Add(m_Value(V1), m_Value(V2)))) {
// Add commutes, try both ways.
if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
return ReplaceInstUsesWith(I, A);
if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
return ReplaceInstUsesWith(I, A);
}
// Or commutes, try both ways.
if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
match(B, m_Add(m_Value(V1), m_Value(V2)))) {
// Add commutes, try both ways.
if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
return ReplaceInstUsesWith(I, B);
if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
return ReplaceInstUsesWith(I, B);
}
}
// ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
// iff (C1&C2) == 0 and (N&~C1) == 0
if ((C1->getValue() & C2->getValue()) == 0) {
if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
(V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
return BinaryOperator::CreateAnd(A,
ConstantInt::get(A->getContext(),
C1->getValue()|C2->getValue()));
// Or commutes, try both ways.
if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
(V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
return BinaryOperator::CreateAnd(B,
ConstantInt::get(B->getContext(),
C1->getValue()|C2->getValue()));
}
}
// Check to see if we have any common things being and'ed. If so, find the
// terms for V1 & (V2|V3).
if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
V1 = 0;
if (A == B) // (A & C)|(A & D) == A & (C|D)
V1 = A, V2 = C, V3 = D;
else if (A == D) // (A & C)|(B & A) == A & (B|C)
V1 = A, V2 = B, V3 = C;
else if (C == B) // (A & C)|(C & D) == C & (A|D)
V1 = C, V2 = A, V3 = D;
else if (C == D) // (A & C)|(B & C) == C & (A|B)
V1 = C, V2 = A, V3 = B;
if (V1) {
Value *Or = Builder->CreateOr(V2, V3, "tmp");
return BinaryOperator::CreateAnd(V1, Or);
}
}
// (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants
if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
return Match;
if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
return Match;
if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
return Match;
if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
return Match;
// ((A&~B)|(~A&B)) -> A^B
if ((match(C, m_Not(m_Specific(D))) &&
match(B, m_Not(m_Specific(A)))))
return BinaryOperator::CreateXor(A, D);
// ((~B&A)|(~A&B)) -> A^B
if ((match(A, m_Not(m_Specific(D))) &&
match(B, m_Not(m_Specific(C)))))
return BinaryOperator::CreateXor(C, D);
// ((A&~B)|(B&~A)) -> A^B
if ((match(C, m_Not(m_Specific(B))) &&
match(D, m_Not(m_Specific(A)))))
return BinaryOperator::CreateXor(A, B);
// ((~B&A)|(B&~A)) -> A^B
if ((match(A, m_Not(m_Specific(B))) &&
match(D, m_Not(m_Specific(C)))))
return BinaryOperator::CreateXor(C, B);
}
// (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
SI0->getName());
return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
}
// ((A|B)&1)|(B&-2) -> (A&1) | B
if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
if (Ret) return Ret;
}
// (B&-2)|((A|B)&1) -> (A&1) | B
if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
if (Ret) return Ret;
}
// (~A | ~B) == (~(A & B)) - De Morgan's Law
if (Value *Op0NotVal = dyn_castNotVal(Op0))
if (Value *Op1NotVal = dyn_castNotVal(Op1))
if (Op0->hasOneUse() && Op1->hasOneUse()) {
Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
I.getName()+".demorgan");
return BinaryOperator::CreateNot(And);
}
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
return Res;
// fold (or (cast A), (cast B)) -> (cast (or A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
!isa<ICmpInst>(Op1C->getOperand(0))) {
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVector() &&
// Only do this if the casts both really cause code to be
// generated.
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType()) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType())) {
Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
}
}
// (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
return Res;
}
return Changed ? &I : 0;
}
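/// visitXor - Perform xor-specific combines: not-of-logic rewrites via
/// De Morgan's law, icmp/fcmp inversion for 'xor ..., true', reassociation
/// such as A^(A^B) --> B, and hoisting 'xor' above identical casts.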
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (isa<UndefValue>(Op1)) {
if (isa<UndefValue>(Op0))
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
}
// xor X, X = 0
if (Op0 == Op1)
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
if (isa<VectorType>(I.getType()))
if (isa<ConstantAggregateZero>(Op1))
return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
if (Op0I->getOpcode() == Instruction::And ||
Op0I->getOpcode() == Instruction::Or) {
// ~(~X & Y) --> (X | ~Y) - De Morgan's Law
// ~(~X | Y) === (X & ~Y) - De Morgan's Law
if (dyn_castNotVal(Op0I->getOperand(1)))
Op0I->swapOperands();
if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
Value *NotY =
Builder->CreateNot(Op0I->getOperand(1),
Op0I->getOperand(1)->getName()+".not");
if (Op0I->getOpcode() == Instruction::And)
return BinaryOperator::CreateOr(Op0NotVal, NotY);
return BinaryOperator::CreateAnd(Op0NotVal, NotY);
}
// ~(X & Y) --> (~X | ~Y) - De Morgan's Law
// ~(X | Y) === (~X & ~Y) - De Morgan's Law
if (isFreeToInvert(Op0I->getOperand(0)) &&
isFreeToInvert(Op0I->getOperand(1))) {
Value *NotX =
Builder->CreateNot(Op0I->getOperand(0), "notlhs");
Value *NotY =
Builder->CreateNot(Op0I->getOperand(1), "notrhs");
if (Op0I->getOpcode() == Instruction::And)
return BinaryOperator::CreateOr(NotX, NotY);
return BinaryOperator::CreateAnd(NotX, NotY);
}
}
}
}
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
if (RHS->isOne() && Op0->hasOneUse()) {
// xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
return new ICmpInst(ICI->getInversePredicate(),
ICI->getOperand(0), ICI->getOperand(1));
if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
return new FCmpInst(FCI->getInversePredicate(),
FCI->getOperand(0), FCI->getOperand(1));
}
// fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
if (CI->hasOneUse() && Op0C->hasOneUse()) {
Instruction::CastOps Opcode = Op0C->getOpcode();
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
(RHS == ConstantExpr::getCast(Opcode,
ConstantInt::getTrue(I.getContext()),
Op0C->getDestTy()))) {
CI->setPredicate(CI->getInversePredicate());
return CastInst::Create(Opcode, CI, Op0C->getType());
}
}
}
}
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ~(c-X) == X-c-1 == X+(-c-1)
if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
ConstantInt::get(I.getType(), 1));
return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
}
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
if (Op0I->getOpcode() == Instruction::Add) {
// ~(X+c) --> (-c-1)-X
if (RHS->isAllOnesValue()) {
Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
return BinaryOperator::CreateSub(
ConstantExpr::getSub(NegOp0CI,
ConstantInt::get(I.getType(), 1)),
Op0I->getOperand(0));
} else if (RHS->getValue().isSignBit()) {
// (X + C) ^ signbit -> (X + C + signbit)
Constant *C = ConstantInt::get(I.getContext(),
RHS->getValue() + Op0CI->getValue());
return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
}
} else if (Op0I->getOpcode() == Instruction::Or) {
// (X|C1)^C2 -> X^(C1^C2) iff (X & C1) == 0
if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
NewRHS = ConstantExpr::getAnd(NewRHS,
ConstantExpr::getNot(CommonBits));
Worklist.Add(Op0I);
I.setOperand(0, Op0I->getOperand(0));
I.setOperand(1, NewRHS);
return &I;
}
}
}
}
// Try to fold constant and into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
}
if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
if (X == Op1)
return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
if (X == Op0)
return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
if (Op1I) {
Value *A, *B;
if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
if (A == Op0) { // B^(B|A) == (A|B)^B
Op1I->swapOperands();
I.swapOperands();
std::swap(Op0, Op1);
} else if (B == Op0) { // B^(A|B) == (A|B)^B
I.swapOperands(); // Simplified below.
std::swap(Op0, Op1);
}
} else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
return ReplaceInstUsesWith(I, B); // A^(A^B) == B
} else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
return ReplaceInstUsesWith(I, A); // A^(B^A) == B
} else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
Op1I->hasOneUse()){
if (A == Op0) { // A^(A&B) -> A^(B&A)
Op1I->swapOperands();
std::swap(A, B);
}
if (B == Op0) { // A^(B&A) -> (B&A)^A
I.swapOperands(); // Simplified below.
std::swap(Op0, Op1);
}
}
}
BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
if (Op0I) {
Value *A, *B;
if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
Op0I->hasOneUse()) {
if (A == Op1) // (B|A)^B == (A|B)^B
std::swap(A, B);
if (B == Op1) // (A|B)^B == A & ~B
return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
} else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
return ReplaceInstUsesWith(I, B); // (A^B)^A == B
} else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
return ReplaceInstUsesWith(I, A); // (B^A)^A == B
} else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
Op0I->hasOneUse()){
if (A == Op1) // (A&B)^A -> (B&A)^A
std::swap(A, B);
if (B == Op1 && // (B&A)^A == ~B & A
!isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
}
}
}
// (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
if (Op0I && Op1I && Op0I->isShift() &&
Op0I->getOpcode() == Op1I->getOpcode() &&
Op0I->getOperand(1) == Op1I->getOperand(1) &&
(Op0I->hasOneUse() || Op1I->hasOneUse())) {
Value *NewOp =
Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
Op0I->getName());
return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
Op1I->getOperand(1));
}
if (Op0I && Op1I) {
Value *A, *B, *C, *D;
// (A & B)^(A | B) -> A ^ B
if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
// (A | B)^(A & B) -> A ^ B
if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
match(Op1I, m_And(m_Value(C), m_Value(D)))) {
if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
// (A & B)^(C & D)
if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
match(Op0I, m_And(m_Value(A), m_Value(B))) &&
match(Op1I, m_And(m_Value(C), m_Value(D)))) {
// (X & Y)^(X & Z) -> (Y^Z) & X
Value *X = 0, *Y = 0, *Z = 0;
if (A == C)
X = A, Y = B, Z = D;
else if (A == D)
X = A, Y = B, Z = C;
else if (B == C)
X = B, Y = A, Z = D;
else if (B == D)
X = B, Y = A, Z = C;
if (X) {
Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
return BinaryOperator::CreateAnd(NewOp, X);
}
}
}
// (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
if (LHS->getOperand(0) == RHS->getOperand(0) &&
LHS->getOperand(1) == RHS->getOperand(1)) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
if (Instruction *I = dyn_cast<Instruction>(RV))
return I;
// Otherwise, it's a constant boolean value.
return ReplaceInstUsesWith(I, RV);
}
}
// fold (xor (cast A), (cast B)) -> (cast (xor A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
// Only do this if the casts both really cause code to be generated.
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType()) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType())) {
Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
}
return Changed ? &I : 0;
}
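// shl and lshr currently have no opcode-specific folds here; both rely
// entirely on the shared shift transforms.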
Instruction *InstCombiner::visitShl(BinaryOperator &I) {
return commonShiftTransforms(I);
}
Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return commonShiftTransforms(I);
}
Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (Instruction *R = commonShiftTransforms(I))
return R;
Value *Op0 = I.getOperand(0);
// ashr int -1, X = -1 (for any arithmetic shift rights of ~0)
if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
if (CSI->isAllOnesValue())
return ReplaceInstUsesWith(I, CSI);
// See if we can turn a signed shr into an unsigned shr.
if (MaskedValueIsZero(Op0,
APInt::getSignBit(I.getType()->getScalarSizeInBits())))
return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
// Arithmetic shifting an all-sign-bit value is a no-op.
unsigned NumSignBits = ComputeNumSignBits(Op0);
if (NumSignBits == Op0->getType()->getScalarSizeInBits())
return ReplaceInstUsesWith(I, Op0);
return 0;
}
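/// commonShiftTransforms - Folds shared by shl, lshr and ashr: shift by zero,
/// shift of zero, undef operands, demanded-bit simplification, and shifts by
/// a constant amount (handled in FoldShiftByConstant).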
Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// shl X, 0 == X and shr X, 0 == X
// shl 0, X == 0 and shr 0, X == 0
if (Op1 == Constant::getNullValue(Op1->getType()) ||
Op0 == Constant::getNullValue(Op0->getType()))
return ReplaceInstUsesWith(I, Op0);
if (isa<UndefValue>(Op0)) {
if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
return ReplaceInstUsesWith(I, Op0);
else // undef << X -> 0, undef >>u X -> 0
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
if (isa<UndefValue>(Op1)) {
if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X
return ReplaceInstUsesWith(I, Op0);
else // X << undef, X >>u undef -> 0
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
// See if we can fold away this shift.
if (SimplifyDemandedInstructionBits(I))
return &I;
// Try to fold constant and into select arguments.
if (isa<Constant>(Op0))
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
return 0;
}
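/// FoldShiftByConstant - Fold a shift whose amount is a known constant, e.g.
/// merging a shift of a shift: (X >>u 3) >>u 2 --> X >>u 5 (illustrative).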
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
// Cache the scalar bit width of the type being shifted.
uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
// A shift amount >= the bit width makes shl/lshr produce zero, so fold
// those to 0; an over-wide ashr just replicates the sign bit, so clamp
// its amount to TypeBits-1 instead of eliminating it.
//
if (Op1->uge(TypeBits)) {
if (I.getOpcode() != Instruction::AShr)
return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
else {
I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
return &I;
}
}
// ((X*C1) << C2) == (X * (C1 << C2))
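// e.g. (X*3) << 2 == X*12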
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
if (BO->getOpcode() == Instruction::Mul && isLeftShift)
if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
return BinaryOperator::CreateMul(BO->getOperand(0),
ConstantExpr::getShl(BOOp, Op1));
// Try to fold constant and into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
// Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
// If 'shift2' is an ashr, we would have to get the sign bit into a funny
// place. Don't try to do this transformation in this case. Also, we
// require that the input operand is a shift-by-constant so that we have
// confidence that the shifts will get folded together. We could do this
// xform in more cases, but it is unlikely to be profitable.
if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
isa<ConstantInt>(TrOp->getOperand(1))) {
// Okay, we'll do this xform. Make the shift of shift.
Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
// (shift2 (shift1 & 0x00FF), c2)
Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
// For logical shifts, the truncation has the effect of making the high
// part of the register be zeros. Emulate this by inserting an AND to
// clear the top bits as needed. This 'and' will usually be zapped by
// other xforms later if dead.
unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
unsigned DstSize = TI->getType()->getScalarSizeInBits();
APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
// The mask we constructed says what the trunc would do if occurring
// between the shifts. We want to know the effect *after* the second
// shift. We know that it is a logical shift by a constant, so adjust the
// mask as appropriate.
if (I.getOpcode() == Instruction::Shl)
MaskV <<= Op1->getZExtValue();
else {
assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
MaskV = MaskV.lshr(Op1->getZExtValue());
}
// shift1 & 0x00FF
Value *And = Builder->CreateAnd(NSh,
ConstantInt::get(I.getContext(), MaskV),
TI->getName());
// Return the value truncated to the interesting size.
return new TruncInst(And, I.getType());
}
}
if (Op0->hasOneUse()) {
if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
// Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
Value *V1, *V2;
ConstantInt *CC;
switch (Op0BO->getOpcode()) {
default: break;
case Instruction::Add:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
// These operators commute.
// Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
m_Specific(Op1)))) {
Value *YS = // (Y << C)
Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
// (X + (Y << C))
Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
Op0BO->getOperand(1)->getName());
uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
// Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
Value *Op0BOOp1 = Op0BO->getOperand(1);
if (isLeftShift && Op0BOOp1->hasOneUse() &&
match(Op0BOOp1,
m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
m_ConstantInt(CC))) &&
cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
Value *YS = // (Y << C)
Builder->CreateShl(Op0BO->getOperand(0), Op1,
Op0BO->getName());
// X & (CC << C)
Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
V1->getName()+".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
}
}
// FALL THROUGH.
case Instruction::Sub: {
// Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
m_Specific(Op1)))) {
Value *YS = // (Y << C)
Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
// (X + (Y << C))
Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
Op0BO->getOperand(0)->getName());
uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
// Turn (((X >> C)&CC) + Y) << C  ->  ((X & (CC << C)) + (Y << C))
if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
match(Op0BO->getOperand(0),
m_And(m_Shr(m_Value(V1), m_Value(V2)),
m_ConstantInt(CC))) && V2 == Op1 &&
cast<BinaryOperator>(Op0BO->getOperand(0))
->getOperand(0)->hasOneUse()) {
Value *YS = // (Y << C)
Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
// X & (CC << C)
Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
V1->getName()+".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
}
break;
}
}
// If the operand is a bitwise operator with a constant RHS, and the
// shift is the only use, we can pull it out of the shift.
if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
bool isValid = true; // Valid only for And, Or, Xor
bool highBitSet = false; // Transform if high bit of constant set?
switch (Op0BO->getOpcode()) {
default: isValid = false; break; // Do not perform transform!
case Instruction::Add:
isValid = isLeftShift;
break;
case Instruction::Or:
case Instruction::Xor:
highBitSet = false;
break;
case Instruction::And:
highBitSet = true;
break;
}
// If this is a signed shift right, and the high bit is modified
// by the logical operation, do not perform the transformation.
// The highBitSet boolean records the value the constant's high bit must
// have for the logical operation to leave the operand's sign bit
// unchanged.
//
if (isValid && I.getOpcode() == Instruction::AShr)
isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
if (isValid) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
Value *NewShift =
Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
NewShift->takeName(Op0BO);
return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
NewRHS);
}
}
}
}
// Find out if this is a shift of a shift by a constant.
BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
if (ShiftOp && !ShiftOp->isShift())
ShiftOp = 0;
if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
Value *X = ShiftOp->getOperand(0);
uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
const IntegerType *Ty = cast<IntegerType>(I.getType());
// Check for (X << c1) << c2 and (X >> c1) >> c2
if (I.getOpcode() == ShiftOp->getOpcode()) {
// If this is an oversized composite shift, then unsigned shifts get 0
// and ashr saturates.
if (AmtSum >= TypeBits) {
if (I.getOpcode() != Instruction::AShr)
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
}
return BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, AmtSum));
}
if (ShiftOp->getOpcode() == Instruction::LShr &&
I.getOpcode() == Instruction::AShr) {
if (AmtSum >= TypeBits)
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
}
if (ShiftOp->getOpcode() == Instruction::AShr &&
I.getOpcode() == Instruction::LShr) {
// ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
if (AmtSum >= TypeBits)
AmtSum = TypeBits-1;
Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(), Mask));
}
// Okay, if we get here, one shift must be left, and the other shift must be
// right. See if the amounts are equal.
if (ShiftAmt1 == ShiftAmt2) {
// If we have ((X >>? C) << C), turn this into X & (-1 << C).
if (I.getOpcode() == Instruction::Shl) {
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
return BinaryOperator::CreateAnd(X,
ConstantInt::get(I.getContext(),Mask));
}
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
if (I.getOpcode() == Instruction::LShr) {
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
return BinaryOperator::CreateAnd(X,
ConstantInt::get(I.getContext(), Mask));
}
// We can simplify ((X << C) >>s C) into a trunc + sext.
// NOTE: we could do this for any C, but that would make 'unusual' integer
// types. For now, just stick to ones well-supported by the code
// generators.
const Type *SExtType = 0;
switch (Ty->getBitWidth() - ShiftAmt1) {
case 1 :
case 8 :
case 16 :
case 32 :
case 64 :
case 128:
SExtType = IntegerType::get(I.getContext(),
Ty->getBitWidth() - ShiftAmt1);
break;
default: break;
}
if (SExtType)
return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
// Otherwise, we can't handle it yet.
} else if (ShiftAmt1 < ShiftAmt2) {
uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
// (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
if (I.getOpcode() == Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
// We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
} else {
assert(ShiftAmt2 < ShiftAmt1);
uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
// (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
if (I.getOpcode() == Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
// We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
}
}
return 0;
}
/// FindElementAtOffset - Given a type and a constant offset, determine whether
/// or not there is a sequence of GEP indices into the type that will land us at
/// the specified offset. If so, fill them into NewIndices and return the
/// resultant element type, otherwise return null.
const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
SmallVectorImpl<Value*> &NewIndices) {
if (!TD) return 0;
if (!Ty->isSized()) return 0;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
Offset -= FirstIdx*TySize;
// Handle hosts where % returns negative instead of values [0..TySize).
if (Offset < 0) {
--FirstIdx;
Offset += TySize;
assert(Offset >= 0);
}
assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
}
NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
// Index into the types. If we fail, set OrigBase to null.
while (Offset) {
// Indexing into tail padding between struct/array elements.
if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
return 0;
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD->getStructLayout(STy);
assert(Offset < (int64_t)SL->getSizeInBytes() &&
"Offset must stay within the indexed type");
unsigned Elt = SL->getElementContainingOffset(Offset);
NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
Elt));
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
} else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
Offset %= EltSize;
Ty = AT->getElementType();
} else {
// Otherwise, we can't index into the middle of this atomic type, bail.
return 0;
}
}
return Ty;
}
/// EnforceKnownAlignment - If the specified pointer points to an object that
/// we control, modify the object's alignment to PrefAlign. This isn't
/// often possible though. If alignment is important, a more reliable approach
/// is to simply align all global variables and allocation instructions to
/// their preferred alignment from the beginning.
///
static unsigned EnforceKnownAlignment(Value *V,
unsigned Align, unsigned PrefAlign) {
User *U = dyn_cast<User>(V);
if (!U) return Align;
switch (Operator::getOpcode(U)) {
default: break;
case Instruction::BitCast:
return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
case Instruction::GetElementPtr: {
// If all indexes are zero, it is just the alignment of the base pointer.
bool AllZeroOperands = true;
for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
if (!isa<Constant>(*i) ||
!cast<Constant>(*i)->isNullValue()) {
AllZeroOperands = false;
break;
}
if (AllZeroOperands) {
// Treat this like a bitcast.
return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
}
break;
}
}
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (!GV->isDeclaration()) {
if (GV->getAlignment() >= PrefAlign)
Align = GV->getAlignment();
else {
GV->setAlignment(PrefAlign);
Align = PrefAlign;
}
}
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
Align = AI->getAlignment();
else {
AI->setAlignment(PrefAlign);
Align = PrefAlign;
}
}
return Align;
}
/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
/// and it is more than the alignment of the ultimate object, see if we can
/// increase the alignment of the ultimate object, making this check succeed.
unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign) {
unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
sizeof(PrefAlign) * CHAR_BIT;
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
unsigned TrailZ = KnownZero.countTrailingOnes();
unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
if (PrefAlign > Align)
Align = EnforceKnownAlignment(V, Align, PrefAlign);
// We don't need to make any adjustment.
return Align;
}
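/// SimplifyMemTransfer - Raise the alignment of a memcpy/memmove when its
/// operands are known to be better aligned, and rewrite small constant-size
/// copies as a single load+store pair; illustratively, a 4-byte memcpy of
/// suitably aligned pointers becomes an i32 load followed by an i32 store.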
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
if (CopyAlign < MinAlign) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
MinAlign, false));
return MI;
}
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
if (MemOpLength == 0) return 0;
// Source and destination pointer types are always "i8*" for intrinsic. See
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
unsigned Size = MemOpLength->getZExtValue();
if (Size == 0) return MI; // Delete this mem transfer.
if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
// Use an integer load+store unless we can find something better.
const Type *NewPtrTy =
PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3));
// Memcpy forces the use of i8* for the source and destination. That means
// that if you're using memcpy to move one double around, you'll get a cast
// from double* to i8*. We'd much rather use a double load+store rather than
// an i64 load+store, here because this improves the odds that the source or
// dest address will be promotable. See if we can find a better type than the
// integer datatype.
if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
while (!SrcETy->isSingleValueType()) {
if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
if (STy->getNumElements() == 1)
SrcETy = STy->getElementType(0);
else
break;
} else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
if (ATy->getNumElements() == 1)
SrcETy = ATy->getElementType();
else
break;
} else
break;
}
if (SrcETy->isSingleValueType())
NewPtrTy = PointerType::getUnqual(SrcETy);
}
}
// If the memcpy/memmove provides better alignment info than we can
// infer, use it.
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);
Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
InsertNewInstBefore(L, *MI);
InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
// Set the size of the copy to 0, it will be deleted on the next iteration.
MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
return MI;
}
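/// SimplifyMemSet - Raise the alignment of a memset when the destination is
/// known to be better aligned, and rewrite small constant-size fills as a
/// single store, e.g. memset(p, 0xAB, 4) --> store i32 0xABABABAB (sketch).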
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
return MI;
}
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(MI->getContext()))
return 0;
uint64_t Len = LenC->getZExtValue();
Alignment = MI->getAlignment();
// If the length is zero, this is a no-op
if (Len == 0) return MI; // memset(d,c,0,a) -> noop
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
Value *Dest = MI->getDest();
Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
// Alignment 0 is identity for alignment 1 for memset, but not store.
if (Alignment == 0) Alignment = 1;
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
Dest, false, Alignment), *MI);
// Set the size of the copy to 0, it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(LenC->getType()));
return MI;
}
return 0;
}
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (isFreeCall(&CI))
return visitFree(CI);
// If the caller function is nounwind, mark the call as nounwind, even if the
// callee isn't.
if (CI.getParent()->getParent()->doesNotThrow() &&
!CI.doesNotThrow()) {
CI.setDoesNotThrow();
return &CI;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
if (!II) return visitCallSite(&CI);
// Intrinsics cannot occur in an invoke, so handle them here instead of in
// visitCallSite.
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
bool Changed = false;
// memmove/cpy/set of zero bytes is a noop.
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
if (CI->getZExtValue() == 1) {
// Replace the instruction with just byte operations. We would
// transform other cases to loads/stores, but we don't know if
// alignment is sufficient.
}
}
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
// into a call to memcpy.
if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
if (GVSrc->isConstant()) {
Module *M = CI.getParent()->getParent()->getParent();
Intrinsic::ID MemCpyID = Intrinsic::memcpy;
const Type *Tys[1];
Tys[0] = CI.getOperand(3)->getType();
CI.setOperand(0,
Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
Changed = true;
}
}
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
// memmove(x,x,size) -> noop.
if (MTI->getSource() == MTI->getDest())
return EraseInstFromFunction(CI);
}
// If we can determine a pointer alignment that is bigger than currently
// set, update the alignment.
if (isa<MemTransferInst>(MI)) {
if (Instruction *I = SimplifyMemTransfer(MI))
return I;
} else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
if (Instruction *I = SimplifyMemSet(MSI))
return I;
}
if (Changed) return II;
}
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
if (Operand->getIntrinsicID() == Intrinsic::bswap)
return ReplaceInstUsesWith(CI, Operand->getOperand(1));
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
if (Operand->getIntrinsicID() == Intrinsic::bswap) {
unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
TI->getType()->getPrimitiveSizeInBits();
Value *CV = ConstantInt::get(Operand->getType(), C);
Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
return new TruncInst(V, TI->getType());
}
}
break;
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
// powi(x, 0) -> 1.0
if (Power->isZero())
return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
// powi(x, 1) -> x
if (Power->isOne())
return ReplaceInstUsesWith(CI, II->getOperand(1));
// powi(x, -1) -> 1/x
if (Power->isAllOnesValue())
return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
II->getOperand(1));
}
break;
case Intrinsic::uadd_with_overflow: {
Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
uint32_t BitWidth = IT->getBitWidth();
APInt Mask = APInt::getSignBit(BitWidth);
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
if (LHSKnownNegative || LHSKnownPositive) {
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
if (LHSKnownNegative && RHSKnownNegative) {
// The sign bit is set in both cases: this MUST overflow.
// Create a simple add instruction, and insert it into the struct.
Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
Worklist.Add(Add);
Constant *V[] = {
UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
return InsertValueInst::Create(Struct, Add, 0);
}
if (LHSKnownPositive && RHSKnownPositive) {
// The sign bit is clear in both cases: this CANNOT overflow.
// Create a simple add instruction, and insert it into the struct.
Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
Worklist.Add(Add);
Constant *V[] = {
UndefValue::get(LHS->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
return InsertValueInst::Create(Struct, Add, 0);
}
}
}
// FALL THROUGH uadd into sadd
case Intrinsic::sadd_with_overflow:
// Canonicalize constants into the RHS.
if (isa<Constant>(II->getOperand(1)) &&
!isa<Constant>(II->getOperand(2))) {
Value *LHS = II->getOperand(1);
II->setOperand(1, II->getOperand(2));
II->setOperand(2, LHS);
return II;
}
// X + undef -> undef
if (isa<UndefValue>(II->getOperand(2)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
// X + 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
UndefValue::get(II->getOperand(1)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
return InsertValueInst::Create(Struct, II->getOperand(1), 0);
}
}
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
// undef - X -> undef
// X - undef -> undef
if (isa<UndefValue>(II->getOperand(1)) ||
isa<UndefValue>(II->getOperand(2)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
// X - 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
UndefValue::get(II->getOperand(1)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
return InsertValueInst::Create(Struct, II->getOperand(1), 0);
}
}
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
// Canonicalize constants into the RHS.
if (isa<Constant>(II->getOperand(1)) &&
!isa<Constant>(II->getOperand(2))) {
Value *LHS = II->getOperand(1);
II->setOperand(1, II->getOperand(2));
II->setOperand(2, LHS);
return II;
}
// X * undef -> undef
if (isa<UndefValue>(II->getOperand(2)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
// X*0 -> {0, false}
if (RHSI->isZero())
return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
// X * 1 -> {X, false}
if (RHSI->equalsInt(1)) {
Constant *V[] = {
UndefValue::get(II->getOperand(1)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
return InsertValueInst::Create(Struct, II->getOperand(1), 0);
}
}
break;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::x86_sse_loadu_ps:
case Intrinsic::x86_sse2_loadu_pd:
case Intrinsic::x86_sse2_loadu_dq:
// Turn PPC lvx -> load if the pointer is known aligned.
// Turn X86 loadups -> load if the pointer is known aligned.
if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
}
break;
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
const Type *OpPtrTy =
PointerType::getUnqual(II->getOperand(1)->getType());
Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
return new StoreInst(II->getOperand(1), Ptr);
}
break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
const Type *OpPtrTy =
PointerType::getUnqual(II->getOperand(2)->getType());
Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
return new StoreInst(II->getOperand(2), Ptr);
}
break;
case Intrinsic::x86_sse_cvttss2si: {
// This intrinsic only demands the 0th element of its input vector. If
// we can simplify the input based on that, do so now.
unsigned VWidth =
cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
APInt DemandedElts(VWidth, 1);
APInt UndefElts(VWidth, 0);
if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
UndefElts)) {
II->setOperand(1, V);
return II;
}
break;
}
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
// Check that all of the elements are integer constants or undefs.
bool AllEltsOk = true;
for (unsigned i = 0; i != 16; ++i) {
if (!isa<ConstantInt>(Mask->getOperand(i)) &&
!isa<UndefValue>(Mask->getOperand(i))) {
AllEltsOk = false;
break;
}
}
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
Value *Result = UndefValue::get(Op0->getType());
// Only extract each element once.
Value *ExtractedElts[32];
memset(ExtractedElts, 0, sizeof(ExtractedElts));
for (unsigned i = 0; i != 16; ++i) {
if (isa<UndefValue>(Mask->getOperand(i)))
continue;
unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
if (ExtractedElts[Idx] == 0) {
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
ConstantInt::get(Type::getInt32Ty(II->getContext()),
Idx&15, false), "tmp");
}
// Insert this value into the result vector.
Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
ConstantInt::get(Type::getInt32Ty(II->getContext()),
i, false), "tmp");
}
return CastInst::Create(Instruction::BitCast, Result, CI.getType());
}
}
break;
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
if (SS->getIntrinsicID() == Intrinsic::stacksave) {
BasicBlock::iterator BI = SS;
if (&*++BI == II)
return EraseInstFromFunction(CI);
}
}
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
BasicBlock::iterator BI = II;
TerminatorInst *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
if (isa<AllocaInst>(BI) || isMalloc(BI)) {
CannotRemove = true;
break;
}
if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
// If there is a stackrestore below this one, remove this one.
if (II->getIntrinsicID() == Intrinsic::stackrestore)
return EraseInstFromFunction(CI);
// Otherwise, ignore the intrinsic.
} else {
// If we found a non-intrinsic call, we can't remove the stack
// restore.
CannotRemove = true;
break;
}
}
}
// If the stack restore is in a return/unwind block and if there are no
// allocas or calls between the restore and the return, nuke the restore.
if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
return EraseInstFromFunction(CI);
break;
}
}
return visitCallSite(II);
}
// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
return visitCallSite(&II);
}
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
const CastInst * const CI,
const TargetData * const TD,
const int ix) {
if (!CI->isLosslessCast())
return false;
// The size of ByVal arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
if (!CS.paramHasAttr(ix, Attribute::ByVal))
return true;
const Type* SrcTy =
cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
return false;
return true;
}
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
bool Changed = false;
// If the callee is a constexpr cast of a function, attempt to move the cast
// to the arguments of the call/invoke.
if (transformConstExprCastCall(CS)) return 0;
Value *Callee = CS.getCalledValue();
if (Function *CalleeF = dyn_cast<Function>(Callee))
if (CalleeF->getCallingConv() != CS.getCallingConv()) {
Instruction *OldCall = CS.getInstruction();
// If the call and callee calling conventions don't match, this call must
// be unreachable, as the call is undefined.
new StoreInst(ConstantInt::getTrue(Callee->getContext()),
UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
if (!OldCall->getType()->isVoidTy())
OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
if (isa<CallInst>(OldCall)) // Not worth removing an invoke here.
return EraseInstFromFunction(*OldCall);
return 0;
}
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
// This instruction is not reachable, just remove it. We insert a store to
// undef so that we know that this code is not reachable, despite the fact
// that we can't modify the CFG here.
new StoreInst(ConstantInt::getTrue(Callee->getContext()),
UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
CS.getInstruction());
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
if (!CS.getInstruction()->getType()->isVoidTy())
CS.getInstruction()->
replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
// Don't break the CFG, insert a dummy cond branch.
BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
ConstantInt::getTrue(Callee->getContext()), II);
}
return EraseInstFromFunction(*CS.getInstruction());
}
if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
if (In->getIntrinsicID() == Intrinsic::init_trampoline)
return transformCallThroughTrampoline(CS);
const PointerType *PTy = cast<PointerType>(Callee->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
}
}
if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
// Inline asm calls cannot throw - mark them 'nounwind'.
CS.setDoesNotThrow();
Changed = true;
}
return Changed ? CS.getInstruction() : 0;
}
// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
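// For example (illustrative):
//   call void bitcast (void (i8*)* @f to void (i32*)*)(i32* %p)
// becomes
//   %p.c = bitcast i32* %p to i8*
//   call void @f(i8* %p.c)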
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
if (CE->getOpcode() != Instruction::BitCast ||
!isa<Function>(CE->getOperand(0)))
return false;
Function *Callee = cast<Function>(CE->getOperand(0));
Instruction *Caller = CS.getInstruction();
const AttrListPtr &CallerPAL = CS.getAttributes();
// Okay, this is a cast from a function to a different type. Unless doing so
// would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments cast to the appropriate types.
//
const FunctionType *FT = Callee->getFunctionType();
const Type *OldRetTy = Caller->getType();
const Type *NewRetTy = FT->getReturnType();
if (isa<StructType>(NewRetTy))
return false; // TODO: Handle multiple return values.
// Check to see if we are changing the return type...
if (OldRetTy != NewRetTy) {
if (Callee->isDeclaration() &&
// Conversion is ok if changing from one pointer type to another or from
// a pointer to an integer of the same size.
!((isa<PointerType>(OldRetTy) || !TD ||
OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
(isa<PointerType>(NewRetTy) || !TD ||
NewRetTy == TD->getIntPtrType(Caller->getContext()))))
return false; // Cannot transform this return value.
if (!Caller->use_empty() &&
// void -> non-void is handled specially
!NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
Attributes RAttrs = CallerPAL.getRetAttributes();
if (RAttrs & Attribute::typeIncompatible(NewRetTy))
return false; // Attribute not compatible with transformed value.
}
// If the callsite is an invoke instruction, and the return value is used by
// a PHI node in a successor, we cannot change the return type of the call
// because there is no place to put the cast instruction (without breaking
// the critical edge). Bail out in this case.
if (!Caller->use_empty())
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
UI != E; ++UI)
if (PHINode *PN = dyn_cast<PHINode>(*UI))
if (PN->getParent() == II->getNormalDest() ||
PN->getParent() == II->getUnwindDest())
return false;
}
unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
CallSite::arg_iterator AI = CS.arg_begin();
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
const Type *ParamTy = FT->getParamType(i);
const Type *ActTy = (*AI)->getType();
if (!CastInst::isCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
if (CallerPAL.getParamAttributes(i + 1)
& Attribute::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
// Converting from one pointer type to another or between a pointer and an
// integer of the same size is safe even if we do not have a body.
bool isConvertible = ActTy == ParamTy ||
(TD && ((isa<PointerType>(ParamTy) ||
ParamTy == TD->getIntPtrType(Caller->getContext())) &&
(isa<PointerType>(ActTy) ||
ActTy == TD->getIntPtrType(Caller->getContext()))));
if (Callee->isDeclaration() && !isConvertible) return false;
}
if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
Callee->isDeclaration())
return false; // Do not delete arguments unless we have a function body.
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
!CallerPAL.isEmpty())
// In this case we have more arguments than the new function type, but we
// won't be dropping them. Check that these extra arguments have attributes
// that are compatible with being a vararg call argument.
for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
break;
Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
if (PAttrs & Attribute::VarArgsIncompatible)
return false;
}
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary...
std::vector<Value*> Args;
Args.reserve(NumActualArgs);
SmallVector<AttributeWithIndex, 8> attrVec;
attrVec.reserve(NumCommonArgs);
// Get any return attributes.
Attributes RAttrs = CallerPAL.getRetAttributes();
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
// Add the new return attributes.
if (RAttrs)
attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
const Type *ParamTy = FT->getParamType(i);
if ((*AI)->getType() == ParamTy) {
Args.push_back(*AI);
} else {
Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
false, ParamTy, false);
Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
}
// Add any parameter attributes.
if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
}
// If the function takes more arguments than the call was taking, add them
// now.
for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
Args.push_back(Constant::getNullValue(FT->getParamType(i)));
// If we are removing arguments to the function, emit an obnoxious warning.
if (FT->getNumParams() < NumActualArgs) {
if (!FT->isVarArg()) {
errs() << "WARNING: While resolving call to function '"
<< Callee->getName() << "' arguments were dropped!\n";
} else {
// Add all of the arguments in their promoted form to the arg list.
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
const Type *PTy = getPromotedType((*AI)->getType());
if (PTy != (*AI)->getType()) {
// Must promote to pass through va_arg area!
Instruction::CastOps opcode =
CastInst::getCastOpcode(*AI, false, PTy, false);
Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
} else {
Args.push_back(*AI);
}
// Add any parameter attributes.
if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
}
}
}
if (Attributes FnAttrs = CallerPAL.getFnAttributes())
attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
attrVec.end());
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
Args.begin(), Args.end(),
Caller->getName(), Caller);
cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
} else {
NC = CallInst::Create(Callee, Args.begin(), Args.end(),
Caller->getName(), Caller);
CallInst *CI = cast<CallInst>(Caller);
if (CI->isTailCall())
cast<CallInst>(NC)->setTailCall();
cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
cast<CallInst>(NC)->setAttributes(NewCallerPAL);
}
// Insert a cast of the return type as necessary.
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
OldRetTy, false);
NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
// If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call instr
InsertNewInstBefore(NC, *Caller);
}
Worklist.AddUsersToWorkList(*Caller);
} else {
NV = UndefValue::get(Caller->getType());
}
}
if (!Caller->use_empty())
Caller->replaceAllUsesWith(NV);
EraseInstFromFunction(*Caller);
return true;
}
// transformCallThroughTrampoline - Turn a call to a function created by the
// init_trampoline intrinsic into a direct call to the underlying function.
//
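// For example (illustrative, using the old i8*-returning form of the
// intrinsic):
//   %tramp = call i8* @llvm.init.trampoline(i8* %mem,
//                      i8* bitcast (i32 (i8*, i32)* @f to i8*), i8* %nval)
//   %fp = bitcast i8* %tramp to i32 (i32)*
//   %r = call i32 %fp(i32 %x)
// becomes a direct call with the 'nest' argument spliced in:
//   %r = call i32 @f(i8* nest %nval, i32 %x)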
Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
Value *Callee = CS.getCalledValue();
const PointerType *PTy = cast<PointerType>(Callee->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
const AttrListPtr &Attrs = CS.getAttributes();
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
if (Attrs.hasAttrSomewhere(Attribute::Nest))
return 0;
IntrinsicInst *Tramp =
cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
const AttrListPtr &NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
const Type *NestTy = 0;
Attributes NestAttr = Attribute::None;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
NestAttr = NestAttrs.getParamAttributes(NestIdx);
break;
}
if (NestTy) {
Instruction *Caller = CS.getInstruction();
std::vector<Value*> NewArgs;
NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
SmallVector<AttributeWithIndex, 8> NewAttrs;
NewAttrs.reserve(Attrs.getNumSlots() + 1);
// Insert the nest argument into the call argument list, which may
// mean appending it. Likewise for attributes.
// Add any result attributes.
if (Attributes Attr = Attrs.getRetAttributes())
NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
{
unsigned Idx = 1;
CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
do {
if (Idx == NestIdx) {
// Add the chain argument and attributes.
Value *NestVal = Tramp->getOperand(3);
if (NestVal->getType() != NestTy)
NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
NewArgs.push_back(NestVal);
NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
}
if (I == E)
break;
// Add the original argument and attributes.
NewArgs.push_back(*I);
if (Attributes Attr = Attrs.getParamAttributes(Idx))
NewAttrs.push_back
(AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
++Idx, ++I;
} while (1);
}
// Add any function attributes.
if (Attributes Attr = Attrs.getFnAttributes())
NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
// with the chain parameter inserted.
std::vector<const Type*> NewTypes;
NewTypes.reserve(FTy->getNumParams()+1);
// Insert the chain's type into the list of parameter types, which may
// mean appending it.
{
unsigned Idx = 1;
FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end();
do {
if (Idx == NestIdx)
// Add the chain's type.
NewTypes.push_back(NestTy);
if (I == E)
break;
// Add the original type.
NewTypes.push_back(*I);
++Idx, ++I;
} while (1);
}
// Replace the trampoline call with a direct call. Let the generic
// code sort out any function type mismatches.
FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
FTy->isVarArg());
Constant *NewCallee =
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
NewAttrs.end());
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
NewCaller = InvokeInst::Create(NewCallee,
II->getNormalDest(), II->getUnwindDest(),
NewArgs.begin(), NewArgs.end(),
Caller->getName(), Caller);
cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
} else {
NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
Caller->getName(), Caller);
if (cast<CallInst>(Caller)->isTailCall())
cast<CallInst>(NewCaller)->setTailCall();
cast<CallInst>(NewCaller)->
setCallingConv(cast<CallInst>(Caller)->getCallingConv());
cast<CallInst>(NewCaller)->setAttributes(NewPAL);
}
if (!Caller->getType()->isVoidTy())
Caller->replaceAllUsesWith(NewCaller);
Caller->eraseFromParent();
Worklist.Remove(Caller);
return 0;
}
}
// Replace the trampoline call with a direct call. Since there is no 'nest'
// parameter, there is no need to adjust the argument list. Let the generic
// code sort out any function type mismatches.
Constant *NewCallee =
NestF->getType() == PTy ? NestF :
ConstantExpr::getBitCast(NestF, PTy);
CS.setCalledFunction(NewCallee);
return CS.getInstruction();
}
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
return ReplaceInstUsesWith(GEP, V);
Value *PtrOp = GEP.getOperand(0);
if (isa<UndefValue>(GEP.getOperand(0)))
return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
// Eliminate unneeded casts for indices.
if (TD) {
bool MadeChange = false;
unsigned PtrSize = TD->getPointerSizeInBits();
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
I != E; ++I, ++GTI) {
if (!isa<SequentialType>(*GTI)) continue;
// If we are using a wider index than needed for this platform, shrink it
// to what we need. If narrower, sign-extend it to what we need. This
// explicit cast can make subsequent optimizations more obvious.
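      // For example (illustrative, assuming 64-bit pointers):
      //   getelementptr i32* %p, i16 %i
      // becomes
      //   %i.ext = sext i16 %i to i64
      //   getelementptr i32* %p, i64 %i.ext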
unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
if (OpBits == PtrSize)
continue;
*I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
MadeChange = true;
}
if (MadeChange) return &GEP;
}
// Combine Indices - If the source pointer to this getelementptr instruction
// is a getelementptr instruction, combine the indices of the two
// getelementptr instructions into a single instruction.
//
if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
    // Note that if our source is a gep chain itself, we wait for that
    // chain to be resolved before we perform this transformation.  This
// avoids us creating a TON of code in some cases.
//
if (GetElementPtrInst *SrcGEP =
dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
if (SrcGEP->getNumOperands() == 2)
return 0; // Wait until our source is folded to completion.
SmallVector<Value*, 8> Indices;
// Find out whether the last index in the source GEP is a sequential idx.
bool EndsWithSequential = false;
for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
I != E; ++I)
EndsWithSequential = !isa<StructType>(*I);
    // Can we combine the two pointer arithmetic offsets?
if (EndsWithSequential) {
// Replace: gep (gep %P, long B), long A, ...
// With: T = long A+B; gep %P, T, ...
//
Value *Sum;
Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
Value *GO1 = GEP.getOperand(1);
if (SO1 == Constant::getNullValue(SO1->getType())) {
Sum = GO1;
} else if (GO1 == Constant::getNullValue(GO1->getType())) {
Sum = SO1;
} else {
// If they aren't the same type, then the input hasn't been processed
// by the loop above yet (which canonicalizes sequential index types to
// intptr_t). Just avoid transforming this until the input has been
// normalized.
if (SO1->getType() != GO1->getType())
return 0;
Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
}
// Update the GEP in place if possible.
if (Src->getNumOperands() == 2) {
GEP.setOperand(0, Src->getOperand(0));
GEP.setOperand(1, Sum);
return &GEP;
}
Indices.append(Src->op_begin()+1, Src->op_end()-1);
Indices.push_back(Sum);
Indices.append(GEP.op_begin()+2, GEP.op_end());
} else if (isa<Constant>(*GEP.idx_begin()) &&
cast<Constant>(*GEP.idx_begin())->isNullValue() &&
Src->getNumOperands() != 1) {
// Otherwise we can do the fold if the first index of the GEP is a zero
Indices.append(Src->op_begin()+1, Src->op_end());
Indices.append(GEP.idx_begin()+1, GEP.idx_end());
}
if (!Indices.empty())
return (cast<GEPOperator>(&GEP)->isInBounds() &&
Src->isInBounds()) ?
GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
Indices.end(), GEP.getName()) :
GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
Indices.end(), GEP.getName());
}
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
if (Value *X = getBitCastOperand(PtrOp)) {
assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
// If the input bitcast is actually "bitcast(bitcast(x))", then we don't
// want to change the gep until the bitcasts are eliminated.
if (getBitCastOperand(X)) {
Worklist.AddValue(PtrOp);
return 0;
}
bool HasZeroPointerIndex = false;
if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
HasZeroPointerIndex = C->isZero();
// Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
// into : GEP [10 x i8]* X, i32 0, ...
//
// Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
// into : GEP i8* X, ...
//
// This occurs when the program declares an array extern like "int X[];"
if (HasZeroPointerIndex) {
const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
const PointerType *XTy = cast<PointerType>(X->getType());
if (const ArrayType *CATy =
dyn_cast<ArrayType>(CPTy->getElementType())) {
// GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == XTy->getElementType()) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
return cast<GEPOperator>(&GEP)->isInBounds() ?
GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
GEP.getName()) :
GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
GEP.getName());
}
if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
// GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == XATy->getElementType()) {
// -> GEP [10 x i8]* X, i32 0, ...
// At this point, we know that the cast source type is a pointer
// to an array of the same type as the destination pointer
// array. Because the array type is never stepped over (there
// is a leading zero) we can fold the cast into this GEP.
GEP.setOperand(0, X);
return &GEP;
}
}
}
} else if (GEP.getNumOperands() == 2) {
// Transform things like:
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
if (TD && isa<ArrayType>(SrcElTy) &&
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
Idx[1] = GEP.getOperand(1);
Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
// V and GEP are both pointer types --> BitCast
return new BitCastInst(NewGEP, GEP.getType());
}
// Transform things like:
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
if (TD && isa<ArrayType>(SrcElTy) &&
ResElTy == Type::getInt8Ty(GEP.getContext())) {
uint64_t ArrayEltSize =
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
// Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
// allow either a mul, shift, or constant here.
Value *NewIdx = 0;
ConstantInt *Scale = 0;
if (ArrayEltSize == 1) {
NewIdx = GEP.getOperand(1);
Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
NewIdx = ConstantInt::get(CI->getType(), 1);
Scale = CI;
} else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
if (Inst->getOpcode() == Instruction::Shl &&
isa<ConstantInt>(Inst->getOperand(1))) {
ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
1ULL << ShAmtVal);
NewIdx = Inst->getOperand(0);
} else if (Inst->getOpcode() == Instruction::Mul &&
isa<ConstantInt>(Inst->getOperand(1))) {
Scale = cast<ConstantInt>(Inst->getOperand(1));
NewIdx = Inst->getOperand(0);
}
}
// If the index will be to exactly the right offset with the scale taken
// out, perform the transformation. Note, we don't know whether Scale is
        // signed or not. We'll use the unsigned version of the division/modulo
// operation after making sure Scale doesn't have the sign bit set.
if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
Scale->getZExtValue() % ArrayEltSize == 0) {
Scale = ConstantInt::get(Scale->getType(),
Scale->getZExtValue() / ArrayEltSize);
if (Scale->getZExtValue() != 1) {
Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
false /*ZExt*/);
NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
}
// Insert the new GEP instruction.
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
Idx[1] = NewIdx;
Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return new BitCastInst(NewGEP, GEP.getType());
}
}
}
}
/// See if we can simplify:
/// X = bitcast A* to B*
/// Y = gep X, <...constant indices...>
/// into a gep of the original struct. This is important for SROA and alias
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
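  ///
  /// For example (illustrative, assuming %pair = type { i32, i32 }):
  ///   %X = bitcast %pair* %A to i32*
  ///   %Y = getelementptr i32* %X, i32 1
  /// can become: getelementptr %pair* %A, i32 0, i32 1
  /// because the constant offset lands exactly on the second field.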
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
if (TD &&
!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
// Determine how much the GEP moves the pointer. We are guaranteed to get
// a constant back from EmitGEPOffset.
ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
int64_t Offset = OffsetV->getSExtValue();
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
if (Offset == 0) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
if (isa<AllocaInst>(BCI->getOperand(0)) ||
isMalloc(BCI->getOperand(0))) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
I->takeName(BCI);
BCI->getParent()->getInstList().insert(BCI, I);
ReplaceInstUsesWith(*BCI, I);
}
return &GEP;
}
}
return new BitCastInst(BCI->getOperand(0), GEP.getType());
}
// Otherwise, if the offset is non-zero, we need to find out if there is a
// field at Offset in 'A's type. If so, we can pull the cast through the
// GEP.
SmallVector<Value*, 8> NewIndices;
const Type *InTy =
cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
if (FindElementAtOffset(InTy, Offset, NewIndices)) {
Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
NewIndices.end()) :
Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
NewIndices.end());
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
NGEP->takeName(&GEP);
return new BitCastInst(NGEP, GEP.getType());
}
}
}
return 0;
}
Instruction *InstCombiner::visitFree(Instruction &FI) {
Value *Op = FI.getOperand(1);
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
    // Insert a store to an undef pointer to mark this code unreachable, since
    // we cannot modify the CFG here.
new StoreInst(ConstantInt::getTrue(FI.getContext()),
UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
return EraseInstFromFunction(FI);
}
// If we have 'free null' delete the instruction. This can happen in stl code
// when lots of inlining happens.
if (isa<ConstantPointerNull>(Op))
return EraseInstFromFunction(FI);
// If we have a malloc call whose only use is a free call, delete both.
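  // For example (illustrative):
  //   %m = call i8* @malloc(i32 16)
  //   call void @free(i8* %m)
  // With no other uses of %m, both instructions are erased.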
if (isMalloc(Op)) {
if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
if (Op->hasOneUse() && CI->hasOneUse()) {
EraseInstFromFunction(FI);
EraseInstFromFunction(*CI);
return EraseInstFromFunction(*cast<Instruction>(Op));
}
} else {
// Op is a call to malloc
if (Op->hasOneUse()) {
EraseInstFromFunction(FI);
return EraseInstFromFunction(*cast<Instruction>(Op));
}
}
}
return 0;
}
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Change br (not X), label True, label False to: br X, label False, True
Value *X = 0;
BasicBlock *TrueDest;
BasicBlock *FalseDest;
if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
!isa<Constant>(X)) {
// Swap Destinations and condition...
BI.setCondition(X);
BI.setSuccessor(0, FalseDest);
BI.setSuccessor(1, TrueDest);
return &BI;
}
  // Canonicalize fcmp_one -> fcmp_oeq
FCmpInst::Predicate FPred; Value *Y;
if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
TrueDest, FalseDest)) &&
BI.getCondition()->hasOneUse())
if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
FPred == FCmpInst::FCMP_OGE) {
FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
// Swap Destinations and condition.
BI.setSuccessor(0, FalseDest);
BI.setSuccessor(1, TrueDest);
Worklist.Add(Cond);
return &BI;
}
  // Canonicalize icmp_ne -> icmp_eq
ICmpInst::Predicate IPred;
if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
TrueDest, FalseDest)) &&
BI.getCondition()->hasOneUse())
if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
IPred == ICmpInst::ICMP_SGE) {
ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
// Swap Destinations and condition.
BI.setSuccessor(0, FalseDest);
BI.setSuccessor(1, TrueDest);
Worklist.Add(Cond);
return &BI;
}
return 0;
}
Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
if (Instruction *I = dyn_cast<Instruction>(Cond)) {
if (I->getOpcode() == Instruction::Add)
if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
// change 'switch (X+4) case 1:' into 'switch (X) case -3'
for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
SI.setOperand(i,
ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
AddRHS));
SI.setOperand(0, I->getOperand(0));
Worklist.Add(I);
return &SI;
}
}
return 0;
}
Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
Value *Agg = EV.getAggregateOperand();
if (!EV.hasIndices())
return ReplaceInstUsesWith(EV, Agg);
if (Constant *C = dyn_cast<Constant>(Agg)) {
if (isa<UndefValue>(C))
return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
if (isa<ConstantAggregateZero>(C))
return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
// Extract the element indexed by the first index out of the constant
Value *V = C->getOperand(*EV.idx_begin());
if (EV.getNumIndices() > 1)
// Extract the remaining indices out of the constant indexed by the
// first index
return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
else
return ReplaceInstUsesWith(EV, V);
}
return 0; // Can't handle other constants
}
if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
// We're extracting from an insertvalue instruction, compare the indices
const unsigned *exti, *exte, *insi, *inse;
for (exti = EV.idx_begin(), insi = IV->idx_begin(),
exte = EV.idx_end(), inse = IV->idx_end();
exti != exte && insi != inse;
++exti, ++insi) {
if (*insi != *exti)
// The insert and extract both reference distinctly different elements.
// This means the extract is not influenced by the insert, and we can
// replace the aggregate operand of the extract with the aggregate
// operand of the insert. i.e., replace
// %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
// %E = extractvalue { i32, { i32 } } %I, 0
// with
// %E = extractvalue { i32, { i32 } } %A, 0
return ExtractValueInst::Create(IV->getAggregateOperand(),
EV.idx_begin(), EV.idx_end());
}
if (exti == exte && insi == inse)
// Both iterators are at the end: Index lists are identical. Replace
// %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
// %C = extractvalue { i32, { i32 } } %B, 1, 0
// with "i32 42"
return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
if (exti == exte) {
// The extract list is a prefix of the insert list. i.e. replace
// %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
// %E = extractvalue { i32, { i32 } } %I, 1
// with
// %X = extractvalue { i32, { i32 } } %A, 1
// %E = insertvalue { i32 } %X, i32 42, 0
// by switching the order of the insert and extract (though the
// insertvalue should be left in, since it may have other uses).
Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
EV.idx_begin(), EV.idx_end());
return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
insi, inse);
}
if (insi == inse)
// The insert list is a prefix of the extract list
// We can simply remove the common indices from the extract and make it
// operate on the inserted value instead of the insertvalue result.
// i.e., replace
// %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
// %E = extractvalue { i32, { i32 } } %I, 1, 0
// with
      //   %E = extractvalue { i32 } { i32 42 }, 0
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
exti, exte);
}
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
    // We're extracting from an intrinsic; see if we're the only user, which
    // allows us to simplify multiple-result intrinsics to simpler things that
    // just get one value.
if (II->hasOneUse()) {
// Check if we're grabbing the overflow bit or the result of a 'with
// overflow' intrinsic. If it's the latter we can remove the intrinsic
// and replace it with a traditional binary instruction.
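      // For example (illustrative):
      //   %s = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
      //   %v = extractvalue {i32, i1} %s, 0
      // becomes '%v = add i32 %a, %b' when %s has no other uses.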
switch (II->getIntrinsicID()) {
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateAdd(LHS, RHS);
}
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateSub(LHS, RHS);
}
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateMul(LHS, RHS);
}
break;
default:
break;
}
}
}
// Can't simplify extracts from other values. Note that nested extracts are
  // already simplified implicitly by the above (extract (extract (insert))
  // will be translated into extract (insert (extract)) first and then just
// the value inserted, if appropriate).
return 0;
}
/// TryToSinkInstruction - Try to move the specified instruction from its
/// current block into the beginning of DestBlock, which can only happen if it's
/// safe to move the instruction past all of the instructions between it and the
/// end of its block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
assert(I->hasOneUse() && "Invariants didn't hold!");
  // Cannot move control-flow-involving instructions, volatile loads, vaarg,
  // etc.
if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
return false;
// Do not sink alloca instructions out of the entry block.
if (isa<AllocaInst>(I) && I->getParent() ==
&DestBlock->getParent()->getEntryBlock())
return false;
// We can only sink load instructions if there is nothing between the load and
  // the end of the block that could change the value.
if (I->mayReadFromMemory()) {
for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
Scan != E; ++Scan)
if (Scan->mayWriteToMemory())
return false;
}
BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
I->moveBefore(InsertPos);
++NumSunkInst;
return true;
}
/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
/// all reachable code to the worklist.
///
/// This has a couple of tricks to make the code faster and more powerful. In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
/// them to the worklist (this significantly speeds up instcombine on code where
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
///
static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
const TargetData *TD) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
std::vector<Instruction*> InstrsForInstCombineWorklist;
InstrsForInstCombineWorklist.reserve(128);
SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
while (!Worklist.empty()) {
BB = Worklist.back();
Worklist.pop_back();
// We have now visited this block! If we've already been here, ignore it.
if (!Visited.insert(BB)) continue;
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *Inst = BBI++;
// DCE instruction if trivially dead.
if (isInstructionTriviallyDead(Inst)) {
++NumDeadInst;
DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
continue;
}
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
Inst->eraseFromParent();
continue;
}
if (TD) {
// See if we can constant fold its operands.
for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
i != e; ++i) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
if (CE == 0) continue;
// If we already folded this constant, don't try again.
if (!FoldedConstants.insert(CE))
continue;
Constant *NewC = ConstantFoldConstantExpression(CE, TD);
if (NewC && NewC != CE) {
*i = NewC;
MadeIRChange = true;
}
}
}
InstrsForInstCombineWorklist.push_back(Inst);
}
// Recursively visit successors. If this is a branch or switch on a
// constant, only visit the reachable successor.
TerminatorInst *TI = BB->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
Worklist.push_back(ReachableBB);
continue;
}
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
        // See if this is an explicit destination; if no case matches, the
        // default destination is the only reachable successor.
        BasicBlock *ReachableBB = SI->getSuccessor(0);
        for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
          if (SI->getCaseValue(i) == Cond) {
            ReachableBB = SI->getSuccessor(i);
            break;
          }
        Worklist.push_back(ReachableBB);
        continue;
      }
    }
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
Worklist.push_back(TI->getSuccessor(i));
}
// Once we've found all of the instructions to add to instcombine's worklist,
// add them in reverse order. This way instcombine will visit from the top
// of the function down. This jives well with the way that it adds all uses
// of instructions to the worklist after doing a transformation, thus avoiding
// some N^2 behavior in pathological cases.
IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
InstrsForInstCombineWorklist.size());
return MadeIRChange;
}
bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = false;
DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getNameStr() << "\n");
{
// Do a depth-first traversal of the function, populate the worklist with
// the reachable instructions. Ignore blocks that are not reachable. Keep
// track of which blocks we visit.
SmallPtrSet<BasicBlock*, 64> Visited;
MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
// the instcombine code from having to deal with some bad special cases.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (!Visited.count(BB)) {
Instruction *Term = BB->getTerminator();
while (Term != BB->begin()) { // Remove instrs bottom-up
BasicBlock::iterator I = Term; --I;
DEBUG(errs() << "IC: DCE: " << *I << '\n');
// A debug intrinsic shouldn't force another iteration if we weren't
// going to do one without it.
if (!isa<DbgInfoIntrinsic>(I)) {
++NumDeadInst;
MadeIRChange = true;
}
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!I->getType()->isVoidTy())
I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
}
}
}
while (!Worklist.isEmpty()) {
Instruction *I = Worklist.RemoveOne();
if (I == 0) continue; // skip null values.
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I)) {
DEBUG(errs() << "IC: DCE: " << *I << '\n');
EraseInstFromFunction(*I);
++NumDeadInst;
MadeIRChange = true;
continue;
}
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(I, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
ReplaceInstUsesWith(*I, C);
++NumConstProp;
EraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
Instruction *UserInst = cast<Instruction>(I->use_back());
BasicBlock *UserParent;
// Get the block the use occurs in.
if (PHINode *PN = dyn_cast<PHINode>(UserInst))
UserParent = PN->getIncomingBlock(I->use_begin().getUse());
else
UserParent = UserInst->getParent();
if (UserParent != BB) {
bool UserIsSuccessor = false;
// See if the user is one of our successors.
for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
if (*SI == UserParent) {
UserIsSuccessor = true;
break;
}
// If the user is one of our immediate successors, and if that successor
        // only has us as a predecessor (we'd have to split the critical edge
// otherwise), we can keep going.
if (UserIsSuccessor && UserParent->getSinglePredecessor())
// Okay, the CFG is simple enough, try to sink this instruction.
MadeIRChange |= TryToSinkInstruction(I, UserParent);
}
}
// Now that we have an instruction, try combining it to simplify it.
Builder->SetInsertPoint(I->getParent(), I);
#ifndef NDEBUG
std::string OrigI;
#endif
DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
if (Instruction *Result = visit(*I)) {
++NumCombined;
// Should we replace the old instruction with a new one?
if (Result != I) {
DEBUG(errs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
// Push the new instruction and any users onto the worklist.
Worklist.Add(Result);
Worklist.AddUsersToWorkList(*Result);
// Move the name to the new instruction first.
Result->takeName(I);
// Insert the new instruction into the basic block...
BasicBlock *InstParent = I->getParent();
BasicBlock::iterator InsertPos = I;
if (!isa<PHINode>(Result)) // If combining a PHI, don't insert
while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
++InsertPos;
InstParent->getInstList().insert(InsertPos, Result);
EraseInstFromFunction(*I);
} else {
#ifndef NDEBUG
DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
<< " New = " << *I << '\n');
#endif
// If the instruction was modified, it's possible that it is now dead.
        // If so, remove it.
if (isInstructionTriviallyDead(I)) {
EraseInstFromFunction(*I);
} else {
Worklist.Add(I);
Worklist.AddUsersToWorkList(*I);
}
}
MadeIRChange = true;
}
}
Worklist.Zap();
return MadeIRChange;
}
bool InstCombiner::runOnFunction(Function &F) {
MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
TD = getAnalysisIfAvailable<TargetData>();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
IRBuilder<true, TargetFolder, InstCombineIRInserter>
TheBuilder(F.getContext(), TargetFolder(TD),
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
bool EverMadeChange = false;
// Iterate while there is work to do.
unsigned Iteration = 0;
while (DoOneIteration(F, Iteration++))
EverMadeChange = true;
Builder = 0;
return EverMadeChange;
}
FunctionPass *llvm::createInstructionCombiningPass() {
return new InstCombiner();
}