diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h new file mode 100644 index 000000000000..36ff07b678e6 --- /dev/null +++ b/llvm/include/llvm/Analysis/IVUsers.h @@ -0,0 +1,235 @@ +//===- llvm/Analysis/IVUsers.h - Induction Variable Users -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bookkeeping for "interesting" users of expressions +// computed from induction variables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_IVUSERS_H +#define LLVM_ANALYSIS_IVUSERS_H + +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include +#include + +namespace llvm { + +class DominatorTree; +class Instruction; +class Value; +class IVUsersOfOneStride; + +/// IVStrideUse - Keep track of one use of a strided induction variable, where +/// the stride is stored externally. The Offset member keeps track of the +/// offset from the IV, User is the actual user of the operand, and +/// 'OperandValToReplace' is the operand of the User that is the use. +class IVStrideUse : public CallbackVH, public ilist_node { +public: + IVStrideUse(IVUsersOfOneStride *parent, + const SCEVHandle &offset, + Instruction* U, Value *O, bool issigned) + : CallbackVH(U), Parent(parent), Offset(offset), + OperandValToReplace(O), IsSigned(issigned), + IsUseOfPostIncrementedValue(false) { + } + + /// getUser - Return the user instruction for this use. + Instruction *getUser() const { + return cast(getValPtr()); + } + + /// setUser - Assign a new user instruction for this use. + void setUser(Instruction *NewUser) { + setValPtr(NewUser); + } + + /// getParent - Return a pointer to the IVUsersOfOneStride that owns + /// this IVStrideUse. + IVUsersOfOneStride *getParent() const { return Parent; } + + /// getOffset - Return the offset to add to a theoeretical induction + /// variable that starts at zero and counts up by the stride to compute + /// the value for the use. This always has the same type as the stride, + /// which may need to be casted to match the type of the use. + SCEVHandle getOffset() const { return Offset; } + + /// setOffset - Assign a new offset to this use. + void setOffset(SCEVHandle Val) { + Offset = Val; + } + + /// getOperandValToReplace - Return the Value of the operand in the user + /// instruction that this IVStrideUse is representing. + Value *getOperandValToReplace() const { + return OperandValToReplace; + } + + /// setOperandValToReplace - Assign a new Value as the operand value + /// to replace. + void setOperandValToReplace(Value *Op) { + OperandValToReplace = Op; + } + + /// isSigned - The stride (and thus also the Offset) of this use may be in + /// a narrower type than the use itself (OperandValToReplace->getType()). + /// When this is the case, isSigned() indicates whether the IV expression + /// should be signed-extended instead of zero-extended to fit the type of + /// the use. + bool isSigned() const { return IsSigned; } + + /// isUseOfPostIncrementedValue - True if this should use the + /// post-incremented version of this IV, not the preincremented version. + /// This can only be set in special cases, such as the terminating setcc + /// instruction for a loop or uses dominated by the loop. 
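// Editor's sketch (not part of the patch): a minimal, self-contained model of
// what one IVStrideUse records. The names StrideUseModel and evaluateAt are
// illustrative assumptions; the real class stores SCEV expressions and LLVM
// value handles rather than plain integers.
#include <cstdint>

struct StrideUseModel {
  int64_t Offset;          // offset added to a zero-based, stride-stepped IV
  int64_t Stride;          // the stride, stored externally per group of uses
  bool IsSigned;           // sign- vs. zero-extend when widening to the use type
  bool UseOfPostIncValue;  // should the use see the value after the increment?
};

// Value this use sees on iteration i of a canonical IV that starts at zero.
inline int64_t evaluateAt(const StrideUseModel &U, int64_t i) {
  int64_t V = U.Offset + U.Stride * i;
  if (U.UseOfPostIncValue)
    V += U.Stride;  // post-incremented uses see one extra stride
  return V;
}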
+ bool isUseOfPostIncrementedValue() const { + return IsUseOfPostIncrementedValue; + } + + /// setIsUseOfPostIncrmentedValue - set the flag that indicates whether + /// this is a post-increment use. + void setIsUseOfPostIncrementedValue(bool Val) { + IsUseOfPostIncrementedValue = Val; + } + +private: + /// Parent - a pointer to the IVUsersOfOneStride that owns this IVStrideUse. + IVUsersOfOneStride *Parent; + + /// Offset - The offset to add to the base induction expression. + SCEVHandle Offset; + + /// OperandValToReplace - The Value of the operand in the user instruction + /// that this IVStrideUse is representing. + WeakVH OperandValToReplace; + + /// IsSigned - Determines whether the replacement value is sign or + /// zero extended to the type of the use. + bool IsSigned; + + /// IsUseOfPostIncrementedValue - True if this should use the + /// post-incremented version of this IV, not the preincremented version. + bool IsUseOfPostIncrementedValue; + + /// Deleted - Implementation of CallbackVH virtual function to + /// recieve notification when the User is deleted. + virtual void deleted(); +}; + +template<> struct ilist_traits + : public ilist_default_traits { + // createSentinel is used to get hold of a node that marks the end of + // the list... + // The sentinel is relative to this instance, so we use a non-static + // method. + IVStrideUse *createSentinel() const { + // since i(p)lists always publicly derive from the corresponding + // traits, placing a data member in this class will augment i(p)list. + // But since the NodeTy is expected to publicly derive from + // ilist_node, there is a legal viable downcast from it + // to NodeTy. We use this trick to superpose i(p)list with a "ghostly" + // NodeTy, which becomes the sentinel. Dereferencing the sentinel is + // forbidden (save the ilist_node) so no one will ever notice + // the superposition. + return static_cast(&Sentinel); + } + static void destroySentinel(IVStrideUse*) {} + + IVStrideUse *provideInitialHead() const { return createSentinel(); } + IVStrideUse *ensureHead(IVStrideUse*) const { return createSentinel(); } + static void noteHead(IVStrideUse*, IVStrideUse*) {} + +private: + mutable ilist_node Sentinel; +}; + +/// IVUsersOfOneStride - This structure keeps track of all instructions that +/// have an operand that is based on the trip count multiplied by some stride. +struct IVUsersOfOneStride : public ilist_node { +private: + IVUsersOfOneStride(const IVUsersOfOneStride &I); // do not implement + void operator=(const IVUsersOfOneStride &I); // do not implement + +public: + IVUsersOfOneStride() : Stride(0) {} + + explicit IVUsersOfOneStride(const SCEV *stride) : Stride(stride) {} + + /// Stride - The stride for all the contained IVStrideUses. This is + /// a constant for affine strides. + const SCEV *Stride; + + /// Users - Keep track of all of the users of this stride as well as the + /// initial value and the operand that uses the IV. + ilist Users; + + void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand, + bool isSigned) { + Users.push_back(new IVStrideUse(this, Offset, User, Operand, isSigned)); + } +}; + +class IVUsers : public LoopPass { + friend class IVStrideUserVH; + Loop *L; + LoopInfo *LI; + DominatorTree *DT; + ScalarEvolution *SE; + SmallPtrSet Processed; + +public: + /// IVUses - A list of all tracked IV uses of induction variable expressions + /// we are interested in. + ilist IVUses; + + /// IVUsesByStride - A mapping from the strides in StrideOrder to the + /// uses in IVUses. 
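// Editor's sketch (illustrative only, not the LLVM data structure): the
// grouping that IVUsersOfOneStride expresses - every use that shares a stride
// lives in one bucket. UseRecord and UsesOfOneStride are hypothetical
// stand-ins for the real ilist-based types.
#include <map>
#include <vector>

struct UseRecord { long Offset; bool IsSigned; };

struct UsesOfOneStride {
  long Stride = 0;
  std::vector<UseRecord> Users;
  void addUser(long Offset, bool IsSigned) { Users.push_back({Offset, IsSigned}); }
};

// All interesting uses in a loop, keyed by their stride.
using UsesByStrideMap = std::map<long, UsesOfOneStride>;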
+ std::map IVUsesByStride; + + /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable: + /// We use this to iterate over the IVUsesByStride collection without being + /// dependent on random ordering of pointers in the process. + SmallVector StrideOrder; + +private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void releaseMemory(); + +public: + static char ID; // Pass ID, replacement for typeid + IVUsers(); + + /// AddUsersIfInteresting - Inspect the specified Instruction. If it is a + /// reducible SCEV, recursively add its users to the IVUsesByStride set and + /// return true. Otherwise, return false. + bool AddUsersIfInteresting(Instruction *I); + + /// getReplacementExpr - Return a SCEV expression which computes the + /// value of the OperandValToReplace of the given IVStrideUse. + SCEVHandle getReplacementExpr(const IVStrideUse &U) const; + + void print(raw_ostream &OS, const Module* = 0) const; + virtual void print(std::ostream &OS, const Module* = 0) const; + void print(std::ostream *OS, const Module* M = 0) const { + if (OS) print(*OS, M); + } + + /// dump - This method is used for debugging. + void dump() const; +}; + +Pass *createIVUsersPass(); + +} + +#endif diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp new file mode 100644 index 000000000000..9ec9cacbbbe5 --- /dev/null +++ b/llvm/lib/Analysis/IVUsers.cpp @@ -0,0 +1,391 @@ +//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bookkeeping for "interesting" users of expressions +// computed from induction variables. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "iv-users" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +char IVUsers::ID = 0; +static RegisterPass +X("iv-users", "Induction Variable Users", false, true); + +Pass *llvm::createIVUsersPass() { + return new IVUsers(); +} + +/// containsAddRecFromDifferentLoop - Determine whether expression S involves a +/// subexpression that is an AddRec from a loop other than L. An outer loop +/// of L is OK, but not an inner loop nor a disjoint loop. +static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { + // This is very common, put it first. + if (isa(S)) + return false; + if (const SCEVCommutativeExpr *AE = dyn_cast(S)) { + for (unsigned int i=0; i< AE->getNumOperands(); i++) + if (containsAddRecFromDifferentLoop(AE->getOperand(i), L)) + return true; + return false; + } + if (const SCEVAddRecExpr *AE = dyn_cast(S)) { + if (const Loop *newLoop = AE->getLoop()) { + if (newLoop == L) + return false; + // if newLoop is an outer loop of L, this is OK. 
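// Editor's sketch: why StrideOrder exists alongside IVUsesByStride. Iterating
// a std::map keyed by pointers visits entries in pointer order, which varies
// from run to run; remembering the keys in a side vector in first-seen order
// keeps iteration deterministic. Key and Info are placeholder types.
#include <map>
#include <utility>
#include <vector>

template <typename Key, typename Info>
struct StableKeyedCollection {
  std::map<Key, Info> ByKey;  // fast lookup, like IVUsesByStride
  std::vector<Key> Order;     // stable iteration order, like StrideOrder

  Info &getOrCreate(const Key &K) {
    typename std::map<Key, Info>::iterator It = ByKey.find(K);
    if (It == ByKey.end()) {
      Order.push_back(K);     // record first-seen order once per key
      It = ByKey.insert(std::make_pair(K, Info())).first;
    }
    return It->second;
  }
};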
+ if (!LoopInfoBase::isNotAlreadyContainedIn(L, newLoop)) + return false; + } + return true; + } + if (const SCEVUDivExpr *DE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(DE->getLHS(), L) || + containsAddRecFromDifferentLoop(DE->getRHS(), L); +#if 0 + // SCEVSDivExpr has been backed out temporarily, but will be back; we'll + // need this when it is. + if (const SCEVSDivExpr *DE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(DE->getLHS(), L) || + containsAddRecFromDifferentLoop(DE->getRHS(), L); +#endif + if (const SCEVCastExpr *CE = dyn_cast(S)) + return containsAddRecFromDifferentLoop(CE->getOperand(), L); + return false; +} + +/// getSCEVStartAndStride - Compute the start and stride of this expression, +/// returning false if the expression is not a start/stride pair, or true if it +/// is. The stride must be a loop invariant expression, but the start may be +/// a mix of loop invariant and loop variant expressions. The start cannot, +/// however, contain an AddRec from a different loop, unless that loop is an +/// outer loop of the current loop. +static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, + SCEVHandle &Start, SCEVHandle &Stride, + bool &isSigned, + ScalarEvolution *SE, DominatorTree *DT) { + SCEVHandle TheAddRec = Start; // Initialize to zero. + bool isSExt = false; + bool isZExt = false; + + // If the outer level is an AddExpr, the operands are all start values except + // for a nested AddRecExpr. + if (const SCEVAddExpr *AE = dyn_cast(SH)) { + for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) + if (const SCEVAddRecExpr *AddRec = + dyn_cast(AE->getOperand(i))) { + if (AddRec->getLoop() == L) + TheAddRec = SE->getAddExpr(AddRec, TheAddRec); + else + return false; // Nested IV of some sort? + } else { + Start = SE->getAddExpr(Start, AE->getOperand(i)); + } + + } else if (const SCEVZeroExtendExpr *Z = dyn_cast(SH)) { + TheAddRec = Z->getOperand(); + isZExt = true; + } else if (const SCEVSignExtendExpr *S = dyn_cast(SH)) { + TheAddRec = S->getOperand(); + isSExt = true; + } else if (isa(SH)) { + TheAddRec = SH; + } else { + return false; // not analyzable. + } + + const SCEVAddRecExpr *AddRec = dyn_cast(TheAddRec); + if (!AddRec || AddRec->getLoop() != L) return false; + + // Use getSCEVAtScope to attempt to simplify other loops out of + // the picture. + SCEVHandle AddRecStart = AddRec->getStart(); + SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); + if (!isa(BetterAddRecStart)) + AddRecStart = BetterAddRecStart; + + // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other + // than an outer loop of the current loop, reject it. LSR has no concept of + // operating on more than one loop at a time so don't confuse it with such + // expressions. + if (containsAddRecFromDifferentLoop(AddRecStart, L)) + return false; + + if (isSExt || isZExt) + Start = SE->getTruncateExpr(Start, AddRec->getType()); + + Start = SE->getAddExpr(Start, AddRecStart); + + if (!isa(AddRec->getStepRecurrence(*SE))) { + // If stride is an instruction, make sure it dominates the loop preheader. + // Otherwise we could end up with a use before def situation. 
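// Editor's sketch (conceptual, hypothetical names): the algebra behind
// getSCEVStartAndStride. A use that is affine in the canonical IV has the
// closed form Start + Stride*i; the stride must be loop-invariant, while the
// start may mix loop-invariant values with values from an outer loop.
struct AffineForm {
  long Start;   // value on the first iteration (i == 0)
  long Stride;  // loop-invariant step added on every backedge
};

// Recover the affine form from two consecutive samples of the sequence;
// this models only the algebra, not the SCEV-based pattern matching above.
inline AffineForm recoverAffineForm(long ValueAtI0, long ValueAtI1) {
  return AffineForm{ValueAtI0, ValueAtI1 - ValueAtI0};
}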
+ BasicBlock *Preheader = L->getLoopPreheader(); + if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT)) + return false; + + DOUT << "[" << L->getHeader()->getName() + << "] Variable stride: " << *AddRec << "\n"; + } + + Stride = AddRec->getStepRecurrence(*SE); + isSigned = isSExt; + return true; +} + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, + Loop *L, LoopInfo *LI, DominatorTree *DT, + Pass *P) { + // If the user is in the loop, use the preinc value. + if (L->contains(User->getParent())) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast(User); + if (!PN) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of IV by the PHI node. If any use corresponds to + // a block that is not dominated by the latch block, give up and use the + // preincremented value. + unsigned NumUses = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == IV) { + ++NumUses; + if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + } + + // Okay, all uses of IV by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. + return true; +} + +/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// reducible SCEV, recursively add its users to the IVUsesByStride set and +/// return true. Otherwise, return false. +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; // Void and FP expressions cannot be reduced. + + // LSR is not APInt clean, do not touch integers bigger than 64-bits. + if (SE->getTypeSizeInBits(I->getType()) > 64) + return false; + + if (!Processed.insert(I)) + return true; // Instruction already handled. + + // Get the symbolic expression for this instruction. + SCEVHandle ISE = SE->getSCEV(I); + if (isa(ISE)) return false; + + // Get the start and stride for this expression. + Loop *UseLoop = LI->getLoopFor(I->getParent()); + SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType()); + SCEVHandle Stride = Start; + bool isSigned; + + if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT)) + return false; // Non-reducible symbolic expression, bail out. 
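// Editor's sketch (source-level analogy): why a use outside the loop that is
// dominated by the latch should see the post-incremented IV, which is the
// decision IVUseShouldUsePostIncValue makes.
int countUpTo(int n) {
  int i = 0;
  while (i < n)  // exit test inside the loop uses the pre-increment value
    ++i;         // latch: the increment happens here
  return i;      // use after the loop: it observes the post-incremented value
}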
+ + SmallPtrSet UniqueUsers; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + if (!UniqueUsers.insert(User)) + continue; + + // Do not infinitely recurse on PHI nodes. + if (isa(User) && Processed.count(User)) + continue; + + // Descend recursively, but not into PHI nodes outside the current loop. + // It's important to see the entire expression outside the loop to get + // choices that depend on addressing mode use right, although we won't + // consider references ouside the loop in all cases. + // If User is already in Processed, we don't want to recurse into it again, + // but do want to record a second reference in the same instruction. + bool AddUserToIVUsers = false; + if (LI->getLoopFor(User->getParent()) != L) { + if (isa(User) || Processed.count(User) || + !AddUsersIfInteresting(User)) { + DOUT << "FOUND USER in other loop: " << *User + << " OF SCEV: " << *ISE << "\n"; + AddUserToIVUsers = true; + } + } else if (Processed.count(User) || + !AddUsersIfInteresting(User)) { + DOUT << "FOUND USER: " << *User + << " OF SCEV: " << *ISE << "\n"; + AddUserToIVUsers = true; + } + + if (AddUserToIVUsers) { + IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride]; + if (!StrideUses) { // First occurrence of this stride? + StrideOrder.push_back(Stride); + StrideUses = new IVUsersOfOneStride(Stride); + IVUses.push_back(StrideUses); + IVUsesByStride[Stride] = StrideUses; + } + + // Okay, we found a user that we cannot reduce. Analyze the instruction + // and decide what to do with it. If we are a use inside of the loop, use + // the value before incrementation, otherwise use it after incrementation. + if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) { + // The value used will be incremented by the stride more than we are + // expecting, so subtract this off. + SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride); + StrideUses->addUser(NewStart, User, I, isSigned); + StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); + DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; + } else { + StrideUses->addUser(Start, User, I, isSigned); + } + } + } + return true; +} + +IVUsers::IVUsers() + : LoopPass(&ID) { +} + +void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + +bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { + + L = l; + LI = &getAnalysis(); + DT = &getAnalysis(); + SE = &getAnalysis(); + + // Find all uses of induction variables in this loop, and categorize + // them by stride. Start by finding all of the PHI nodes in the header for + // this loop. If they are induction variables, inspect their uses. + for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) + AddUsersIfInteresting(I); + + return false; +} + +/// getReplacementExpr - Return a SCEV expression which computes the +/// value of the OperandValToReplace of the given IVStrideUse. +SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const { + const Type *UseTy = U.getOperandValToReplace()->getType(); + // Start with zero. + SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + // Create the basic add recurrence. + RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); + // Add the offset in a separate step, because it may be loop-variant. 
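// Editor's sketch (hypothetical types): the shape of the def-use walk in
// AddUsersIfInteresting. A Processed set keeps the recursion from looping
// forever when the use graph cycles back through PHI nodes; users that cannot
// be reduced any further are the ones recorded as IV uses.
#include <set>
#include <vector>

struct Expr {
  bool Reducible = false;     // stands in for "has a start/stride form"
  std::vector<Expr *> Users;  // instructions that consume this value
};

bool addUsersIfInteresting(Expr *E, std::set<Expr *> &Processed,
                           std::vector<Expr *> &RecordedUses) {
  if (!E->Reducible)
    return false;                      // caller records E itself as a plain use
  if (!Processed.insert(E).second)
    return true;                       // already handled, don't walk again
  for (Expr *U : E->Users)
    if (!addUsersIfInteresting(U, Processed, RecordedUses))
      RecordedUses.push_back(U);       // U is where the IV value escapes
  return true;
}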
+ RetVal = SE->getAddExpr(RetVal, U.getOffset()); + // For uses of post-incremented values, add an extra stride to compute + // the actual replacement value. + if (U.isUseOfPostIncrementedValue()) + RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride); + // Evaluate the expression out of the loop, if possible. + if (!L->contains(U.getUser()->getParent())) { + SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); + if (!isa(ExitVal) && ExitVal->isLoopInvariant(L)) + RetVal = ExitVal; + } + // Promote the result to the type of the use. + if (SE->getTypeSizeInBits(RetVal->getType()) != + SE->getTypeSizeInBits(UseTy)) { + if (U.isSigned()) + RetVal = SE->getSignExtendExpr(RetVal, UseTy); + else + RetVal = SE->getZeroExtendExpr(RetVal, UseTy); + } + return RetVal; +} + +void IVUsers::print(raw_ostream &OS, const Module *M) const { + OS << "IV Users for loop "; + WriteAsOperand(OS, L->getHeader(), false); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << " with backedge-taken count " + << *SE->getBackedgeTakenCount(L); + } + OS << ":\n"; + + for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { + std::map::const_iterator SI = + IVUsesByStride.find(StrideOrder[Stride]); + assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); + OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n"; + + for (ilist::const_iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; ++UI) { + OS << " "; + WriteAsOperand(OS, UI->getOperandValToReplace(), false); + OS << " = "; + OS << *getReplacementExpr(*UI); + if (UI->isUseOfPostIncrementedValue()) + OS << " (post-inc)"; + OS << " in "; + UI->getUser()->print(OS); + } + } +} + +void IVUsers::print(std::ostream &o, const Module *M) const { + raw_os_ostream OS(o); + print(OS, M); +} + +void IVUsers::dump() const { + print(errs()); +} + +void IVUsers::releaseMemory() { + IVUsesByStride.clear(); + StrideOrder.clear(); + Processed.clear(); +} + +void IVStrideUse::deleted() { + // Remove this user from the list. + Parent->Users.erase(this); + // this now dangles! +} diff --git a/llvm/lib/Target/README.txt b/llvm/lib/Target/README.txt index 538d1371a16f..f68cf0e40df0 100644 --- a/llvm/lib/Target/README.txt +++ b/llvm/lib/Target/README.txt @@ -749,16 +749,6 @@ be done safely if "b" isn't modified between the strlen and memcpy of course. 
//===---------------------------------------------------------------------===// -We should be able to evaluate this loop: - -int test(int x_offs) { - while (x_offs > 4) - x_offs -= 4; - return x_offs; -} - -//===---------------------------------------------------------------------===// - Reassociate should turn things like: int factorial(int X) { diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 3d9017d17e33..80d34f6f16c2 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -43,6 +43,8 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Type.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -51,11 +53,12 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumRemoved , "Number of aux indvars removed"); @@ -65,6 +68,7 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced"); namespace { class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass { + IVUsers *IU; LoopInfo *LI; ScalarEvolution *SE; bool Changed; @@ -76,12 +80,15 @@ namespace { virtual bool runOnLoop(Loop *L, LPPassManager &LPM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addRequiredID(LCSSAID); AU.addRequiredID(LoopSimplifyID); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreservedID(LoopSimplifyID); + AU.addPreserved(); AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } @@ -90,17 +97,21 @@ namespace { void RewriteNonIntegerIVs(Loop *L); - void LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, + ICmpInst *LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter); void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount); - void DeleteTriviallyDeadInstructions(SmallPtrSet &Insts); + void RewriteIVExpressions(Loop *L, const Type *LargestType, + SCEVExpander &Rewriter); - void HandleFloatingPointIV(Loop *L, PHINode *PH, - SmallPtrSet &DeadInsts); + void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter); + + void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter); + + void HandleFloatingPointIV(Loop *L, PHINode *PH); }; } @@ -112,31 +123,12 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. 
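// Editor's sketch (hypothetical types): the worklist idiom that the removed
// DeleteTriviallyDeadInstructions helper implemented and that the calls to
// RecursivelyDeleteTriviallyDeadInstructions elsewhere in this patch now
// provide - deleting a dead value can make its operands dead, so they are
// revisited in turn.
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
  int UseCount = 0;
  bool Deleted = false;
};

void deleteTriviallyDead(std::set<Node *> Worklist) {
  while (!Worklist.empty()) {
    Node *N = *Worklist.begin();
    Worklist.erase(Worklist.begin());
    if (N->Deleted || N->UseCount != 0)
      continue;                 // still referenced somewhere: keep it
    N->Deleted = true;
    for (Node *Op : N->Operands) {
      --Op->UseCount;           // dropping this use may leave Op dead
      Worklist.insert(Op);      // so reconsider it
    }
  }
}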
-void IndVarSimplify:: -DeleteTriviallyDeadInstructions(SmallPtrSet &Insts) { - while (!Insts.empty()) { - Instruction *I = *Insts.begin(); - Insts.erase(I); - if (isInstructionTriviallyDead(I)) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (Instruction *U = dyn_cast(I->getOperand(i))) - Insts.insert(U); - DOUT << "INDVARS: Deleting: " << *I; - I->eraseFromParent(); - Changed = true; - } - } -} - /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. This pass is able to rewrite the exit tests of any loop where the /// SCEV analysis can determine a loop-invariant trip count of the loop, which /// is actually a much broader range than just linear tests. -void IndVarSimplify::LinearFunctionTestReplace(Loop *L, +ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, @@ -196,10 +188,15 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L, << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" << " RHS:\t" << *RHS << "\n"; - Value *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI); - BI->setCondition(Cond); + ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI); + + Instruction *OrigCond = cast(BI->getCondition()); + OrigCond->replaceAllUsesWith(Cond); + RecursivelyDeleteTriviallyDeadInstructions(OrigCond); + ++NumLFTR; Changed = true; + return Cond; } /// RewriteLoopExitValues - Check to see if this loop has a computable @@ -207,8 +204,16 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L, /// final value of any expressions that are recurrent in the loop, and /// substitute the exit values from the loop into any instructions outside of /// the loop that use the final values of the current expressions. +/// +/// This is mostly redundant with the regular IndVarSimplify activities that +/// happen later, except that it's more powerful in some cases, because it's +/// able to brute-force evaluate arbitrary instructions as long as they have +/// constant operands at the beginning of the loop. void IndVarSimplify::RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount) { + // Verify the input to the pass in already in LCSSA form. + assert(L->isLCSSAForm()); + BasicBlock *Preheader = L->getLoopPreheader(); // Scan all of the instructions in the loop, looking at those that have @@ -226,9 +231,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, BlockToInsertInto = Preheader; BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI(); - bool HasConstantItCount = isa(BackedgeTakenCount); - - SmallPtrSet InstructionsToDelete; std::map ExitValues; // Find all values that are computed inside the loop, but used outside of it. @@ -268,18 +270,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, if (!L->contains(Inst->getParent())) continue; - // We require that this value either have a computable evolution or that - // the loop have a constant iteration count. In the case where the loop - // has a constant iteration count, we can sometimes force evaluation of - // the exit value through brute force. - SCEVHandle SH = SE->getSCEV(Inst); - if (!SH->hasComputableLoopEvolution(L) && !HasConstantItCount) - continue; // Cannot get exit evolution for the loop value. - // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. 
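// Editor's sketch: the effect of LinearFunctionTestReplace at the source
// level. Once the backedge-taken count is known, the original exit comparison
// can be replaced by a canonical "!=" test against the incremented canonical
// IV. The check below only confirms the two loops agree for sample inputs;
// it is not the transformation itself.
#include <cassert>

int sumBefore(int n) {          // original exit test: signed "<"
  int s = 0;
  for (int i = 0; i < n; ++i) s += i;
  return s;
}

int sumAfter(int n) {           // rewritten exit test: "!=" against the trip count
  int s = 0;
  int TripCount = n > 0 ? n : 0;
  for (int iv = 0; iv != TripCount; ++iv) s += iv;
  return s;
}

int main() {
  assert(sumBefore(10) == sumAfter(10));
  assert(sumBefore(-3) == sumAfter(-3));
  return 0;
}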
Evaluate the value it // contains when the loop exits, if possible. - SCEVHandle ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + SCEVHandle SH = SE->getSCEV(Inst); + SCEVHandle ExitValue = SE->getSCEVAtScope(SH, L->getParentLoop()); if (isa(ExitValue) || !ExitValue->isLoopInvariant(L)) continue; @@ -298,9 +293,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, PN->setIncomingValue(i, ExitVal); - // If this instruction is dead now, schedule it to be removed. - if (Inst->use_empty()) - InstructionsToDelete.insert(Inst); + // If this instruction is dead now, delete it. + RecursivelyDeleteTriviallyDeadInstructions(Inst); // See if this is a single-entry LCSSA PHI node. If so, we can (and // have to) remove @@ -308,14 +302,12 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // in the loop, so we don't need an LCSSA phi node anymore. if (NumPreds == 1) { PN->replaceAllUsesWith(ExitVal); - PN->eraseFromParent(); + RecursivelyDeleteTriviallyDeadInstructions(PN); break; } } } } - - DeleteTriviallyDeadInstructions(InstructionsToDelete); } void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { @@ -325,266 +317,24 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { // BasicBlock *Header = L->getHeader(); - SmallPtrSet DeadInsts; - for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - HandleFloatingPointIV(L, PN, DeadInsts); - } + SmallVector PHIs; + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast(I); ++I) + PHIs.push_back(PN); + + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null(PHIs[i])) + HandleFloatingPointIV(L, PN); // If the loop previously had floating-point IV, ScalarEvolution // may not have been able to compute a trip count. Now that we've done some // re-writing, the trip count may be computable. if (Changed) SE->forgetLoopBackedgeTakenCount(L); - - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(DeadInsts); -} - -/// getEffectiveIndvarType - Determine the widest type that the -/// induction-variable PHINode Phi is cast to. -/// -static const Type *getEffectiveIndvarType(const PHINode *Phi, - const ScalarEvolution *SE) { - const Type *Ty = Phi->getType(); - - for (Value::use_const_iterator UI = Phi->use_begin(), UE = Phi->use_end(); - UI != UE; ++UI) { - const Type *CandidateType = NULL; - if (const ZExtInst *ZI = dyn_cast(UI)) - CandidateType = ZI->getDestTy(); - else if (const SExtInst *SI = dyn_cast(UI)) - CandidateType = SI->getDestTy(); - else if (const IntToPtrInst *IP = dyn_cast(UI)) - CandidateType = IP->getDestTy(); - else if (const PtrToIntInst *PI = dyn_cast(UI)) - CandidateType = PI->getDestTy(); - if (CandidateType && - SE->isSCEVable(CandidateType) && - SE->getTypeSizeInBits(CandidateType) > SE->getTypeSizeInBits(Ty)) - Ty = CandidateType; - } - - return Ty; -} - -/// TestOrigIVForWrap - Analyze the original induction variable that -/// controls the loop's iteration to determine whether it would ever -/// undergo signed or unsigned overflow. -/// -/// In addition to setting the NoSignedWrap and NoUnsignedWrap -/// variables to true when appropriate (they are not set to false here), -/// return the PHI for this induction variable. Also record the initial -/// and final values and the increment; these are not meaningful unless -/// either NoSignedWrap or NoUnsignedWrap is true, and are always meaningful -/// in that case, although the final value may be 0 indicating a nonconstant. 
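// Editor's sketch: the closed form that RewriteLoopExitValues exploits. A
// value that evolves as the affine recurrence Start + Stride*i has a directly
// computable value once the backedge-taken count is known, so uses after the
// loop can be rewritten without keeping the loop's computation live.
inline long affineValueOnIteration(long Start, long Stride, long i) {
  return Start + Stride * i;
}
// The header PHI's value when the loop exits is its value on the final
// iteration, i.e. affineValueOnIteration(Start, Stride, BackedgeTakenCount);
// the incremented value is one more stride beyond that.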
-/// -/// TODO: This duplicates a fair amount of ScalarEvolution logic. -/// Perhaps this can be merged with -/// ScalarEvolution::getBackedgeTakenCount -/// and/or ScalarEvolution::get{Sign,Zero}ExtendExpr. -/// -static const PHINode *TestOrigIVForWrap(const Loop *L, - const BranchInst *BI, - const Instruction *OrigCond, - const ScalarEvolution &SE, - bool &NoSignedWrap, - bool &NoUnsignedWrap, - const ConstantInt* &InitialVal, - const ConstantInt* &IncrVal, - const ConstantInt* &LimitVal) { - // Verify that the loop is sane and find the exit condition. - const ICmpInst *Cmp = dyn_cast(OrigCond); - if (!Cmp) return 0; - - const Value *CmpLHS = Cmp->getOperand(0); - const Value *CmpRHS = Cmp->getOperand(1); - const BasicBlock *TrueBB = BI->getSuccessor(0); - const BasicBlock *FalseBB = BI->getSuccessor(1); - ICmpInst::Predicate Pred = Cmp->getPredicate(); - - // Canonicalize a constant to the RHS. - if (isa(CmpLHS)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - std::swap(CmpLHS, CmpRHS); - } - // Canonicalize SLE to SLT. - if (Pred == ICmpInst::ICMP_SLE) - if (const ConstantInt *CI = dyn_cast(CmpRHS)) - if (!CI->getValue().isMaxSignedValue()) { - CmpRHS = ConstantInt::get(CI->getValue() + 1); - Pred = ICmpInst::ICMP_SLT; - } - // Canonicalize SGT to SGE. - if (Pred == ICmpInst::ICMP_SGT) - if (const ConstantInt *CI = dyn_cast(CmpRHS)) - if (!CI->getValue().isMaxSignedValue()) { - CmpRHS = ConstantInt::get(CI->getValue() + 1); - Pred = ICmpInst::ICMP_SGE; - } - // Canonicalize SGE to SLT. - if (Pred == ICmpInst::ICMP_SGE) { - std::swap(TrueBB, FalseBB); - Pred = ICmpInst::ICMP_SLT; - } - // Canonicalize ULE to ULT. - if (Pred == ICmpInst::ICMP_ULE) - if (const ConstantInt *CI = dyn_cast(CmpRHS)) - if (!CI->getValue().isMaxValue()) { - CmpRHS = ConstantInt::get(CI->getValue() + 1); - Pred = ICmpInst::ICMP_ULT; - } - // Canonicalize UGT to UGE. - if (Pred == ICmpInst::ICMP_UGT) - if (const ConstantInt *CI = dyn_cast(CmpRHS)) - if (!CI->getValue().isMaxValue()) { - CmpRHS = ConstantInt::get(CI->getValue() + 1); - Pred = ICmpInst::ICMP_UGE; - } - // Canonicalize UGE to ULT. - if (Pred == ICmpInst::ICMP_UGE) { - std::swap(TrueBB, FalseBB); - Pred = ICmpInst::ICMP_ULT; - } - // For now, analyze only LT loops for signed overflow. - if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_ULT) - return 0; - - bool isSigned = Pred == ICmpInst::ICMP_SLT; - - // Get the increment instruction. Look past casts if we will - // be able to prove that the original induction variable doesn't - // undergo signed or unsigned overflow, respectively. - const Value *IncrInst = CmpLHS; - if (isSigned) { - if (const SExtInst *SI = dyn_cast(CmpLHS)) { - if (!isa(CmpRHS) || - !cast(CmpRHS)->getValue() - .isSignedIntN(SE.getTypeSizeInBits(IncrInst->getType()))) - return 0; - IncrInst = SI->getOperand(0); - } - } else { - if (const ZExtInst *ZI = dyn_cast(CmpLHS)) { - if (!isa(CmpRHS) || - !cast(CmpRHS)->getValue() - .isIntN(SE.getTypeSizeInBits(IncrInst->getType()))) - return 0; - IncrInst = ZI->getOperand(0); - } - } - - // For now, only analyze induction variables that have simple increments. - const BinaryOperator *IncrOp = dyn_cast(IncrInst); - if (!IncrOp || IncrOp->getOpcode() != Instruction::Add) - return 0; - IncrVal = dyn_cast(IncrOp->getOperand(1)); - if (!IncrVal) - return 0; - - // Make sure the PHI looks like a normal IV. 
- const PHINode *PN = dyn_cast(IncrOp->getOperand(0)); - if (!PN || PN->getNumIncomingValues() != 2) - return 0; - unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); - unsigned BackEdge = !IncomingEdge; - if (!L->contains(PN->getIncomingBlock(BackEdge)) || - PN->getIncomingValue(BackEdge) != IncrOp) - return 0; - if (!L->contains(TrueBB)) - return 0; - - // For now, only analyze loops with a constant start value, so that - // we can easily determine if the start value is not a maximum value - // which would wrap on the first iteration. - InitialVal = dyn_cast(PN->getIncomingValue(IncomingEdge)); - if (!InitialVal) - return 0; - - // The upper limit need not be a constant; we'll check later. - LimitVal = dyn_cast(CmpRHS); - - // We detect the impossibility of wrapping in two cases, both of - // which require starting with a non-max value: - // - The IV counts up by one, and the loop iterates only while it remains - // less than a limiting value (any) in the same type. - // - The IV counts up by a positive increment other than 1, and the - // constant limiting value + the increment is less than the max value - // (computed as max-increment to avoid overflow) - if (isSigned && !InitialVal->getValue().isMaxSignedValue()) { - if (IncrVal->equalsInt(1)) - NoSignedWrap = true; // LimitVal need not be constant - else if (LimitVal) { - uint64_t numBits = LimitVal->getValue().getBitWidth(); - if (IncrVal->getValue().sgt(APInt::getNullValue(numBits)) && - (APInt::getSignedMaxValue(numBits) - IncrVal->getValue()) - .sgt(LimitVal->getValue())) - NoSignedWrap = true; - } - } else if (!isSigned && !InitialVal->getValue().isMaxValue()) { - if (IncrVal->equalsInt(1)) - NoUnsignedWrap = true; // LimitVal need not be constant - else if (LimitVal) { - uint64_t numBits = LimitVal->getValue().getBitWidth(); - if (IncrVal->getValue().ugt(APInt::getNullValue(numBits)) && - (APInt::getMaxValue(numBits) - IncrVal->getValue()) - .ugt(LimitVal->getValue())) - NoUnsignedWrap = true; - } - } - return PN; -} - -static Value *getSignExtendedTruncVar(const SCEVAddRecExpr *AR, - ScalarEvolution *SE, - const Type *LargestType, Loop *L, - const Type *myType, - SCEVExpander &Rewriter) { - SCEVHandle ExtendedStart = - SE->getSignExtendExpr(AR->getStart(), LargestType); - SCEVHandle ExtendedStep = - SE->getSignExtendExpr(AR->getStepRecurrence(*SE), LargestType); - SCEVHandle ExtendedAddRec = - SE->getAddRecExpr(ExtendedStart, ExtendedStep, L); - if (LargestType != myType) - ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType); - return Rewriter.expandCodeFor(ExtendedAddRec, myType); -} - -static Value *getZeroExtendedTruncVar(const SCEVAddRecExpr *AR, - ScalarEvolution *SE, - const Type *LargestType, Loop *L, - const Type *myType, - SCEVExpander &Rewriter) { - SCEVHandle ExtendedStart = - SE->getZeroExtendExpr(AR->getStart(), LargestType); - SCEVHandle ExtendedStep = - SE->getZeroExtendExpr(AR->getStepRecurrence(*SE), LargestType); - SCEVHandle ExtendedAddRec = - SE->getAddRecExpr(ExtendedStart, ExtendedStep, L); - if (LargestType != myType) - ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType); - return Rewriter.expandCodeFor(ExtendedAddRec, myType); -} - -/// allUsesAreSameTyped - See whether all Uses of I are instructions -/// with the same Opcode and the same type. 
-static bool allUsesAreSameTyped(unsigned int Opcode, Instruction *I) { - const Type* firstType = NULL; - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - Instruction *II = dyn_cast(*UI); - if (!II || II->getOpcode() != Opcode) - return false; - if (!firstType) - firstType = II->getType(); - else if (firstType != II->getType()) - return false; - } - return true; } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { + IU = &getAnalysis(); LI = &getAnalysis(); SE = &getAnalysis(); Changed = false; @@ -594,11 +344,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteNonIntegerIVs(L); BasicBlock *Header = L->getHeader(); - BasicBlock *ExitingBlock = L->getExitingBlock(); - SmallPtrSet DeadInsts; - - // Verify the input to the pass in already in LCSSA form. - assert(L->isLCSSAForm()); + BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null + SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions @@ -606,59 +353,45 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // loop into any instructions outside of the loop that use the final values of // the current expressions. // - SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (!isa(BackedgeTakenCount)) RewriteLoopExitValues(L, BackedgeTakenCount); - // Next, analyze all of the induction variables in the loop, canonicalizing - // auxillary induction variables. - std::vector > IndVars; - - for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - if (SE->isSCEVable(PN->getType())) { - SCEVHandle SCEV = SE->getSCEV(PN); - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec. - if (const SCEVAddRecExpr *AR = dyn_cast(SCEV)) - if (AR->getLoop() == L && AR->isAffine()) - IndVars.push_back(std::make_pair(PN, SCEV)); - } - } - - // Compute the type of the largest recurrence expression, and collect - // the set of the types of the other recurrence expressions. + // Compute the type of the largest recurrence expression, and decide whether + // a canonical induction variable should be inserted. const Type *LargestType = 0; - SmallSetVector SizesToInsert; + bool NeedCannIV = false; if (!isa(BackedgeTakenCount)) { LargestType = BackedgeTakenCount->getType(); LargestType = SE->getEffectiveSCEVType(LargestType); - SizesToInsert.insert(LargestType); + // If we have a known trip count and a single exit block, we'll be + // rewriting the loop exit test condition below, which requires a + // canonical induction variable. 
+ if (ExitingBlock) + NeedCannIV = true; } - for (unsigned i = 0, e = IndVars.size(); i != e; ++i) { - const PHINode *PN = IndVars[i].first; - const Type *PNTy = PN->getType(); - PNTy = SE->getEffectiveSCEVType(PNTy); - SizesToInsert.insert(PNTy); - const Type *EffTy = getEffectiveIndvarType(PN, SE); - EffTy = SE->getEffectiveSCEVType(EffTy); - SizesToInsert.insert(EffTy); + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + SCEVHandle Stride = IU->StrideOrder[i]; + const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); if (!LargestType || - SE->getTypeSizeInBits(EffTy) > + SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) - LargestType = EffTy; + LargestType = Ty; + + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + + if (!SI->second->Users.empty()) + NeedCannIV = true; } // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE, *LI); - // Now that we know the largest of of the induction variables in this loop, - // insert a canonical induction variable of the largest size. + // Now that we know the largest of of the induction variable expressions + // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; - if (!SizesToInsert.empty()) { + if (NeedCannIV) { IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType); ++NumInserted; Changed = true; @@ -667,231 +400,293 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - bool NoSignedWrap = false; - bool NoUnsignedWrap = false; - const ConstantInt* InitialVal, * IncrVal, * LimitVal; - const PHINode *OrigControllingPHI = 0; - if (!isa(BackedgeTakenCount) && ExitingBlock) + ICmpInst *NewICmp = 0; + if (!isa(BackedgeTakenCount) && ExitingBlock) { + assert(NeedCannIV && + "LinearFunctionTestReplace requires a canonical induction variable"); // Can't rewrite non-branch yet. - if (BranchInst *BI = dyn_cast(ExitingBlock->getTerminator())) { - if (Instruction *OrigCond = dyn_cast(BI->getCondition())) { - // Determine if the OrigIV will ever undergo overflow. - OrigControllingPHI = - TestOrigIVForWrap(L, BI, OrigCond, *SE, - NoSignedWrap, NoUnsignedWrap, - InitialVal, IncrVal, LimitVal); - - // We'll be replacing the original condition, so it'll be dead. - DeadInsts.insert(OrigCond); - } - - LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, - ExitingBlock, BI, Rewriter); - } - - // Now that we have a canonical induction variable, we can rewrite any - // recurrences in terms of the induction variable. Start with the auxillary - // induction variables, and recursively rewrite any of their uses. - BasicBlock::iterator InsertPt = Header->getFirstNonPHI(); - Rewriter.setInsertionPoint(InsertPt); - - // If there were induction variables of other sizes, cast the primary - // induction variable to the right size for them, avoiding the need for the - // code evaluation methods to insert induction variables of different sizes. 
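// Editor's sketch: the "single canonical IV" idea behind NeedCannIV and
// LargestType. One counter of the widest needed type, starting at zero and
// stepping by one, can express every affine IV in the loop; narrower or
// offset IVs are recovered by truncating and scaling it. The helper name is
// illustrative only.
#include <cstdint>

inline int32_t derivedNarrowIV(int64_t CanonicalIV,  // the {0,+,1} counter
                               int32_t Start, int32_t Stride) {
  return Start + Stride * static_cast<int32_t>(CanonicalIV);  // truncate, scale, offset
}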
- for (unsigned i = 0, e = SizesToInsert.size(); i != e; ++i) { - const Type *Ty = SizesToInsert[i]; - if (Ty != LargestType) { - Instruction *New = new TruncInst(IndVar, Ty, "indvar", InsertPt); - Rewriter.addInsertedValue(New, SE->getSCEV(New)); - DOUT << "INDVARS: Made trunc IV for type " << *Ty << ": " - << *New << "\n"; - } + if (BranchInst *BI = dyn_cast(ExitingBlock->getTerminator())) + NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, + ExitingBlock, BI, Rewriter); } - // Rewrite all induction variables in terms of the canonical induction - // variable. - while (!IndVars.empty()) { - PHINode *PN = IndVars.back().first; - const SCEVAddRecExpr *AR = cast(IndVars.back().second); - Value *NewVal = Rewriter.expandCodeFor(AR, PN->getType()); - DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *PN - << " into = " << *NewVal << "\n"; - NewVal->takeName(PN); + Rewriter.setInsertionPoint(Header->getFirstNonPHI()); - /// If the new canonical induction variable is wider than the original, - /// and the original has uses that are casts to wider types, see if the - /// truncate and extend can be omitted. - if (PN == OrigControllingPHI && PN->getType() != LargestType) - for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); - UI != UE; ++UI) { - Instruction *UInst = dyn_cast(*UI); - if (UInst && isa(UInst) && NoSignedWrap) { - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, L, - UInst->getType(), Rewriter); - UInst->replaceAllUsesWith(TruncIndVar); - DeadInsts.insert(UInst); - } - // See if we can figure out sext(i+constant) doesn't wrap, so we can - // use a larger add. This is common in subscripting. - if (UInst && UInst->getOpcode()==Instruction::Add && - !UInst->use_empty() && - allUsesAreSameTyped(Instruction::SExt, UInst) && - isa(UInst->getOperand(1)) && - NoSignedWrap && LimitVal) { - uint64_t oldBitSize = LimitVal->getValue().getBitWidth(); - uint64_t newBitSize = LargestType->getPrimitiveSizeInBits(); - ConstantInt* AddRHS = dyn_cast(UInst->getOperand(1)); - if (((APInt::getSignedMaxValue(oldBitSize) - IncrVal->getValue()) - - AddRHS->getValue()).sgt(LimitVal->getValue())) { - // We've determined this is (i+constant) and it won't overflow. - if (isa(UInst->use_begin())) { - SExtInst* oldSext = dyn_cast(UInst->use_begin()); - uint64_t truncSize = oldSext->getType()->getPrimitiveSizeInBits(); - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, - L, oldSext->getType(), Rewriter); - APInt APnewAddRHS = APInt(AddRHS->getValue()).sext(newBitSize); - if (newBitSize > truncSize) - APnewAddRHS = APnewAddRHS.trunc(truncSize); - ConstantInt* newAddRHS =ConstantInt::get(APnewAddRHS); - Value *NewAdd = - BinaryOperator::CreateAdd(TruncIndVar, newAddRHS, - UInst->getName()+".nosex", UInst); - for (Value::use_iterator UI2 = UInst->use_begin(), - UE2 = UInst->use_end(); UI2 != UE2; ++UI2) { - Instruction *II = dyn_cast(UI2); - II->replaceAllUsesWith(NewAdd); - DeadInsts.insert(II); - } - DeadInsts.insert(UInst); - } - } - } - // Try for sext(i | constant). This is safe as long as the - // high bit of the constant is not set. 
- if (UInst && UInst->getOpcode()==Instruction::Or && - !UInst->use_empty() && - allUsesAreSameTyped(Instruction::SExt, UInst) && NoSignedWrap && - isa(UInst->getOperand(1))) { - ConstantInt* RHS = dyn_cast(UInst->getOperand(1)); - if (!RHS->getValue().isNegative()) { - uint64_t newBitSize = LargestType->getPrimitiveSizeInBits(); - SExtInst* oldSext = dyn_cast(UInst->use_begin()); - uint64_t truncSize = oldSext->getType()->getPrimitiveSizeInBits(); - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, - L, oldSext->getType(), Rewriter); - APInt APnewOrRHS = APInt(RHS->getValue()).sext(newBitSize); - if (newBitSize > truncSize) - APnewOrRHS = APnewOrRHS.trunc(truncSize); - ConstantInt* newOrRHS =ConstantInt::get(APnewOrRHS); - Value *NewOr = - BinaryOperator::CreateOr(TruncIndVar, newOrRHS, - UInst->getName()+".nosex", UInst); - for (Value::use_iterator UI2 = UInst->use_begin(), - UE2 = UInst->use_end(); UI2 != UE2; ++UI2) { - Instruction *II = dyn_cast(UI2); - II->replaceAllUsesWith(NewOr); - DeadInsts.insert(II); - } - DeadInsts.insert(UInst); - } - } - // A zext of a signed variable known not to overflow is still safe. - if (UInst && isa(UInst) && (NoUnsignedWrap || NoSignedWrap)) { - Value *TruncIndVar = getZeroExtendedTruncVar(AR, SE, LargestType, L, - UInst->getType(), Rewriter); - UInst->replaceAllUsesWith(TruncIndVar); - DeadInsts.insert(UInst); - } - // If we have zext(i&constant), it's always safe to use the larger - // variable. This is not common but is a bottleneck in Openssl. - // (RHS doesn't have to be constant. There should be a better approach - // than bottom-up pattern matching for this...) - if (UInst && UInst->getOpcode()==Instruction::And && - !UInst->use_empty() && - allUsesAreSameTyped(Instruction::ZExt, UInst) && - isa(UInst->getOperand(1))) { - uint64_t newBitSize = LargestType->getPrimitiveSizeInBits(); - ConstantInt* AndRHS = dyn_cast(UInst->getOperand(1)); - ZExtInst* oldZext = dyn_cast(UInst->use_begin()); - uint64_t truncSize = oldZext->getType()->getPrimitiveSizeInBits(); - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, - L, oldZext->getType(), Rewriter); - APInt APnewAndRHS = APInt(AndRHS->getValue()).zext(newBitSize); - if (newBitSize > truncSize) - APnewAndRHS = APnewAndRHS.trunc(truncSize); - ConstantInt* newAndRHS = ConstantInt::get(APnewAndRHS); - Value *NewAnd = - BinaryOperator::CreateAnd(TruncIndVar, newAndRHS, - UInst->getName()+".nozex", UInst); - for (Value::use_iterator UI2 = UInst->use_begin(), - UE2 = UInst->use_end(); UI2 != UE2; ++UI2) { - Instruction *II = dyn_cast(UI2); - II->replaceAllUsesWith(NewAnd); - DeadInsts.insert(II); - } - DeadInsts.insert(UInst); - } - // If we have zext((i+constant)&constant), we can use the larger - // variable even if the add does overflow. This works whenever the - // constant being ANDed is the same size as i, which it presumably is. - // We don't need to restrict the expression being and'ed to i+const, - // but we have to promote everything in it, so it's convenient. - // zext((i | constant)&constant) is also valid and accepted here. 
- if (UInst && (UInst->getOpcode()==Instruction::Add || - UInst->getOpcode()==Instruction::Or) && - UInst->hasOneUse() && - isa(UInst->getOperand(1))) { - uint64_t newBitSize = LargestType->getPrimitiveSizeInBits(); - ConstantInt* AddRHS = dyn_cast(UInst->getOperand(1)); - Instruction *UInst2 = dyn_cast(UInst->use_begin()); - if (UInst2 && UInst2->getOpcode() == Instruction::And && - !UInst2->use_empty() && - allUsesAreSameTyped(Instruction::ZExt, UInst2) && - isa(UInst2->getOperand(1))) { - ZExtInst* oldZext = dyn_cast(UInst2->use_begin()); - uint64_t truncSize = oldZext->getType()->getPrimitiveSizeInBits(); - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, - L, oldZext->getType(), Rewriter); - ConstantInt* AndRHS = dyn_cast(UInst2->getOperand(1)); - APInt APnewAddRHS = APInt(AddRHS->getValue()).zext(newBitSize); - if (newBitSize > truncSize) - APnewAddRHS = APnewAddRHS.trunc(truncSize); - ConstantInt* newAddRHS = ConstantInt::get(APnewAddRHS); - Value *NewAdd = ((UInst->getOpcode()==Instruction::Add) ? - BinaryOperator::CreateAdd(TruncIndVar, newAddRHS, - UInst->getName()+".nozex", UInst2) : - BinaryOperator::CreateOr(TruncIndVar, newAddRHS, - UInst->getName()+".nozex", UInst2)); - APInt APcopy2 = APInt(AndRHS->getValue()); - ConstantInt* newAndRHS = ConstantInt::get(APcopy2.zext(newBitSize)); - Value *NewAnd = - BinaryOperator::CreateAnd(NewAdd, newAndRHS, - UInst->getName()+".nozex", UInst2); - for (Value::use_iterator UI2 = UInst2->use_begin(), - UE2 = UInst2->use_end(); UI2 != UE2; ++UI2) { - Instruction *II = dyn_cast(UI2); - II->replaceAllUsesWith(NewAnd); - DeadInsts.insert(II); - } - DeadInsts.insert(UInst); - DeadInsts.insert(UInst2); - } - } - } + // Rewrite IV-derived expressions. + RewriteIVExpressions(L, LargestType, Rewriter); - // Replace the old PHI Node with the inserted computation. - PN->replaceAllUsesWith(NewVal); - DeadInsts.insert(PN); - IndVars.pop_back(); - ++NumRemoved; - Changed = true; - } + // Loop-invariant instructions in the preheader that aren't used in the + // loop may be sunk below the loop to reduce register pressure. + SinkUnusedInvariants(L, Rewriter); - DeleteTriviallyDeadInstructions(DeadInsts); - assert(L->isLCSSAForm()); + // Reorder instructions to avoid use-before-def conditions. + FixUsesBeforeDefs(L, Rewriter); + + // For completeness, inform IVUsers of the IV use in the newly-created + // loop exit test instruction. + if (NewICmp) + IU->AddUsersIfInteresting(cast(NewICmp->getOperand(0))); + + // Clean up dead instructions. + DeleteDeadPHIs(L->getHeader()); + // Check a post-condition. + assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!"); return Changed; } +void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, + SCEVExpander &Rewriter) { + SmallVector DeadInsts; + + // Rewrite all induction variable expressions in terms of the canonical + // induction variable. + // + // If there were induction variables of other sizes or offsets, manually + // add the offsets to the primary induction variable and cast, avoiding + // the need for the code evaluation methods to insert induction variables + // of different sizes. 
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + SCEVHandle Stride = IU->StrideOrder[i]; + + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + ilist &List = SI->second->Users; + for (ilist::iterator UI = List.begin(), + E = List.end(); UI != E; ++UI) { + SCEVHandle Offset = UI->getOffset(); + Value *Op = UI->getOperandValToReplace(); + Instruction *User = UI->getUser(); + bool isSigned = UI->isSigned(); + + // Compute the final addrec to expand into code. + SCEVHandle AR = IU->getReplacementExpr(*UI); + + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!Stride->isLoopInvariant(L) && + !isa(AR) && + L->contains(User->getParent())) + continue; + + Value *NewVal = 0; + if (AR->isLoopInvariant(L)) { + BasicBlock::iterator I = Rewriter.getInsertionPoint(); + // Expand loop-invariant values in the loop preheader. They will + // be sunk to the exit block later, if possible. + NewVal = + Rewriter.expandCodeFor(AR, LargestType, + L->getLoopPreheader()->getTerminator()); + Rewriter.setInsertionPoint(I); + ++NumReplaced; + } else { + const Type *IVTy = Offset->getType(); + const Type *UseTy = Op->getType(); + + // Promote the Offset and Stride up to the canonical induction + // variable's bit width. + SCEVHandle PromotedOffset = Offset; + SCEVHandle PromotedStride = Stride; + if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) { + // It doesn't matter for correctness whether zero or sign extension + // is used here, since the value is truncated away below, but if the + // value is signed, sign extension is more likely to be folded. + if (isSigned) { + PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType); + PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType); + } else { + PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType); + // If the stride is obviously negative, use sign extension to + // produce things like x-1 instead of x+255. + if (isa(PromotedStride) && + cast(PromotedStride) + ->getValue()->getValue().isNegative()) + PromotedStride = SE->getSignExtendExpr(PromotedStride, + LargestType); + else + PromotedStride = SE->getZeroExtendExpr(PromotedStride, + LargestType); + } + } + + // Create the SCEV representing the offset from the canonical + // induction variable, still in the canonical induction variable's + // type, so that all expanded arithmetic is done in the same type. + SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType), + PromotedStride, L); + // Add the PromotedOffset as a separate step, because it may not be + // loop-invariant. + NewAR = SE->getAddExpr(NewAR, PromotedOffset); + + // Expand the addrec into instructions. + Value *V = Rewriter.expandCodeFor(NewAR, LargestType); + + // Insert an explicit cast if necessary to truncate the value + // down to the original stride type. This is done outside of + // SCEVExpander because in SCEV expressions, a truncate of an + // addrec is always folded. 
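// Editor's sketch: why an obviously negative stride is sign-extended above
// even when the offset is zero-extended. Zero-extending an 8-bit stride of -1
// gives +255, so the widened recurrence steps by x+255; sign-extending keeps
// the simpler x-1, and both agree modulo 2^8.
#include <cassert>
#include <cstdint>

int main() {
  int8_t Stride8 = -1;
  int64_t ZextStride = static_cast<uint8_t>(Stride8);  // 255
  int64_t SextStride = Stride8;                        // -1
  assert(ZextStride == 255 && SextStride == -1);
  assert(static_cast<uint8_t>(ZextStride) == static_cast<uint8_t>(SextStride));
  return 0;
}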
+ if (LargestType != IVTy) { + if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) + NewAR = SE->getTruncateExpr(NewAR, IVTy); + if (Rewriter.isInsertedExpression(NewAR)) + V = Rewriter.expandCodeFor(NewAR, IVTy); + else { + V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false, + IVTy, false), + V, IVTy); + assert(!isa(V) && !isa(V) && + "LargestType wasn't actually the largest type!"); + // Force the rewriter to use this trunc whenever this addrec + // appears so that it doesn't insert new phi nodes or + // arithmetic in a different type. + Rewriter.addInsertedValue(V, NewAR); + } + } + + DOUT << "INDVARS: Made offset-and-trunc IV for offset " + << *IVTy << " " << *Offset << ": "; + DEBUG(WriteAsOperand(*DOUT, V, false)); + DOUT << "\n"; + + // Now expand it into actual Instructions and patch it into place. + NewVal = Rewriter.expandCodeFor(AR, UseTy); + } + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op + << " into = " << *NewVal << "\n"; + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); + } + } + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) { + Instruction *Inst = dyn_cast_or_null(DeadInsts.pop_back_val()); + if (Inst) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } +} + +/// If there's a single exit block, sink any loop-invariant values that +/// were defined in the preheader but not used inside the loop into the +/// exit block to reduce register pressure in the loop. +void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) { + BasicBlock *ExitBlock = L->getExitBlock(); + if (!ExitBlock) return; + + Instruction *NonPHI = ExitBlock->getFirstNonPHI(); + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock::iterator I = Preheader->getTerminator(); + while (I != Preheader->begin()) { + --I; + // New instructions were inserted at the end of the preheader. Only + // consider those new instructions. + if (!Rewriter.isInsertedInstruction(I)) + break; + // Determine if there is a use in or before the loop (direct or + // otherwise). + bool UsedInLoop = false; + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + BasicBlock *UseBB = cast(UI)->getParent(); + if (PHINode *P = dyn_cast(UI)) { + unsigned i = + PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); + UseBB = P->getIncomingBlock(i); + } + if (UseBB == Preheader || L->contains(UseBB)) { + UsedInLoop = true; + break; + } + } + // If there is, the def must remain in the preheader. + if (UsedInLoop) + continue; + // Otherwise, sink it to the exit block. + Instruction *ToMove = I; + bool Done = false; + if (I != Preheader->begin()) + --I; + else + Done = true; + ToMove->moveBefore(NonPHI); + if (Done) + break; + } +} + +/// Re-schedule the inserted instructions to put defs before uses. This +/// fixes problems that arise when SCEV expressions contain loop-variant +/// values unrelated to the induction variable which are defined inside the +/// loop. FIXME: It would be better to insert instructions in the right +/// place so that this step isn't needed. +void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) { + // Visit all the blocks in the loop in pre-order dom-tree dfs order.
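// Editor's note (not part of the patch): FixUsesBeforeDefs, whose body follows, gives
// each expander-inserted instruction a count of not-yet-placed in-loop operands and moves
// users into place as soon as their last outstanding operand becomes available. A
// standalone sketch of that counting scheme on a toy dependence graph (all names here are
// hypothetical, not from the patch):
#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  // Hypothetical dependence edges: each node lists the operands it needs.
  std::map<std::string, std::vector<std::string>> Operands = {
      {"a", {}}, {"b", {"a"}}, {"c", {"a", "b"}}, {"d", {"c"}}};
  // Nodes arrive in an arbitrary (use-before-def) order.
  std::vector<std::string> Arrival = {"d", "c", "b", "a"};

  std::map<std::string, unsigned> NumPredsLeft;
  for (auto &N : Arrival)
    NumPredsLeft[N] = Operands[N].size();

  std::vector<std::string> Placed;
  std::vector<std::string> Ready;
  for (auto &N : Arrival)
    if (NumPredsLeft[N] == 0)
      Ready.push_back(N);
  while (!Ready.empty()) {
    std::string N = Ready.back();
    Ready.pop_back();
    Placed.push_back(N);
    // Placing N may satisfy the last outstanding operand of one of its users.
    for (auto &U : Arrival)
      for (auto &Op : Operands[U])
        if (Op == N && --NumPredsLeft[U] == 0)
          Ready.push_back(U);
  }
  // Every definition now precedes its uses.
  assert((Placed == std::vector<std::string>{"a", "b", "c", "d"}));
  return 0;
}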
+ DominatorTree *DT = &getAnalysis(); + std::map NumPredsLeft; + SmallVector Worklist; + Worklist.push_back(DT->getNode(L->getHeader())); + do { + DomTreeNode *Node = Worklist.pop_back_val(); + for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) + if (L->contains((*I)->getBlock())) + Worklist.push_back(*I); + BasicBlock *BB = Node->getBlock(); + // Visit all the instructions in the block top down. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Count the number of operands that aren't properly dominating. + unsigned NumPreds = 0; + if (Rewriter.isInsertedInstruction(I) && !isa(I)) + for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + if (Instruction *Inst = dyn_cast(OI)) + if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst)) + ++NumPreds; + NumPredsLeft[I] = NumPreds; + // Notify uses of the position of this instruction, and move the + // users (and their dependents, recursively) into place after this + // instruction if it is their last outstanding operand. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + Instruction *Inst = cast(UI); + std::map::iterator Z = NumPredsLeft.find(Inst); + if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) { + SmallVector UseWorkList; + UseWorkList.push_back(Inst); + BasicBlock::iterator InsertPt = next(I); + while (isa(InsertPt)) ++InsertPt; + do { + Instruction *Use = UseWorkList.pop_back_val(); + Use->moveBefore(InsertPt); + NumPredsLeft.erase(Use); + for (Value::use_iterator IUI = Use->use_begin(), + IUE = Use->use_end(); IUI != IUE; ++IUI) { + Instruction *IUIInst = cast(IUI); + if (L->contains(IUIInst->getParent()) && + Rewriter.isInsertedInstruction(IUIInst) && + !isa(IUIInst)) + UseWorkList.push_back(IUIInst); + } + } while (!UseWorkList.empty()); + } + } + } + } while (!Worklist.empty()); +} + /// Return true if it is OK to use SIToFPInst for an inducation variable /// with given inital and exit values. static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV, @@ -933,8 +728,7 @@ static bool convertToInt(const APFloat &APF, uint64_t *intVal) { /// for(int i = 0; i < 10000; ++i) /// bar((double)i); /// -void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH, - SmallPtrSet &DeadInsts) { +void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1; @@ -1041,25 +835,34 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH, ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(), EC->getParent()->getTerminator()); + // In the following deletions, PH may become dead and may be deleted. + // Use a WeakVH to observe whether this happens. + WeakVH WeakPH = PH; + // Delete old, floating point, exit comparision instruction. EC->replaceAllUsesWith(NewEC); - DeadInsts.insert(EC); + RecursivelyDeleteTriviallyDeadInstructions(EC); // Delete old, floating point, increment instruction. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - DeadInsts.insert(Incr); + RecursivelyDeleteTriviallyDeadInstructions(Incr); - // Replace floating induction variable. Give SIToFPInst preference over - // UIToFPInst because it is faster on platforms that are widely used.
- if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) { - SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv", - PH->getParent()->getFirstNonPHI()); - PH->replaceAllUsesWith(Conv); - } else { - UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv", - PH->getParent()->getFirstNonPHI()); - PH->replaceAllUsesWith(Conv); + // Replace floating induction variable, if it isn't already deleted. + // Give SIToFPInst preference over UIToFPInst because it is faster on + // platforms that are widely used. + if (WeakPH && !PH->use_empty()) { + if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) { + SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv", + PH->getParent()->getFirstNonPHI()); + PH->replaceAllUsesWith(Conv); + } else { + UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv", + PH->getParent()->getFirstNonPHI()); + PH->replaceAllUsesWith(Conv); + } + RecursivelyDeleteTriviallyDeadInstructions(PH); } - DeadInsts.insert(PH); -} + // Add a new IVUsers entry for the newly-created integer PHI. + IU->AddUsersIfInteresting(NewPHI); +} diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 127ef56cbdbb..4f6d53179edb 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -20,6 +20,7 @@ #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -53,40 +54,6 @@ namespace { struct BasedUser; - /// IVStrideUse - Keep track of one use of a strided induction variable, where - /// the stride is stored externally. The Offset member keeps track of the - /// offset from the IV, User is the actual user of the operand, and - /// 'OperandValToReplace' is the operand of the User that is the use. - struct VISIBILITY_HIDDEN IVStrideUse { - SCEVHandle Offset; - Instruction *User; - Value *OperandValToReplace; - - // isUseOfPostIncrementedValue - True if this should use the - // post-incremented version of this IV, not the preincremented version. - // This can only be set in special cases, such as the terminating setcc - // instruction for a loop or uses dominated by the loop. - bool isUseOfPostIncrementedValue; - - IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O) - : Offset(Offs), User(U), OperandValToReplace(O), - isUseOfPostIncrementedValue(false) {} - }; - - /// IVUsersOfOneStride - This structure keeps track of all instructions that - /// have an operand that is based on the trip count multiplied by some stride. - /// The stride for all of these users is common and kept external to this - /// structure. - struct VISIBILITY_HIDDEN IVUsersOfOneStride { - /// Users - Keep track of all of the users of this stride as well as the - /// initial value and the operand that uses the IV. - std::vector Users; - - void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) { - Users.push_back(IVStrideUse(Offset, User, Operand)); - } - }; - /// IVInfo - This structure keeps track of one IV expression inserted during /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as /// well as the PHI node and increment value created for rewrite. 
@@ -110,15 +77,12 @@ namespace { }; class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass { + IVUsers *IU; LoopInfo *LI; DominatorTree *DT; ScalarEvolution *SE; bool Changed; - /// IVUsesByStride - Keep track of all uses of induction variables that we - /// are interested in. The key of the map is the stride of the access. - std::map IVUsesByStride; - /// IVsByStride - Keep track of all IVs that have been inserted for a /// particular stride. std::map IVsByStride; @@ -127,14 +91,9 @@ namespace { /// reused (nor should they be rewritten to reuse other strides). SmallSet StrideNoReuse; - /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable: - /// We use this to iterate over the IVUsesByStride collection without being - /// dependent on random ordering of pointers in the process. - SmallVector StrideOrder; - /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. - SmallVector DeadInsts; + SmallVector DeadInsts; /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. @@ -161,11 +120,11 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } private: - bool AddUsersIfInteresting(Instruction *I, Loop *L, - SmallPtrSet &Processed); ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, const SCEVHandle* &CondStride); @@ -191,6 +150,8 @@ namespace { const std::vector& UsersToProcess); bool ValidScale(bool, int64_t, const std::vector& UsersToProcess); + bool ValidOffset(bool, int64_t, int64_t, + const std::vector& UsersToProcess); SCEVHandle CollectIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, @@ -242,21 +203,8 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { if (DeadInsts.empty()) return; - // Sort the deadinsts list so that we can trivially eliminate duplicates as we - // go. The code below never adds a non-dead instruction to the worklist, but - // callers may not be so careful. - array_pod_sort(DeadInsts.begin(), DeadInsts.end()); - - // Drop duplicate instructions and those with uses. - for (unsigned i = 0, e = DeadInsts.size()-1; i < e; ++i) { - Instruction *I = DeadInsts[i]; - if (!I->use_empty()) DeadInsts[i] = 0; - while (i != e && DeadInsts[i+1] == I) - DeadInsts[++i] = 0; - } - while (!DeadInsts.empty()) { - Instruction *I = DeadInsts.back(); + Instruction *I = dyn_cast_or_null(DeadInsts.back()); DeadInsts.pop_back(); if (I == 0 || !isInstructionTriviallyDead(I)) @@ -313,111 +261,6 @@ static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { return false; } -/// getSCEVStartAndStride - Compute the start and stride of this expression, -/// returning false if the expression is not a start/stride pair, or true if it -/// is. The stride must be a loop invariant expression, but the start may be -/// a mix of loop invariant and loop variant expressions. The start cannot, -/// however, contain an AddRec from a different loop, unless that loop is an -/// outer loop of the current loop. -static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, - SCEVHandle &Start, SCEVHandle &Stride, - ScalarEvolution *SE, DominatorTree *DT) { - SCEVHandle TheAddRec = Start; // Initialize to zero. - - // If the outer level is an AddExpr, the operands are all start values except - // for a nested AddRecExpr. 
- if (const SCEVAddExpr *AE = dyn_cast(SH)) { - for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) - if (const SCEVAddRecExpr *AddRec = - dyn_cast(AE->getOperand(i))) { - if (AddRec->getLoop() == L) - TheAddRec = SE->getAddExpr(AddRec, TheAddRec); - else - return false; // Nested IV of some sort? - } else { - Start = SE->getAddExpr(Start, AE->getOperand(i)); - } - - } else if (isa(SH)) { - TheAddRec = SH; - } else { - return false; // not analyzable. - } - - const SCEVAddRecExpr *AddRec = dyn_cast(TheAddRec); - if (!AddRec || AddRec->getLoop() != L) return false; - - // FIXME: Generalize to non-affine IV's. - if (!AddRec->isAffine()) return false; - - // If Start contains an SCEVAddRecExpr from a different loop, other than an - // outer loop of the current loop, reject it. SCEV has no concept of - // operating on more than one loop at a time so don't confuse it with such - // expressions. - if (containsAddRecFromDifferentLoop(AddRec->getOperand(0), L)) - return false; - - Start = SE->getAddExpr(Start, AddRec->getOperand(0)); - - if (!isa(AddRec->getOperand(1))) { - // If stride is an instruction, make sure it dominates the loop preheader. - // Otherwise we could end up with a use before def situation. - BasicBlock *Preheader = L->getLoopPreheader(); - if (!AddRec->getOperand(1)->dominates(Preheader, DT)) - return false; - - DOUT << "[" << L->getHeader()->getName() - << "] Variable stride: " << *AddRec << "\n"; - } - - Stride = AddRec->getOperand(1); - return true; -} - -/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression -/// and now we need to decide whether the user should use the preinc or post-inc -/// value. If this user should use the post-inc version of the IV, return true. -/// -/// Choosing wrong here can break dominance properties (if we choose to use the -/// post-inc value when we cannot) or it can end up adding extra live-ranges to -/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we -/// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - Loop *L, DominatorTree *DT, Pass *P, - SmallVectorImpl &DeadInsts){ - // If the user is in the loop, use the preinc value. - if (L->contains(User->getParent())) return false; - - BasicBlock *LatchBlock = L->getLoopLatch(); - - // Ok, the user is outside of the loop. If it is dominated by the latch - // block, use the post-inc value. - if (DT->dominates(LatchBlock, User->getParent())) - return true; - - // There is one case we have to be careful of: PHI nodes. These little guys - // can live in blocks that do not dominate the latch block, but (since their - // uses occur in the predecessor block, not the block the PHI lives in) should - // still use the post-inc value. Check for this case now. - PHINode *PN = dyn_cast(User); - if (!PN) return false; // not a phi, not dominated by latch block. - - // Look at all of the uses of IV by the PHI node. If any use corresponds to - // a block that is not dominated by the latch block, give up and use the - // preincremented value. - unsigned NumUses = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == IV) { - ++NumUses; - if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - } - - // Okay, all uses of IV by PN are in predecessor blocks that really are - // dominated by the latch block. Use the post-incremented value. 
- return true; -} - /// isAddressUse - Returns true if the specified instruction is using the /// specified value as an address. static bool isAddressUse(Instruction *Inst, Value *OperandVal) { @@ -467,90 +310,6 @@ static const Type *getAccessType(const Instruction *Inst) { return UseTy; } -/// AddUsersIfInteresting - Inspect the specified instruction. If it is a -/// reducible SCEV, recursively add its users to the IVUsesByStride set and -/// return true. Otherwise, return false. -bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, - SmallPtrSet &Processed) { - if (!SE->isSCEVable(I->getType())) - return false; // Void and FP expressions cannot be reduced. - - // LSR is not APInt clean, do not touch integers bigger than 64-bits. - if (SE->getTypeSizeInBits(I->getType()) > 64) - return false; - - if (!Processed.insert(I)) - return true; // Instruction already handled. - - // Get the symbolic expression for this instruction. - SCEVHandle ISE = SE->getSCEV(I); - if (isa(ISE)) return false; - - // Get the start and stride for this expression. - SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType()); - SCEVHandle Stride = Start; - if (!getSCEVStartAndStride(ISE, L, Start, Stride, SE, DT)) - return false; // Non-reducible symbolic expression, bail out. - - std::vector IUsers; - // Collect all I uses now because IVUseShouldUsePostIncValue may - // invalidate use_iterator. - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) - IUsers.push_back(cast(*UI)); - - for (unsigned iused_index = 0, iused_size = IUsers.size(); - iused_index != iused_size; ++iused_index) { - - Instruction *User = IUsers[iused_index]; - - // Do not infinitely recurse on PHI nodes. - if (isa(User) && Processed.count(User)) - continue; - - // Descend recursively, but not into PHI nodes outside the current loop. - // It's important to see the entire expression outside the loop to get - // choices that depend on addressing mode use right, although we won't - // consider references ouside the loop in all cases. - // If User is already in Processed, we don't want to recurse into it again, - // but do want to record a second reference in the same instruction. - bool AddUserToIVUsers = false; - if (LI->getLoopFor(User->getParent()) != L) { - if (isa(User) || Processed.count(User) || - !AddUsersIfInteresting(User, L, Processed)) { - DOUT << "FOUND USER in other loop: " << *User - << " OF SCEV: " << *ISE << "\n"; - AddUserToIVUsers = true; - } - } else if (Processed.count(User) || - !AddUsersIfInteresting(User, L, Processed)) { - DOUT << "FOUND USER: " << *User - << " OF SCEV: " << *ISE << "\n"; - AddUserToIVUsers = true; - } - - if (AddUserToIVUsers) { - IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride]; - if (StrideUses.Users.empty()) // First occurrence of this stride? - StrideOrder.push_back(Stride); - - // Okay, we found a user that we cannot reduce. Analyze the instruction - // and decide what to do with it. If we are a use inside of the loop, use - // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) { - // The value used will be incremented by the stride more than we are - // expecting, so subtract this off. 
- SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride); - StrideUses.addUser(NewStart, User, I); - StrideUses.Users.back().isUseOfPostIncrementedValue = true; - DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; - } else { - StrideUses.addUser(Start, User, I); - } - } - } - return true; -} - namespace { /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. @@ -571,6 +330,13 @@ namespace { /// EmittedBase. Value *OperandValToReplace; + /// isSigned - The stride (and thus also the Base) of this use may be in + /// a narrower type than the use itself (OperandValToReplace->getType()). + /// When this is the case, the isSigned field indicates whether the + /// IV expression should be signed-extended instead of zero-extended to + /// fit the type of the use. + bool isSigned; + /// Imm - The immediate value that should be added to the base immediately /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that @@ -589,10 +355,11 @@ namespace { bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : SE(se), Base(IVSU.Offset), Inst(IVSU.User), - OperandValToReplace(IVSU.OperandValToReplace), + : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), + OperandValToReplace(IVSU.getOperandValToReplace()), + isSigned(IVSU.isSigned()), Imm(SE->getIntegerSCEV(0, Base->getType())), - isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {} + isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added @@ -600,7 +367,7 @@ namespace { void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl &DeadInsts); + SmallVectorImpl &DeadInsts); Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, const Type *Ty, @@ -638,19 +405,27 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, InsertLoop = InsertLoop->getParentLoop(); } - Value *Base = Rewriter.expandCodeFor(NewBase, Ty, BaseInsertPt); + Value *Base = Rewriter.expandCodeFor(NewBase, NewBase->getType(), + BaseInsertPt); + + SCEVHandle NewValSCEV = SE->getUnknown(Base); // If there is no immediate value, skip the next part. - if (Imm->isZero()) - return Base; + if (!Imm->isZero()) { + // If we are inserting the base and imm values in the same block, make sure + // to adjust the IP position if insertion reused a result. + if (IP == BaseInsertPt) + IP = Rewriter.getInsertionPoint(); + + // Always emit the immediate (if non-zero) into the same block as the user. + NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); + } + + if (isSigned) + NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty); + else + NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty); - // If we are inserting the base and imm values in the same block, make sure to - // adjust the IP position if insertion reused a result. - if (IP == BaseInsertPt) - IP = Rewriter.getInsertionPoint(); - - // Always emit the immediate (if non-zero) into the same block as the user. 
- SCEVHandle NewValSCEV = SE->getAddExpr(SE->getUnknown(Base), Imm); return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); } @@ -664,7 +439,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl &DeadInsts){ + SmallVectorImpl &DeadInsts) { if (!isa(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -1158,6 +933,39 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, return true; } +/// ValidOffset - Check whether the given Offset is valid for all loads and +/// stores in UsersToProcess. +/// +bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, + int64_t Offset, + int64_t Scale, + const std::vector& UsersToProcess) { + if (!TLI) + return true; + + for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { + // If this is a load or other access, pass the type of the access in. + const Type *AccessTy = Type::VoidTy; + if (isAddressUse(UsersToProcess[i].Inst, + UsersToProcess[i].OperandValToReplace)) + AccessTy = getAccessType(UsersToProcess[i].Inst); + else if (isa(UsersToProcess[i].Inst)) + continue; + + TargetLowering::AddrMode AM; + if (const SCEVConstant *SC = dyn_cast(UsersToProcess[i].Imm)) + AM.BaseOffs = SC->getValue()->getSExtValue(); + AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset; + AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); + AM.Scale = Scale; + + // If load[imm+r*scale] is illegal, bail out. + if (!TLI->isLegalAddressingMode(AM, AccessTy)) + return false; + } + return true; +} + /// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not /// a nop. bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, @@ -1196,10 +1004,10 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, if (const SCEVConstant *SC = dyn_cast(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e; - ++NewStride) { + for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { std::map::iterator SI = - IVsByStride.find(StrideOrder[NewStride]); + IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa(SI->first) || StrideNoReuse.count(SI->first)) continue; @@ -1215,24 +1023,44 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // multiplications. if (Scale == 1 || (AllUsesAreAddresses && - ValidScale(HasBaseReg, Scale, UsersToProcess))) + ValidScale(HasBaseReg, Scale, UsersToProcess))) { + // Prefer to reuse an IV with a base of zero. for (std::vector::iterator II = SI->second.IVs.begin(), IE = SI->second.IVs.end(); II != IE; ++II) - // FIXME: Only handle base == 0 for now. - // Only reuse previous IV if it would not require a type conversion. + // Only reuse previous IV if it would not require a type conversion + // and if the base difference can be folded. if (II->Base->isZero() && !RequiresTypeConversion(II->Base->getType(), Ty)) { IV = *II; return SE->getIntegerSCEV(Scale, Stride->getType()); } + // Otherwise, settle for an IV with a foldable base. + if (AllUsesAreAddresses) + for (std::vector::iterator II = SI->second.IVs.begin(), + IE = SI->second.IVs.end(); II != IE; ++II) + // Only reuse previous IV if it would not require a type conversion + // and if the base difference can be folded. 
+ if (SE->getEffectiveSCEVType(II->Base->getType()) == + SE->getEffectiveSCEVType(Ty) && + isa(II->Base)) { + int64_t Base = + cast(II->Base)->getValue()->getSExtValue(); + if (Base > INT32_MIN && Base <= INT32_MAX && + ValidOffset(HasBaseReg, -Base * Scale, + Scale, UsersToProcess)) { + IV = *II; + return SE->getIntegerSCEV(Scale, Stride->getType()); + } + } + } } } else if (AllUsesAreOutsideLoop) { // Accept nonconstant strides here; it is really really right to substitute // an existing IV if we can. - for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e; - ++NewStride) { + for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { std::map::iterator SI = - IVsByStride.find(StrideOrder[NewStride]); + IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa(SI->first)) continue; int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); @@ -1249,10 +1077,10 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, } // Special case, old IV is -1*x and this one is x. Can treat this one as // -1*old. - for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e; - ++NewStride) { + for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { std::map::iterator SI = - IVsByStride.find(StrideOrder[NewStride]); + IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end()) continue; if (const SCEVMulExpr *ME = dyn_cast(SI->first)) @@ -1303,10 +1131,15 @@ SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, bool &AllUsesAreAddresses, bool &AllUsesAreOutsideLoop, std::vector &UsersToProcess) { + // FIXME: Generalize to non-affine IV's. + if (!Stride->isLoopInvariant(L)) + return SE->getIntegerSCEV(0, Stride->getType()); + UsersToProcess.reserve(Uses.Users.size()); - for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) { - UsersToProcess.push_back(BasedUser(Uses.Users[i], SE)); - + for (ilist::iterator I = Uses.Users.begin(), + E = Uses.Users.end(); I != E; ++I) { + UsersToProcess.push_back(BasedUser(*I, SE)); + // Move any loop variant operands from the offset field to the immediate // field of the use, so that we don't try to use something before it is // computed. @@ -1404,7 +1237,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( // TODO: For now, don't do full strength reduction if there could // potentially be greater-stride multiples of the current stride // which could reuse the current stride IV. - if (StrideOrder.back() != Stride) + if (IU->StrideOrder.back() != Stride) return false; // Iterate through the uses to find conditions that automatically rule out @@ -1853,8 +1686,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp); - if (SE->getTypeSizeInBits(RewriteOp->getType()) != - SE->getTypeSizeInBits(ReplacedTy)) { + if (SE->getEffectiveSCEVType(RewriteOp->getType()) != + SE->getEffectiveSCEVType(ReplacedTy)) { assert(SE->getTypeSizeInBits(RewriteOp->getType()) > SE->getTypeSizeInBits(ReplacedTy) && "Unexpected widening cast!"); @@ -1884,8 +1717,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // it here. 
if (!ReuseIV.Base->isZero()) { SCEVHandle typedBase = ReuseIV.Base; - if (SE->getTypeSizeInBits(RewriteExpr->getType()) != - SE->getTypeSizeInBits(ReuseIV.Base->getType())) { + if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != + SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { // It's possible the original IV is a larger type than the new IV, // in which case we have to truncate the Base. We checked in // RequiresTypeConversion that this is valid. @@ -1929,7 +1762,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. - DeadInsts.push_back(cast(User.OperandValToReplace)); + DeadInsts.push_back(User.OperandValToReplace); UsersToProcess.pop_back(); ++NumReduced; @@ -1949,19 +1782,19 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, /// false. bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEVHandle *&CondStride) { - for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse; - ++Stride) { - std::map::iterator SI = - IVUsesByStride.find(StrideOrder[Stride]); - assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); - - for (std::vector::iterator UI = SI->second.Users.begin(), - E = SI->second.Users.end(); UI != E; ++UI) - if (UI->User == Cond) { + for (unsigned Stride = 0, e = IU->StrideOrder.size(); + Stride != e && !CondUse; ++Stride) { + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + + for (ilist::iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; ++UI) + if (UI->getUser() == Cond) { // NOTE: we could handle setcc instructions with multiple uses here, but // InstCombine does it as well for simple uses, it's not clear that it // occurs enough in real life to handle. - CondUse = &*UI; + CondUse = UI; CondStride = &SI->first; return true; } @@ -2022,9 +1855,18 @@ namespace { ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, const SCEVHandle* &CondStride) { - if (StrideOrder.size() < 2 || - IVUsesByStride[*CondStride].Users.size() != 1) + // If there's only one stride in the loop, there's nothing to do here. + if (IU->StrideOrder.size() < 2) return Cond; + // If there are other users of the condition's stride, don't bother + // trying to change the condition because the stride will still + // remain. + std::map::iterator I = + IU->IVUsesByStride.find(*CondStride); + if (I == IU->IVUsesByStride.end() || + I->second->Users.size() != 1) + return Cond; + // Only handle constant strides for now. const SCEVConstant *SC = dyn_cast(*CondStride); if (!SC) return Cond; @@ -2051,9 +1893,9 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, return Cond; // Look for a suitable stride / iv as replacement. - for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) { - std::map::iterator SI = - IVUsesByStride.find(StrideOrder[i]); + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); if (!isa(SI->first)) continue; int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); @@ -2069,6 +1911,9 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // Check for overflow. 
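// Editor's note (not part of the patch): ChangeCompareStride, above, tries to retire the
// exit compare's stride by rewriting the compare against an existing IV whose stride is
// Scale times larger and scaling the compared constant to match; the two isSignedIntN
// checks that follow are the real overflow guards. A standalone sketch of why the scaled
// compare is equivalent, with illustrative values only:
#include <cassert>
#include <cstdint>

int main() {
  const int64_t CmpVal = 100;  // original exit test: iv1 < 100, where iv1 += 1
  const int64_t Scale  = 4;    // an existing IV in the loop steps by 4 instead
  const int64_t NewCmpVal = CmpVal * Scale; // rewritten exit test: iv4 < 400

  // Simplified stand-in for the bit-width overflow checks performed above.
  assert(NewCmpVal / Scale == CmpVal && "scaled compare value overflowed");

  int64_t IV1 = 0, IV4 = 0;
  int Trips1 = 0, Trips4 = 0;
  while (IV1 < CmpVal)    { IV1 += 1;     ++Trips1; }
  while (IV4 < NewCmpVal) { IV4 += Scale; ++Trips4; }
  // Both forms of the exit test run the loop the same number of times.
  assert(Trips1 == Trips4 && Trips1 == 100);
  return 0;
}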
if (!Mul.isSignedIntN(BitWidth)) continue; + // Check for overflow in the stride's type too. + if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType()))) + continue; // Watch out for overflow. if (ICmpInst::isSignedPredicate(Predicate) && @@ -2079,9 +1924,27 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, continue; // Pick the best iv to use trying to avoid a cast. NewCmpLHS = NULL; - for (std::vector::iterator UI = SI->second.Users.begin(), - E = SI->second.Users.end(); UI != E; ++UI) { - NewCmpLHS = UI->OperandValToReplace; + for (ilist::iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; ++UI) { + Value *Op = UI->getOperandValToReplace(); + + // If the IVStrideUse implies a cast, check for an actual cast which + // can be used to find the original IV expression. + if (SE->getEffectiveSCEVType(Op->getType()) != + SE->getEffectiveSCEVType(SI->first->getType())) { + CastInst *CI = dyn_cast(Op); + // If it's not a simple cast, it's complicated. + if (!CI) + continue; + // If it's a cast from a type other than the stride type, + // it's complicated. + if (CI->getOperand(0)->getType() != SI->first->getType()) + continue; + // Ok, we found the IV expression in the stride's type. + Op = CI->getOperand(0); + } + + NewCmpLHS = Op; if (NewCmpLHS->getType() == CmpTy) break; } @@ -2105,13 +1968,13 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // Don't rewrite if use offset is non-constant and the new type is // of a different type. // FIXME: too conservative? - if (NewTyBits != TyBits && !isa(CondUse->Offset)) + if (NewTyBits != TyBits && !isa(CondUse->getOffset())) continue; bool AllUsesAreAddresses = true; bool AllUsesAreOutsideLoop = true; std::vector UsersToProcess; - SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L, + SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -2127,7 +1990,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (Scale < 0 && !Cond->isEquality()) Predicate = ICmpInst::getSwappedPredicate(Predicate); - NewStride = &StrideOrder[i]; + NewStride = &IU->StrideOrder[i]; if (!isa(NewCmpTy)) NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { @@ -2135,10 +1998,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits - ? SE->getMulExpr(CondUse->Offset, + ? SE->getMulExpr(CondUse->getOffset(), SE->getConstant(ConstantInt::get(CmpTy, Scale))) : SE->getConstant(ConstantInt::get(NewCmpIntTy, - cast(CondUse->Offset)->getValue()->getSExtValue()*Scale)); + cast(CondUse->getOffset())->getValue() + ->getSExtValue()*Scale)); break; } } @@ -2165,13 +2029,12 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, OldCond); // Remove the old compare instruction. The old indvar is probably dead too. 
- DeadInsts.push_back(cast(CondUse->OperandValToReplace)); + DeadInsts.push_back(CondUse->getOperandValToReplace()); OldCond->replaceAllUsesWith(Cond); OldCond->eraseFromParent(); - IVUsesByStride[*CondStride].Users.pop_back(); - IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewCmpLHS); - CondUse = &IVUsesByStride[*NewStride].Users.back(); + IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false); + CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); CondStride = NewStride; ++NumEliminated; Changed = true; @@ -2287,12 +2150,12 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, // Delete the max calculation instructions. Cond->replaceAllUsesWith(NewCond); - Cond->eraseFromParent(); + CondUse->setUser(NewCond); Instruction *Cmp = cast(Sel->getOperand(0)); + Cond->eraseFromParent(); Sel->eraseFromParent(); if (Cmp->use_empty()) Cmp->eraseFromParent(); - CondUse->User = NewCond; return NewCond; } @@ -2304,19 +2167,19 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { if (isa(BackedgeTakenCount)) return; - for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; + for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map::iterator SI = - IVUsesByStride.find(StrideOrder[Stride]); - assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); if (!isa(SI->first)) continue; - for (std::vector::iterator UI = SI->second.Users.begin(), - E = SI->second.Users.end(); UI != E; /* empty */) { - std::vector::iterator CandidateUI = UI; + for (ilist::iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; /* empty */) { + ilist::iterator CandidateUI = UI; ++UI; - Instruction *ShadowUse = CandidateUI->User; + Instruction *ShadowUse = CandidateUI->getUser(); const Type *DestTy = NULL; /* If shadow use is a int->float cast then insert a second IV @@ -2331,9 +2194,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { for (unsigned i = 0; i < n; ++i, ++d) foo(d); */ - if (UIToFPInst *UCast = dyn_cast(CandidateUI->User)) + if (UIToFPInst *UCast = dyn_cast(CandidateUI->getUser())) DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast(CandidateUI->User)) + else if (SIToFPInst *SCast = dyn_cast(CandidateUI->getUser())) DestTy = SCast->getDestTy(); if (!DestTy) continue; @@ -2400,7 +2263,6 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { /* Remove cast operation */ ShadowUse->replaceAllUsesWith(NewPH); ShadowUse->eraseFromParent(); - SI->second.Users.erase(CandidateUI); NumShadow++; break; } @@ -2450,11 +2312,12 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // transform the icmp to use post-inc iv. Otherwise do so only if it would // not reuse another iv and its iv would be reused by other uses. We are // optimizing for the case where the icmp is the only use of the iv. 
- IVUsersOfOneStride &StrideUses = IVUsesByStride[*CondStride]; - for (unsigned i = 0, e = StrideUses.Users.size(); i != e; ++i) { - if (StrideUses.Users[i].User == Cond) + IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride]; + for (ilist::iterator I = StrideUses.Users.begin(), + E = StrideUses.Users.end(); I != E; ++I) { + if (I->getUser() == Cond) continue; - if (!StrideUses.Users[i].isUseOfPostIncrementedValue) + if (!I->isUseOfPostIncrementedValue()) return; } @@ -2463,10 +2326,10 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // StrengthReduceStridedIVUsers? if (const SCEVConstant *SC = dyn_cast(*CondStride)) { int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, ee = StrideOrder.size(); NewStride != ee; + for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee; ++NewStride) { - std::map::iterator SI = - IVUsesByStride.find(StrideOrder[NewStride]); + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); if (!isa(SI->first) || SI->first == *CondStride) continue; int64_t SSInt = @@ -2479,7 +2342,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { bool AllUsesAreAddresses = true; bool AllUsesAreOutsideLoop = true; std::vector UsersToProcess; - SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L, + SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -2518,17 +2381,18 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { LatchBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! - IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond, - CondUse->OperandValToReplace); - CondUse = &IVUsesByStride[*CondStride].Users.back(); + IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, + CondUse->getOperandValToReplace(), + false); + CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); } } // If we get to here, we know that we can transform the setcc instruction to // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. - CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride); - CondUse->isUseOfPostIncrementedValue = true; + CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride)); + CondUse->setIsUseOfPostIncrementedValue(true); Changed = true; ++NumLoopCond; @@ -2644,19 +2508,13 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { + IU = &getAnalysis(); LI = &getAnalysis(); DT = &getAnalysis(); SE = &getAnalysis(); Changed = false; - // Find all uses of induction variables in this loop, and categorize - // them by stride. Start by finding all of the PHI nodes in the header for - // this loop. If they are induction variables, inspect their uses. - SmallPtrSet Processed; // Don't reprocess instructions. - for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) - AddUsersIfInteresting(I, L, Processed); - - if (!IVUsesByStride.empty()) { + if (!IU->IVUsesByStride.empty()) { #ifndef NDEBUG DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart() << "\" "; @@ -2664,7 +2522,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { #endif // Sort the StrideOrder so we process larger strides first. 
- std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare(SE)); + std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(), + StrideCompare(SE)); // Optimize induction variables. Some indvar uses can be transformed to use // strides that will be needed for other purposes. A common example of this @@ -2695,11 +2554,15 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // Also, note that we iterate over IVUsesByStride indirectly by using // StrideOrder. This extra layer of indirection makes the ordering of // strides deterministic - not dependent on map order. - for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { - std::map::iterator SI = - IVUsesByStride.find(StrideOrder[Stride]); - assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); - StrengthReduceStridedIVUsers(SI->first, SI->second, L); + for (unsigned Stride = 0, e = IU->StrideOrder.size(); + Stride != e; ++Stride) { + std::map::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. + if (!SI->first->isLoopInvariant(L)) + continue; + StrengthReduceStridedIVUsers(SI->first, *SI->second, L); } } @@ -2708,9 +2571,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { OptimizeLoopCountIV(L); // We're done analyzing this loop; release all the state we built up for it. - IVUsesByStride.clear(); IVsByStride.clear(); - StrideOrder.clear(); StrideNoReuse.clear(); // Clean up after ourselves diff --git a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll index d11b025d5890..275febaf7dca 100644 --- a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,10 +1,11 @@ ; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t ; RUN: grep inc %t | count 1 ; RUN: grep dec %t | count 2 -; RUN: grep addq %t | count 13 -; RUN: grep leaq %t | count 8 -; RUN: grep leal %t | count 4 -; RUN: grep movq %t | count 5 +; RUN: grep addq %t | count 8 +; RUN: grep addb %t | count 2 +; RUN: grep leaq %t | count 12 +; RUN: grep leal %t | count 2 +; RUN: grep movq %t | count 4 ; IV users in each of the loops from other loops shouldn't cause LSR ; to insert new induction variables. Previously it would create a diff --git a/llvm/test/CodeGen/X86/masked-iv-safe.ll b/llvm/test/CodeGen/X86/masked-iv-safe.ll index e9b80a4c42c3..e10253516ba5 100644 --- a/llvm/test/CodeGen/X86/masked-iv-safe.ll +++ b/llvm/test/CodeGen/X86/masked-iv-safe.ll @@ -3,14 +3,13 @@ ; RUN: not grep movz %t ; RUN: not grep sar %t ; RUN: not grep shl %t -; RUN: grep add %t | count 6 -; RUN: grep inc %t | count 2 -; RUN: grep dec %t | count 4 +; RUN: grep add %t | count 2 +; RUN: grep inc %t | count 4 +; RUN: grep dec %t | count 2 ; RUN: grep lea %t | count 2 ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. -; Also, the loop-reversal algorithm kicks in twice. 
define void @count_up(double* %d, i64 %n) nounwind { entry: diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-5.ll b/llvm/test/CodeGen/X86/subreg-to-reg-5.ll index eee751a87d53..81b262ace84d 100644 --- a/llvm/test/CodeGen/X86/subreg-to-reg-5.ll +++ b/llvm/test/CodeGen/X86/subreg-to-reg-5.ll @@ -8,7 +8,8 @@ entry: bb2: ; preds = %bb3, %entry %B_addr.0.rec = phi i64 [ %indvar.next154, %bb3 ], [ 0, %entry ] ; [#uses=2] - br i1 false, label %bb3, label %bb4 + %z = icmp slt i64 %B_addr.0.rec, 20000 + br i1 %z, label %bb3, label %bb4 bb3: ; preds = %bb2 %indvar.next154 = add i64 %B_addr.0.rec, 1 ; [#uses=1] @@ -17,7 +18,7 @@ bb3: ; preds = %bb2 bb4: ; preds = %bb2 %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; [#uses=1] %t1 = ptrtoint float* %B_addr.0 to i64 ; [#uses=1] - %t2 = and i64 %t1, 15 ; [#uses=1] + %t2 = and i64 %t1, 4294967295 ; [#uses=1] %t3 = icmp eq i64 %t2, 0 ; [#uses=1] br i1 %t3, label %bb5, label %bb10.preheader @@ -25,7 +26,7 @@ bb10.preheader: ; preds = %bb4 br label %bb9 bb5: ; preds = %bb4 - unreachable + ret float 7.0 bb9: ; preds = %bb10.preheader %t5 = getelementptr float* %B, i64 0 ; [#uses=1] diff --git a/llvm/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll b/llvm/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll index 5cc595e00957..4d26803b437d 100644 --- a/llvm/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll +++ b/llvm/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll @@ -1,5 +1,4 @@ -; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep {sext} -; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep {zext} +; RUN: llvm-as < %s | opt -indvars -instcombine | llvm-dis | not grep {\[sz\]ext} ; ModuleID = '' ;extern int *a, *b, *c, *d, *e, *f; /* 64 bit */ ;extern int K[256]; diff --git a/llvm/test/Transforms/IndVarSimplify/ada-loops.ll b/llvm/test/Transforms/IndVarSimplify/ada-loops.ll new file mode 100644 index 000000000000..56325b36cbbe --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/ada-loops.ll @@ -0,0 +1,90 @@ +; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t +; RUN: grep phi %t | count 4 +; RUN: grep {= phi i32} %t | count 4 +; RUN: not grep {sext i} %t +; RUN: not grep {zext i} %t +; RUN: not grep {trunc i} %t +; RUN: not grep {add i8} %t +; PR1301 + +; Do a bunch of analysis and prove that the loops can use an i32 trip +; count without casting. 
+ +; ModuleID = 'ada.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-pc-linux-gnu" + +define void @kinds__sbytezero([256 x i32]* nocapture %a) nounwind { +bb.thread: + %tmp46 = getelementptr [256 x i32]* %a, i32 0, i32 0 ; [#uses=1] + store i32 0, i32* %tmp46 + br label %bb + +bb: ; preds = %bb, %bb.thread + %i.0.reg2mem.0 = phi i8 [ -128, %bb.thread ], [ %tmp8, %bb ] ; [#uses=1] + %tmp8 = add i8 %i.0.reg2mem.0, 1 ; [#uses=3] + %tmp1 = sext i8 %tmp8 to i32 ; [#uses=1] + %tmp3 = add i32 %tmp1, 128 ; [#uses=1] + %tmp4 = getelementptr [256 x i32]* %a, i32 0, i32 %tmp3 ; [#uses=1] + store i32 0, i32* %tmp4 + %0 = icmp eq i8 %tmp8, 127 ; [#uses=1] + br i1 %0, label %return, label %bb + +return: ; preds = %bb + ret void +} + +define void @kinds__ubytezero([256 x i32]* nocapture %a) nounwind { +bb.thread: + %tmp35 = getelementptr [256 x i32]* %a, i32 0, i32 0 ; [#uses=1] + store i32 0, i32* %tmp35 + br label %bb + +bb: ; preds = %bb, %bb.thread + %i.0.reg2mem.0 = phi i8 [ 0, %bb.thread ], [ %tmp7, %bb ] ; [#uses=1] + %tmp7 = add i8 %i.0.reg2mem.0, 1 ; [#uses=3] + %tmp1 = zext i8 %tmp7 to i32 ; [#uses=1] + %tmp3 = getelementptr [256 x i32]* %a, i32 0, i32 %tmp1 ; [#uses=1] + store i32 0, i32* %tmp3 + %0 = icmp eq i8 %tmp7, -1 ; [#uses=1] + br i1 %0, label %return, label %bb + +return: ; preds = %bb + ret void +} + +define void @kinds__srangezero([21 x i32]* nocapture %a) nounwind { +bb.thread: + br label %bb + +bb: ; preds = %bb, %bb.thread + %i.0.reg2mem.0 = phi i8 [ -10, %bb.thread ], [ %tmp7, %bb ] ; [#uses=2] + %tmp12 = sext i8 %i.0.reg2mem.0 to i32 ; [#uses=1] + %tmp4 = add i32 %tmp12, 10 ; [#uses=1] + %tmp5 = getelementptr [21 x i32]* %a, i32 0, i32 %tmp4 ; [#uses=1] + store i32 0, i32* %tmp5 + %tmp7 = add i8 %i.0.reg2mem.0, 1 ; [#uses=2] + %0 = icmp sgt i8 %tmp7, 10 ; [#uses=1] + br i1 %0, label %return, label %bb + +return: ; preds = %bb + ret void +} + +define void @kinds__urangezero([21 x i32]* nocapture %a) nounwind { +bb.thread: + br label %bb + +bb: ; preds = %bb, %bb.thread + %i.0.reg2mem.0 = phi i8 [ 10, %bb.thread ], [ %tmp7, %bb ] ; [#uses=2] + %tmp12 = sext i8 %i.0.reg2mem.0 to i32 ; [#uses=1] + %tmp4 = add i32 %tmp12, -10 ; [#uses=1] + %tmp5 = getelementptr [21 x i32]* %a, i32 0, i32 %tmp4 ; [#uses=1] + store i32 0, i32* %tmp5 + %tmp7 = add i8 %i.0.reg2mem.0, 1 ; [#uses=2] + %0 = icmp sgt i8 %tmp7, 30 ; [#uses=1] + br i1 %0, label %return, label %bb + +return: ; preds = %bb + ret void +} diff --git a/llvm/test/Transforms/IndVarSimplify/iv-zext.ll b/llvm/test/Transforms/IndVarSimplify/iv-zext.ll new file mode 100644 index 000000000000..76d48de0d3c5 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/iv-zext.ll @@ -0,0 +1,33 @@ +; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t +; RUN: not grep and %t +; RUN: not grep zext %t + +target datalayout = "-p:64:64:64" + +define void @foo(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* 
%t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} diff --git a/llvm/test/Transforms/IndVarSimplify/loop_evaluate_6.ll b/llvm/test/Transforms/IndVarSimplify/loop_evaluate_6.ll new file mode 100644 index 000000000000..35fbf52b7ef0 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/loop_evaluate_6.ll @@ -0,0 +1,29 @@ +; RUN: llvm-as < %s | opt -indvars -loop-deletion | llvm-dis | grep phi | count 1 + +; Indvars should be able to evaluate this loop, allowing loop deletion +; to delete it. + +define i32 @test(i32 %x_offs) nounwind readnone { +entry: + %0 = icmp sgt i32 %x_offs, 4 ; [#uses=1] + br i1 %0, label %bb.nph, label %bb2 + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb1, %bb.nph + %x_offs_addr.01 = phi i32 [ %1, %bb1 ], [ %x_offs, %bb.nph ] ; [#uses=1] + %1 = add i32 %x_offs_addr.01, -4 ; [#uses=3] + br label %bb1 + +bb1: ; preds = %bb + %2 = icmp sgt i32 %1, 4 ; [#uses=1] + br i1 %2, label %bb, label %bb1.bb2_crit_edge + +bb1.bb2_crit_edge: ; preds = %bb1 + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + %x_offs_addr.0.lcssa = phi i32 [ %1, %bb1.bb2_crit_edge ], [ %x_offs, %entry ] ; [#uses=1] + ret i32 %x_offs_addr.0.lcssa +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll b/llvm/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll index 153a1811e63b..f873b3d73e2c 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep mul | count 3 +; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep {mul.*%lsr.iv} | count 2 ; The multiply in bb2 must not be reduced to an add, as the sext causes the ; %1 argument to become negative after a while. ; ModuleID = ''