diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 24ef17279417..43247ea9b76b 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/PredIteratorCache.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" @@ -187,7 +188,8 @@ namespace { void PromoteAliasSet(AliasSet &AS, SmallVectorImpl &ExitBlocks, - SmallVectorImpl &InsertPts); + SmallVectorImpl &InsertPts, + PredIteratorCache &PIC); }; } @@ -222,6 +224,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { TD = getAnalysisIfAvailable(); TLI = &getAnalysis(); + assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); + CurAST = new AliasSetTracker(*AA); // Collect Alias info from subloops. for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); @@ -284,11 +288,12 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { if (!DisablePromotion && (Preheader || L->hasDedicatedExits())) { SmallVector ExitBlocks; SmallVector InsertPts; + PredIteratorCache PIC; // Loop over all of the alias sets in the tracker object. for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; ++I) - PromoteAliasSet(*I, ExitBlocks, InsertPts); + PromoteAliasSet(*I, ExitBlocks, InsertPts, PIC); // Once we have promoted values across the loop body we have to recursively // reform LCSSA as any nested loop may now have values defined within the @@ -298,19 +303,14 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // it as it went. if (Changed) formLCSSARecursively(*L, *DT, getAnalysisIfAvailable()); - - } else if (Changed) { - // If we have successfully changed the loop but not used SSAUpdater to - // re-write instructions throughout the loop body, re-form LCSSA just for - // this loop. - formLCSSA(*L, *DT, getAnalysisIfAvailable()); } - // Regardless of how we changed the loop, reform LCSSA on its parent as - // hoisting or sinking could have disrupted it. - if (Changed) - if (Loop *ParentL = L->getParentLoop()) - formLCSSA(*ParentL, *DT, getAnalysisIfAvailable()); + // Check that neither this loop nor its parent have had LCSSA broken. LICM is + // specifically moving instructions across the loop boundary and so it is + // especially in need of sanity checking here. + assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); + assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && + "Parent loop not left in LCSSA form after LICM!"); // Clear out loops state information for the next iteration CurLoop = 0; @@ -528,22 +528,6 @@ bool LICM::isNotUsedInLoop(Instruction &I) { return true; } -static BasicBlock::iterator -replaceTrivialPHIsAndGetInsertionPt(BasicBlock &BB, Instruction &I) { - BasicBlock::iterator II = BB.begin(); - while (PHINode *PN = dyn_cast(II)) { - ++II; - if (isTriviallyReplacablePHI(*PN, I)) { - PN->replaceAllUsesWith(&I); - PN->eraseFromParent(); - } - } - if (isa(II)) - ++II; - - return II; -} - /// sink - When an instruction is found to only be used outside of the loop, /// this function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its @@ -552,126 +536,59 @@ replaceTrivialPHIsAndGetInsertionPt(BasicBlock &BB, Instruction &I) { void LICM::sink(Instruction &I) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); - SmallVector ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); - if (isa(I)) ++NumMovedLoads; else if (isa(I)) ++NumMovedCalls; ++NumSunk; Changed = true; - // The case where there is only a single exit node of this loop is common - // enough that we handle it as a special (more efficient) case. It is more - // efficient to handle because there are no PHI nodes that need to be placed. - if (ExitBlocks.size() == 1) { - if (!DT->dominates(I.getParent(), ExitBlocks[0])) { - // Instruction is not used, just delete it. - CurAST->deleteValue(&I); - // If I has users in unreachable blocks, eliminate. - // If I is not void type then replaceAllUsesWith undef. - // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.use_empty()) - I.replaceAllUsesWith(UndefValue::get(I.getType())); - I.eraseFromParent(); - } else { - // Look for any LCSSA PHI nodes for this instruction in the exit blocks - // and replace them. - BasicBlock::iterator II = - replaceTrivialPHIsAndGetInsertionPt(*ExitBlocks[0], I); +#ifndef NDEBUG + SmallVector ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + SmallPtrSet ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); +#endif - // Move the instruction to the start of the exit block, after any PHI - // nodes in it. - I.moveBefore(II); + // If this instruction is only used outside of the loop, then all users are + // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of + // the instruction. + while (!I.use_empty()) { + // The user must be a PHI node. + PHINode *PN = cast(I.use_back()); - // This instruction is no longer in the AST for the current loop, because - // we just sunk it out of the loop. If we just sunk it into an outer - // loop, we will rediscover the operation when we process it. - CurAST->deleteValue(&I); - } - return; + BasicBlock *ExitBlock = PN->getParent(); + assert(ExitBlockSet.count(ExitBlock) && + "The LCSSA PHI is not in an exit block!"); + + Instruction *New = I.clone(); + ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New); + if (!I.getName().empty()) + New->setName(I.getName() + ".le"); + + // Build LCSSA PHI nodes for any in-loop operands. Note that this is + // particularly cheap because we can rip off the PHI node that we're + // replacing for the number and blocks of the predecessors. + // OPT: If this shows up in a profile, we can instead finish sinking all + // invariant instructions, and then walk their operands to re-establish + // LCSSA. That will eliminate creating PHI nodes just to nuke them when + // sinking bottom-up. + for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; + ++OI) + if (Instruction *OInst = dyn_cast(*OI)) + if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) + if (!OLoop->contains(PN)) { + PHINode *OpPN = PHINode::Create( + OInst->getType(), PN->getNumIncomingValues(), + OInst->getName() + ".lcssa", ExitBlock->begin()); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + OpPN->addIncoming(OInst, PN->getIncomingBlock(i)); + *OI = OpPN; + } + + PN->replaceAllUsesWith(New); + PN->eraseFromParent(); } - if (ExitBlocks.empty()) { - // The instruction is actually dead if there ARE NO exit blocks. - CurAST->deleteValue(&I); - // If I has users in unreachable blocks, eliminate. - // If I is not void type then replaceAllUsesWith undef. - // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.use_empty()) - I.replaceAllUsesWith(UndefValue::get(I.getType())); - I.eraseFromParent(); - return; - } - - // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the - // hard work of inserting PHI nodes as necessary. - SmallVector NewPHIs; - SSAUpdater SSA(&NewPHIs); - - if (!I.use_empty()) - SSA.Initialize(I.getType(), I.getName()); - - // Insert a copy of the instruction in each exit block of the loop that is - // dominated by the instruction. Each exit block is known to only be in the - // ExitBlocks list once. - BasicBlock *InstOrigBB = I.getParent(); - unsigned NumInserted = 0; - - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *ExitBlock = ExitBlocks[i]; - - if (!DT->dominates(InstOrigBB, ExitBlock)) - continue; - - // Look for any LCSSA PHI nodes for this instruction in the exit blocks - // and replace them. Then get the insertion point after the last PHI. - BasicBlock::iterator InsertPt = - replaceTrivialPHIsAndGetInsertionPt(*ExitBlock, I); - - // If this is the first exit block processed, just move the original - // instruction, otherwise clone the original instruction and insert - // the copy. - Instruction *New; - if (NumInserted++ == 0) { - I.moveBefore(InsertPt); - New = &I; - } else { - New = I.clone(); - if (!I.getName().empty()) - New->setName(I.getName()+".le"); - ExitBlock->getInstList().insert(InsertPt, New); - } - - // Now that we have inserted the instruction, inform SSAUpdater. - if (!I.use_empty()) - SSA.AddAvailableValue(ExitBlock, New); - } - - // If the instruction doesn't dominate any exit blocks, it must be dead. - if (NumInserted == 0) { - CurAST->deleteValue(&I); - if (!I.use_empty()) - I.replaceAllUsesWith(UndefValue::get(I.getType())); - I.eraseFromParent(); - return; - } - - // Next, rewrite uses of the instruction, inserting PHI nodes as needed. - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { - // Grab the use before incrementing the iterator. - Use &U = UI.getUse(); - // Increment the iterator before removing the use from the list. - ++UI; - SSA.RewriteUseAfterInsertions(U); - } - - // Update CurAST for NewPHIs if I had pointer type. - if (I.getType()->isPointerTy()) - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - CurAST->copyValue(&I, NewPHIs[i]); - - // Finally, remove the instruction from CurAST. It is no longer in the loop. CurAST->deleteValue(&I); + I.eraseFromParent(); } /// hoist - When an instruction is found to only use loop invariant operands @@ -742,21 +659,39 @@ namespace { SmallPtrSet &PointerMustAliases; SmallVectorImpl &LoopExitBlocks; SmallVectorImpl &LoopInsertPts; + PredIteratorCache &PredCache; AliasSetTracker &AST; + LoopInfo &LI; DebugLoc DL; int Alignment; MDNode *TBAATag; + + Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { + if (Instruction *I = dyn_cast(V)) + if (Loop *L = LI.getLoopFor(I->getParent())) + if (!L->contains(BB)) { + // We need to create an LCSSA PHI node for the incoming value and + // store that. + PHINode *PN = PHINode::Create( + I->getType(), PredCache.GetNumPreds(BB), + I->getName() + ".lcssa", BB->begin()); + for (BasicBlock **PI = PredCache.GetPreds(BB); *PI; ++PI) + PN->addIncoming(I, *PI); + return PN; + } + return V; + } + public: - LoopPromoter(Value *SP, - const SmallVectorImpl &Insts, SSAUpdater &S, - SmallPtrSet &PMA, - SmallVectorImpl &LEB, - SmallVectorImpl &LIP, - AliasSetTracker &ast, DebugLoc dl, int alignment, + LoopPromoter(Value *SP, const SmallVectorImpl &Insts, + SSAUpdater &S, SmallPtrSet &PMA, + SmallVectorImpl &LEB, + SmallVectorImpl &LIP, PredIteratorCache &PIC, + AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, MDNode *TBAATag) - : LoadAndStorePromoter(Insts, S), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), - AST(ast), DL(dl), Alignment(alignment), TBAATag(TBAATag) {} + : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), + LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), + LI(li), DL(dl), Alignment(alignment), TBAATag(TBAATag) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl &) const { @@ -776,8 +711,10 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); + Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); Instruction *InsertPos = LoopInsertPts[i]; - StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); + StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); if (TBAATag) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); @@ -801,7 +738,8 @@ namespace { /// void LICM::PromoteAliasSet(AliasSet &AS, SmallVectorImpl &ExitBlocks, - SmallVectorImpl &InsertPts) { + SmallVectorImpl &InsertPts, + PredIteratorCache &PIC) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. @@ -933,7 +871,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, SmallVector NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, *CurAST, DL, Alignment, TBAATag); + InsertPts, PIC, *CurAST, *LI, DL, Alignment, TBAATag); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. diff --git a/llvm/test/Transforms/LICM/sinking.ll b/llvm/test/Transforms/LICM/sinking.ll index 02d5b84154ee..ccc9186f7a48 100644 --- a/llvm/test/Transforms/LICM/sinking.ll +++ b/llvm/test/Transforms/LICM/sinking.ll @@ -53,7 +53,7 @@ Exit: ; CHECK-LABEL: @test3( ; CHECK: Exit.loopexit: -; CHECK-NEXT: %X = add i32 0, 1 +; CHECK-NEXT: %X.le = add i32 0, 1 ; CHECK-NEXT: br label %Exit } @@ -78,7 +78,7 @@ Out: ; preds = %Loop ; CHECK: Out: ; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn ; CHECK-NEXT: mul i32 %N, %[[LCSSAPHI]] -; CHECK-NEXT: sub i32 %tmp.6, %N +; CHECK-NEXT: sub i32 %tmp.6.le, %N ; CHECK-NEXT: ret i32 } @@ -101,8 +101,8 @@ Out: ; preds = %Loop ret i32 %tmp.6 ; CHECK-LABEL: @test5( ; CHECK: Out: -; CHECK-NEXT: %tmp.6 = load i32* @X -; CHECK-NEXT: ret i32 %tmp.6 +; CHECK-NEXT: %tmp.6.le = load i32* @X +; CHECK-NEXT: ret i32 %tmp.6.le } @@ -125,9 +125,9 @@ Out: ; preds = %Loop ret i32 %sunk2 ; CHECK-LABEL: @test6( ; CHECK: Out: -; CHECK-NEXT: %dead = getelementptr %Ty* @X2, i64 0, i32 0 -; CHECK-NEXT: %sunk2 = load i32* %dead -; CHECK-NEXT: ret i32 %sunk2 +; CHECK-NEXT: %dead.le = getelementptr %Ty* @X2, i64 0, i32 0 +; CHECK-NEXT: %sunk2.le = load i32* %dead.le +; CHECK-NEXT: ret i32 %sunk2.le } @@ -155,12 +155,12 @@ Out2: ; preds = %ContLoop ; CHECK: Out1: ; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn ; CHECK-NEXT: mul i32 %N, %[[LCSSAPHI]] -; CHECK-NEXT: sub i32 %tmp.6, %N +; CHECK-NEXT: sub i32 %tmp.6.le, %N ; CHECK-NEXT: ret ; CHECK: Out2: ; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn ; CHECK-NEXT: mul i32 %N, %[[LCSSAPHI]] -; CHECK-NEXT: sub i32 %tmp.6.le, %N +; CHECK-NEXT: sub i32 %tmp.6.le4, %N ; CHECK-NEXT: ret } @@ -187,8 +187,8 @@ exit2: ; preds = %Cont ; CHECK-NEXT: ret i32 0 ; CHECK: exit2: ; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %X -; CHECK-NEXT: %V = add i32 %[[LCSSAPHI]], 1 -; CHECK-NEXT: ret i32 %V +; CHECK-NEXT: %V.le = add i32 %[[LCSSAPHI]], 1 +; CHECK-NEXT: ret i32 %V.le } @@ -212,7 +212,7 @@ return.i: ; preds = %no_exit.1.i ; CHECK-LABEL: @test9( ; CHECK: loopentry.3.i.preheader.loopexit: -; CHECK-NEXT: %inc.1.i = add i32 0, 1 +; CHECK-NEXT: %inc.1.i.le = add i32 0, 1 ; CHECK-NEXT: br label %loopentry.3.i.preheader } @@ -234,8 +234,8 @@ Out: ; preds = %Loop ; CHECK-LABEL: @test10( ; CHECK: Out: ; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn -; CHECK-NEXT: %tmp.6 = sdiv i32 %N, %[[LCSSAPHI]] -; CHECK-NEXT: ret i32 %tmp.6 +; CHECK-NEXT: %tmp.6.le = sdiv i32 %N, %[[LCSSAPHI]] +; CHECK-NEXT: ret i32 %tmp.6.le } ; Should delete, not sink, dead instructions. @@ -251,4 +251,69 @@ Out: ; CHECK-NEXT: ret void } +@c = common global [1 x i32] zeroinitializer, align 4 +; Test a *many* way nested loop with multiple exit blocks both of which exit +; multiple loop nests. This exercises LCSSA corner cases. +define i32 @PR18753(i1* %a, i1* %b, i1* %c, i1* %d) { +entry: + br label %l1.header + +l1.header: + %iv = phi i64 [ %iv.next, %l1.latch ], [ 0, %entry ] + %arrayidx.i = getelementptr inbounds [1 x i32]* @c, i64 0, i64 %iv + br label %l2.header + +l2.header: + %x0 = load i1* %c, align 4 + br i1 %x0, label %l1.latch, label %l3.preheader + +l3.preheader: + br label %l3.header + +l3.header: + %x1 = load i1* %d, align 4 + br i1 %x1, label %l2.latch, label %l4.preheader + +l4.preheader: + br label %l4.header + +l4.header: + %x2 = load i1* %a + br i1 %x2, label %l3.latch, label %l4.body + +l4.body: + call void @f(i32* %arrayidx.i) + %x3 = load i1* %b + %l = trunc i64 %iv to i32 + br i1 %x3, label %l4.latch, label %exit + +l4.latch: + call void @g() + %x4 = load i1* %b, align 4 + br i1 %x4, label %l4.header, label %exit + +l3.latch: + br label %l3.header + +l2.latch: + br label %l2.header + +l1.latch: + %iv.next = add nsw i64 %iv, 1 + br label %l1.header + +exit: + %lcssa = phi i32 [ %l, %l4.latch ], [ %l, %l4.body ] +; CHECK-LABEL: @PR18753( +; CHECK: exit: +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i64 [ %iv, %l4.latch ], [ %iv, %l4.body ] +; CHECK-NEXT: %l.le = trunc i64 %[[LCSSAPHI]] to i32 +; CHECK-NEXT: ret i32 %l.le + + ret i32 %lcssa +} + +declare void @f(i32*) + +declare void @g() diff --git a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll index 69d1ca69a4ac..c58903d49d55 100644 --- a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll +++ b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll @@ -29,9 +29,9 @@ target triple = "x86_64-apple-macosx" ; CHECK-LABEL: entry: ; CHECK-LABEL: for.cond1.preheader: ; CHECK-LABEL: for.body3: -; CHECK: %cmp4 -; CHECK: %conv = zext i1 %cmp4 to i32 -; CHECK: %xor = xor i32 %conv6, 1 +; CHECK: %cmp4.le.le +; CHECK: %conv.le.le = zext i1 %cmp4.le.le to i32 +; CHECK: %xor.le.le = xor i32 %conv6.le.le, 1 define void @foo() { entry: br label %for.cond