forked from OSchip/llvm-project
Teach LoopSimplify how to merge multiple loop exits into a single exit,
when one of them can be converted to a trivial icmp and conditional branch. This addresses what is essentially a phase ordering problem. SimplifyCFG knows how to do this transformation, but it doesn't do so if the primary block has any instructions in it other than an icmp and a branch. In the given testcase, the block contains other instructions, however they are loop-invariant and can be hoisted. SimplifyCFG doesn't have LoopInfo though, so it can't hoist them. And, it's important that the blocks be merged before LoopRotation, as it doesn't support multiple-exit loops. llvm-svn: 74396
This commit is contained in:
parent
8918b481bf
commit
c8ca49659a
|
@ -19,6 +19,7 @@ namespace llvm {
|
|||
|
||||
class User;
|
||||
class BasicBlock;
|
||||
class BranchInst;
|
||||
class Instruction;
|
||||
class Value;
|
||||
class Pass;
|
||||
|
@ -94,6 +95,12 @@ void MergeBasicBlockIntoOnlyPred(BasicBlock *BB);
|
|||
///
|
||||
bool SimplifyCFG(BasicBlock *BB);
|
||||
|
||||
/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
|
||||
/// and if a predecessor branches to us and one of our successors, fold the
|
||||
/// setcc into the predecessor and use logical operations to pick the right
|
||||
/// destination.
|
||||
bool FoldBranchToCommonDest(BranchInst *BI);
|
||||
|
||||
/// DemoteRegToStack - This function takes a virtual register computed by an
|
||||
/// Instruction and replaces it with a slot in the stack frame, allocated via
|
||||
/// alloca. This allows the CFG to be changed around without fear of
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Support/CFG.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/ADT/SetOperations.h"
|
||||
|
@ -258,6 +259,80 @@ ReprocessLoop:
|
|||
PN->eraseFromParent();
|
||||
}
|
||||
|
||||
// If this loop has muliple exits and the exits all go to the same
|
||||
// block, attempt to merge the exits. This helps several passes, such
|
||||
// as LoopRotation, which do not support loops with multiple exits.
|
||||
// SimplifyCFG also does this (and this code uses the same utility
|
||||
// function), however this code is loop-aware, where SimplifyCFG is
|
||||
// not. That gives it the advantage of being able to hoist
|
||||
// loop-invariant instructions out of the way to open up more
|
||||
// opportunities, and the disadvantage of having the responsibility
|
||||
// to preserve dominator information.
|
||||
if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) {
|
||||
SmallVector<BasicBlock*, 8> ExitingBlocks;
|
||||
L->getExitingBlocks(ExitingBlocks);
|
||||
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
|
||||
BasicBlock *ExitingBlock = ExitingBlocks[i];
|
||||
if (!ExitingBlock->getSinglePredecessor()) continue;
|
||||
BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
|
||||
if (!BI || !BI->isConditional()) continue;
|
||||
CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
|
||||
if (!CI || CI->getParent() != ExitingBlock) continue;
|
||||
|
||||
// Attempt to hoist out all instructions except for the
|
||||
// comparison and the branch.
|
||||
bool AllInvariant = true;
|
||||
for (BasicBlock::iterator I = ExitingBlock->begin(),
|
||||
E = ExitingBlock->end(); I != E; ) {
|
||||
Instruction *Inst = I++;
|
||||
if (Inst == BI || Inst == CI)
|
||||
continue;
|
||||
if (Inst->isTrapping()) {
|
||||
AllInvariant = false;
|
||||
break;
|
||||
}
|
||||
for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j)
|
||||
if (!L->isLoopInvariant(Inst->getOperand(j))) {
|
||||
AllInvariant = false;
|
||||
break;
|
||||
}
|
||||
if (!AllInvariant)
|
||||
break;
|
||||
// Hoist.
|
||||
Inst->moveBefore(L->getLoopPreheader()->getTerminator());
|
||||
}
|
||||
if (!AllInvariant) continue;
|
||||
|
||||
// The block has now been cleared of all instructions except for
|
||||
// a comparison and a conditional branch. SimplifyCFG may be able
|
||||
// to fold it now.
|
||||
if (!FoldBranchToCommonDest(BI)) continue;
|
||||
|
||||
// Success. The block is now dead, so remove it from the loop,
|
||||
// update the dominator tree and dominance frontier, and delete it.
|
||||
assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
|
||||
Changed = true;
|
||||
L->removeBlockFromLoop(ExitingBlock);
|
||||
|
||||
DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
|
||||
DomTreeNode *Node = DT->getNode(ExitingBlock);
|
||||
const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
|
||||
Node->getChildren();
|
||||
for (unsigned k = 0, g = Children.size(); k != g; ++k) {
|
||||
DT->changeImmediateDominator(Children[k], Node->getIDom());
|
||||
if (DF) DF->changeImmediateDominator(Children[k]->getBlock(),
|
||||
Node->getIDom()->getBlock(),
|
||||
DT);
|
||||
}
|
||||
DT->eraseNode(ExitingBlock);
|
||||
if (DF) DF->removeBlock(ExitingBlock);
|
||||
|
||||
BI->getSuccessor(0)->removePredecessor(ExitingBlock);
|
||||
BI->getSuccessor(1)->removePredecessor(ExitingBlock);
|
||||
ExitingBlock->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
|
|
|
@ -1492,7 +1492,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
|
|||
/// and if a predecessor branches to us and one of our successors, fold the
|
||||
/// setcc into the predecessor and use logical operations to pick the right
|
||||
/// destination.
|
||||
static bool FoldBranchToCommonDest(BranchInst *BI) {
|
||||
bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
|
||||
BasicBlock *BB = BI->getParent();
|
||||
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
|
||||
if (Cond == 0) return false;
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
; RUN: llvm-as < %s | opt -loopsimplify -loop-rotate -instcombine -indvars \
|
||||
; RUN: | llvm-dis > %t
|
||||
; RUN: not grep sext %t
|
||||
; RUN: grep {phi i64} %t | count 1
|
||||
|
||||
; Loopsimplify should be able to merge the two loop exits
|
||||
; into one, so that loop rotate can rotate the loop, so
|
||||
; that indvars can promote the induction variable to i64
|
||||
; without needing casts.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
|
||||
define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
|
||||
entry:
|
||||
%t0 = load float* %peakWeight, align 4 ; <float> [#uses=1]
|
||||
br label %bb1
|
||||
|
||||
bb: ; preds = %bb2
|
||||
%t1 = sext i32 %hiPart.0 to i64 ; <i64> [#uses=1]
|
||||
%t2 = getelementptr float* %pTmp1, i64 %t1 ; <float*> [#uses=1]
|
||||
%t3 = load float* %t2, align 4 ; <float> [#uses=1]
|
||||
%t4 = fadd float %t3, %distERBhi.0 ; <float> [#uses=1]
|
||||
%t5 = add i32 %hiPart.0, 1 ; <i32> [#uses=2]
|
||||
%t6 = sext i32 %t5 to i64 ; <i64> [#uses=1]
|
||||
%t7 = getelementptr float* %peakWeight, i64 %t6 ; <float*> [#uses=1]
|
||||
%t8 = load float* %t7, align 4 ; <float> [#uses=1]
|
||||
%t9 = fadd float %t8, %peakCount.0 ; <float> [#uses=1]
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb, %entry
|
||||
%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ] ; <float> [#uses=2]
|
||||
%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ] ; <i32> [#uses=3]
|
||||
%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ] ; <float> [#uses=3]
|
||||
%t10 = fcmp uge float %distERBhi.0, 2.500000e+00 ; <i1> [#uses=1]
|
||||
br i1 %t10, label %bb3, label %bb2
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%t11 = add i32 %bandEdgeIndex, -1 ; <i32> [#uses=1]
|
||||
%t12 = icmp sgt i32 %t11, %hiPart.0 ; <i1> [#uses=1]
|
||||
br i1 %t12, label %bb, label %bb3
|
||||
|
||||
bb3: ; preds = %bb2, %bb1
|
||||
%t13 = fdiv float %peakCount.0, %distERBhi.0 ; <float> [#uses=1]
|
||||
ret float %t13
|
||||
}
|
Loading…
Reference in New Issue