This patch extends the LoopUnroll pass to be able to unroll loops

with unknown trip counts. This is left off by default, and a
command-line option enables it. It also begins to separate loop
unrolling into a utility routine; eventually it might be made usable
from other passes.

It currently works by inserting conditional branches between each
unrolled iteration, unless it proves that the trip count is a
multiple of a constant integer > 1, which it currently only does in
the rare case that the trip count expression is a Mul operator with
a ConstantInt operand. Eventually this information might be provided
by other sources, for example by a pass that peels/splits the loop
for this purpose.

llvm-svn: 36990
This commit is contained in:
Dan Gohman 2007-05-11 20:53:41 +00:00
parent 600db3eb96
commit 2980d9da45
1 changed files with 196 additions and 61 deletions

View File

@ -31,6 +31,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@ -39,12 +40,19 @@
#include <algorithm>
using namespace llvm;
STATISTIC(NumUnrolled, "Number of loops completely unrolled");
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
namespace {
cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
cl::desc("The cut-off point for loop unrolling"));
UnrollThreshold
("unroll-threshold", cl::init(100), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
cl::opt<unsigned>
UnrollCount
("unroll-count", cl::init(0), cl::Hidden,
cl::desc("Use this unroll count for all loops, for testing purposes"));
class VISIBILITY_HIDDEN LoopUnroll : public LoopPass {
LoopInfo *LI; // The current loop information
@ -52,7 +60,13 @@ namespace {
static char ID; // Pass ID, replacement for typeid
LoopUnroll() : LoopPass((intptr_t)&ID) {}
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
bool runOnLoop(Loop *L, LPPassManager &LPM);
bool unrollLoop(Loop *L, unsigned Count, unsigned Threshold);
BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB);
/// This transformation requires natural loop information & requires that
@ -162,43 +176,137 @@ BasicBlock *LoopUnroll::FoldBlockIntoPredecessor(BasicBlock *BB) {
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
bool Changed = false;
LI = &getAnalysis<LoopInfo>();
// Unroll the loop.
if (!unrollLoop(L, UnrollCount, UnrollThreshold))
return false;
// Update the loop information for this loop.
// If we completely unrolled the loop, remove it from the parent.
if (L->getNumBackEdges() == 0)
LPM.deleteLoopFromQueue(L);
return true;
}
/// Unroll the given loop by UnrollCount, or by a heuristically-determined
/// value if Count is zero. If Threshold is non-NULL, it points to
/// a Threshold value to limit code size expansion. If the loop size would
/// expand beyond the threshold value, unrolling is suppressed. The return
/// value is false if no transformations are performed.
///
bool LoopUnroll::unrollLoop(Loop *L, unsigned Count, unsigned Threshold) {
assert(L->isLCSSAForm());
BasicBlock *Header = L->getHeader();
BasicBlock *LatchBlock = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
if (BI == 0) return Changed; // Must end in a conditional branch
ConstantInt *TripCountC = dyn_cast_or_null<ConstantInt>(L->getTripCount());
if (!TripCountC) return Changed; // Must have constant trip count!
// Guard against huge trip counts. This also guards against assertions in
// APInt from the use of getZExtValue, below.
if (TripCountC->getValue().getActiveBits() > 32)
return Changed; // More than 2^32 iterations???
uint64_t TripCountFull = TripCountC->getZExtValue();
if (TripCountFull == 0)
return Changed; // Zero iteraitons?
unsigned LoopSize = ApproximateLoopSize(L);
DOUT << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << " Loop Size = "
<< LoopSize << " Trip Count = " << TripCountFull << " - ";
uint64_t Size = (uint64_t)LoopSize*TripCountFull;
if (Size > UnrollThreshold) {
DOUT << "TOO LARGE: " << Size << ">" << UnrollThreshold << "\n";
return Changed;
<< "] Loop %" << Header->getName() << "\n";
if (!BI || BI->isUnconditional()) {
// The loop-rorate pass can be helpful to avoid this in many cases.
DOUT << " Can't unroll; loop not terminated by a conditional branch.\n";
return false;
}
// Determine the trip count and/or trip multiple. A TripCount value of zero
// is used to mean an unknown trip count. The TripMultiple value is the
// greatest known integer multiple of the trip count.
unsigned TripCount = 0;
unsigned TripMultiple = 1;
if (Value *TripCountValue = L->getTripCount()) {
if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCountValue)) {
// Guard against huge trip counts. This also guards against assertions in
// APInt from the use of getZExtValue, below.
if (TripCountC->getValue().getActiveBits() <= 32) {
TripCount = (unsigned)TripCountC->getZExtValue();
}
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCountValue)) {
switch (BO->getOpcode()) {
case BinaryOperator::Mul:
if (ConstantInt *MultipleC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
if (MultipleC->getValue().getActiveBits() <= 32) {
TripMultiple = (unsigned)MultipleC->getZExtValue();
}
}
break;
default: break;
}
}
}
if (TripCount != 0)
DOUT << " Trip Count = " << TripCount << "\n";
if (TripMultiple != 1)
DOUT << " Trip Multiple = " << TripMultiple << "\n";
// Automatically select an unroll count.
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
// don't unroll.
if (TripCount != 0) {
Count = TripCount;
} else {
return false;
}
}
// Effectively "DCE" unrolled iterations that are beyond the tripcount
// and will never be executed.
if (TripCount != 0 && Count > TripCount)
Count = TripCount;
assert(Count > 0);
assert(TripMultiple > 0);
assert(TripCount == 0 || TripCount % TripMultiple == 0);
// Enforce the threshold.
if (Threshold != NoThreshold) {
unsigned LoopSize = ApproximateLoopSize(L);
DOUT << " Loop Size = " << LoopSize << "\n";
uint64_t Size = (uint64_t)LoopSize*Count;
if (TripCount != 1 && Size > Threshold) {
DOUT << " TOO LARGE TO UNROLL: "
<< Size << ">" << Threshold << "\n";
return false;
}
}
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
if (TripCount != 0) {
BreakoutTrip = TripCount % Count;
TripMultiple = 0;
} else {
// Figure out what multiple to use.
BreakoutTrip = TripMultiple =
(unsigned)GreatestCommonDivisor64(Count, TripMultiple);
}
if (CompletelyUnroll) {
DOUT << "COMPLETELY UNROLLING loop %" << Header->getName()
<< " with trip count " << TripCount << "!\n";
} else {
DOUT << "UNROLLING loop %" << Header->getName()
<< " by " << Count;
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
DOUT << " with a breakout at trip " << BreakoutTrip;
} else if (TripMultiple != 1) {
DOUT << " with " << TripMultiple << " trips per branch";
}
DOUT << "!\n";
}
DOUT << "UNROLLING!\n";
std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
unsigned TripCount = (unsigned)TripCountFull;
BasicBlock *LoopExit = BI->getSuccessor(L->contains(BI->getSuccessor(0)));
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
@ -214,16 +322,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LastValueMap[I] = I;
}
// Remove the exit branch from the loop
LatchBlock->getInstList().erase(BI);
std::vector<BasicBlock*> Headers;
std::vector<BasicBlock*> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);
assert(TripCount != 0 && "Trip count of 0 is impossible!");
for (unsigned It = 1; It != TripCount; ++It) {
for (unsigned It = 1; It != Count; ++It) {
char SuffixBuffer[100];
sprintf(SuffixBuffer, ".%d", It);
@ -277,9 +381,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// we can insert the proper branches later.
if (*BB == Header)
Headers.push_back(New);
if (*BB == LatchBlock)
if (*BB == LatchBlock) {
Latches.push_back(New);
// Also, clear out the new latch's back edge so that it doesn't look
// like a new loop, so that it's amenable to being merged with adjacent
// blocks later on.
TerminatorInst *Term = New->getTerminator();
assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
Term->setSuccessor(!ContinueOnTrue, NULL);
}
NewBlocks.push_back(New);
}
@ -289,12 +402,11 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
E = NewBlocks[i]->end(); I != E; ++I)
RemapInstruction(I, LastValueMap);
}
// The latch block exits the loop. If there are any PHI nodes in the
// successor blocks, update them to use the appropriate values computed as the
// last iteration of the loop.
if (TripCount > 1) {
if (Count != 1) {
SmallPtrSet<PHINode*, 8> Users;
for (Value::use_iterator UI = LatchBlock->use_begin(),
UE = LatchBlock->use_end(); UI != UE; ++UI)
@ -316,28 +428,54 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
}
// Now loop over the PHI nodes in the original block, setting them to their
// incoming values.
BasicBlock *Preheader = L->getLoopPreheader();
for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
PHINode *PN = OrigPHINode[i];
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
Header->getInstList().erase(PN);
}
// Insert the branches that link the different iterations together
for (unsigned i = 0; i < Latches.size()-1; ++i) {
new BranchInst(Headers[i+1], Latches[i]);
if (BasicBlock *Fold = FoldBlockIntoPredecessor(Headers[i+1])) {
std::replace(Latches.begin(), Latches.end(), Headers[i+1], Fold);
std::replace(Headers.begin(), Headers.end(), Headers[i+1], Fold);
// Now, if we're doing complete unrolling, loop over the PHI nodes in the
// original block, setting them to their incoming values.
if (CompletelyUnroll) {
BasicBlock *Preheader = L->getLoopPreheader();
for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
PHINode *PN = OrigPHINode[i];
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
Header->getInstList().erase(PN);
}
}
// Now that all the basic blocks for the unrolled iterations are in place,
// set up the branches to connect them.
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
// The original branch was replicated in each unrolled iteration.
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
// The branch destination.
unsigned j = (i + 1) % e;
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll && j == 0) {
Dest = LoopExit;
NeedConditional = false;
}
// If we know the trip count or a multiple of it, we can safely use an
// unconditional branch for some iterations.
if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
NeedConditional = false;
}
if (NeedConditional) {
// Update the conditional branch's successor for the following
// iteration.
Term->setSuccessor(!ContinueOnTrue, Dest);
} else {
Term->setUnconditionalDest(Dest);
// Merge adjacent basic blocks, if possible.
if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest)) {
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
std::replace(Headers.begin(), Headers.end(), Dest, Fold);
}
}
}
// Finally, add an unconditional branch to the block to continue into the exit
// block.
new BranchInst(LoopExit, Latches[Latches.size()-1]);
FoldBlockIntoPredecessor(LoopExit);
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
@ -356,10 +494,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
}
// Update the loop information for this loop.
// Remove the loop from the parent.
LPM.deleteLoopFromQueue(L);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
return true;
}