forked from OSchip/llvm-project
Revert D82927 "[Loop Fusion] Integrate Loop Peeling into Loop Fusion"
This reverts commit bb8850d34d
.
It broke 3 check-llvm-transforms-loopfusion tests in an ASAN build.
LoopFuse.cpp `for (BasicBlock *Pred : predecessors(BB)) {` may operate on a deleted BB.
This commit is contained in:
parent
1030e82598
commit
8a268bec1b
|
@ -46,7 +46,6 @@
|
|||
|
||||
#include "llvm/Transforms/Scalar/LoopFuse.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/DependenceAnalysis.h"
|
||||
#include "llvm/Analysis/DomTreeUpdater.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
|
@ -54,7 +53,6 @@
|
|||
#include "llvm/Analysis/PostDominators.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
|
@ -66,7 +64,6 @@
|
|||
#include "llvm/Transforms/Utils.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/CodeMoverUtils.h"
|
||||
#include "llvm/Transforms/Utils/UnrollLoop.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -117,11 +114,6 @@ static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
|
|||
"Use all available analyses")),
|
||||
cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
|
||||
|
||||
static cl::opt<unsigned> FusionPeelMaxCount(
|
||||
"loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
|
||||
cl::desc("Max number of iterations to be peeled from a loop, such that "
|
||||
"fusion can take place"));
|
||||
|
||||
#ifndef NDEBUG
|
||||
static cl::opt<bool>
|
||||
VerboseFusionDebugging("loop-fusion-verbose-debug",
|
||||
|
@ -165,12 +157,6 @@ struct FusionCandidate {
|
|||
bool Valid;
|
||||
/// Guard branch of the loop, if it exists
|
||||
BranchInst *GuardBranch;
|
||||
/// Peeling Paramaters of the Loop.
|
||||
TTI::PeelingPreferences PP;
|
||||
/// Can you Peel this Loop?
|
||||
bool AbleToPeel;
|
||||
/// Has this loop been Peeled
|
||||
bool Peeled;
|
||||
|
||||
/// Dominator and PostDominator trees are needed for the
|
||||
/// FusionCandidateCompare function, required by FusionCandidateSet to
|
||||
|
@ -182,13 +168,11 @@ struct FusionCandidate {
|
|||
OptimizationRemarkEmitter &ORE;
|
||||
|
||||
FusionCandidate(Loop *L, const DominatorTree *DT,
|
||||
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
|
||||
TTI::PeelingPreferences PP)
|
||||
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
|
||||
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
|
||||
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
|
||||
Latch(L->getLoopLatch()), L(L), Valid(true),
|
||||
GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
|
||||
Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
|
||||
GuardBranch(L->getLoopGuardBranch()), DT(DT), PDT(PDT), ORE(ORE) {
|
||||
|
||||
// Walk over all blocks in the loop and check for conditions that may
|
||||
// prevent fusion. For each block, walk over all instructions and collect
|
||||
|
@ -259,17 +243,6 @@ struct FusionCandidate {
|
|||
return Preheader;
|
||||
}
|
||||
|
||||
/// After Peeling the loop is modified quite a bit, hence all of the Blocks
|
||||
/// need to be updated accordingly.
|
||||
void updateAfterPeeling() {
|
||||
Preheader = L->getLoopPreheader();
|
||||
Header = L->getHeader();
|
||||
ExitingBlock = L->getExitingBlock();
|
||||
ExitBlock = L->getExitBlock();
|
||||
Latch = L->getLoopLatch();
|
||||
verify();
|
||||
}
|
||||
|
||||
/// Given a guarded loop, get the successor of the guard that is not in the
|
||||
/// loop.
|
||||
///
|
||||
|
@ -281,8 +254,6 @@ struct FusionCandidate {
|
|||
assert(GuardBranch && "Only valid on guarded loops.");
|
||||
assert(GuardBranch->isConditional() &&
|
||||
"Expecting guard to be a conditional branch.");
|
||||
if (Peeled)
|
||||
return GuardBranch->getSuccessor(1);
|
||||
return (GuardBranch->getSuccessor(0) == Preheader)
|
||||
? GuardBranch->getSuccessor(1)
|
||||
: GuardBranch->getSuccessor(0);
|
||||
|
@ -544,17 +515,13 @@ private:
|
|||
ScalarEvolution &SE;
|
||||
PostDominatorTree &PDT;
|
||||
OptimizationRemarkEmitter &ORE;
|
||||
AssumptionCache &AC;
|
||||
|
||||
const TargetTransformInfo &TTI;
|
||||
|
||||
public:
|
||||
LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
|
||||
ScalarEvolution &SE, PostDominatorTree &PDT,
|
||||
OptimizationRemarkEmitter &ORE, const DataLayout &DL,
|
||||
AssumptionCache &AC, const TargetTransformInfo &TTI)
|
||||
OptimizationRemarkEmitter &ORE, const DataLayout &DL)
|
||||
: LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
|
||||
DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
|
||||
DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {}
|
||||
|
||||
/// This is the main entry point for loop fusion. It will traverse the
|
||||
/// specified function and collect candidate loops to fuse, starting at the
|
||||
|
@ -639,9 +606,7 @@ private:
|
|||
/// Flow Equivalent sets, sorted by dominance.
|
||||
void collectFusionCandidates(const LoopVector &LV) {
|
||||
for (Loop *L : LV) {
|
||||
TTI::PeelingPreferences PP =
|
||||
gatherPeelingPreferences(L, SE, TTI, None, None);
|
||||
FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP);
|
||||
FusionCandidate CurrCand(L, &DT, &PDT, ORE);
|
||||
if (!CurrCand.isEligibleForFusion(SE))
|
||||
continue;
|
||||
|
||||
|
@ -691,126 +656,33 @@ private:
|
|||
/// Determine if two fusion candidates have the same trip count (i.e., they
|
||||
/// execute the same number of iterations).
|
||||
///
|
||||
/// This function will return a pair of values. The first is a boolean,
|
||||
/// stating whether or not the two candidates are known at compile time to
|
||||
/// have the same TripCount. The second is the difference in the two
|
||||
/// TripCounts. This information can be used later to determine whether or not
|
||||
/// peeling can be performed on either one of the candiates.
|
||||
std::pair<bool, Optional<unsigned>>
|
||||
haveIdenticalTripCounts(const FusionCandidate &FC0,
|
||||
const FusionCandidate &FC1) const {
|
||||
|
||||
/// Note that for now this method simply returns a boolean value because there
|
||||
/// are no mechanisms in loop fusion to handle different trip counts. In the
|
||||
/// future, this behaviour can be extended to adjust one of the loops to make
|
||||
/// the trip counts equal (e.g., loop peeling). When this is added, this
|
||||
/// interface may need to change to return more information than just a
|
||||
/// boolean value.
|
||||
bool identicalTripCounts(const FusionCandidate &FC0,
|
||||
const FusionCandidate &FC1) const {
|
||||
const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
|
||||
if (isa<SCEVCouldNotCompute>(TripCount0)) {
|
||||
UncomputableTripCount++;
|
||||
LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
|
||||
return {false, None};
|
||||
return false;
|
||||
}
|
||||
|
||||
const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
|
||||
if (isa<SCEVCouldNotCompute>(TripCount1)) {
|
||||
UncomputableTripCount++;
|
||||
LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
|
||||
return {false, None};
|
||||
return false;
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
|
||||
<< *TripCount1 << " are "
|
||||
<< (TripCount0 == TripCount1 ? "identical" : "different")
|
||||
<< "\n");
|
||||
|
||||
if (TripCount0 == TripCount1)
|
||||
return {true, 0};
|
||||
|
||||
LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
|
||||
"determining the difference between trip counts\n");
|
||||
|
||||
// Currently only considering loops with a single exit point
|
||||
// and a non-constant trip count.
|
||||
unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
|
||||
unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
|
||||
|
||||
// If any of the tripcounts are zero that means that loop(s) do not have
|
||||
// a single exit or a constant tripcount.
|
||||
if (TC0 == 0 || TC1 == 0) {
|
||||
LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
|
||||
"have a constant number of iterations. Peeling "
|
||||
"is not benefical\n");
|
||||
return {false, None};
|
||||
}
|
||||
|
||||
Optional<unsigned> Difference = None;
|
||||
int Diff = TC0 - TC1;
|
||||
|
||||
if (Diff > 0)
|
||||
Difference = Diff;
|
||||
else {
|
||||
LLVM_DEBUG(
|
||||
dbgs()
|
||||
<< "Difference is less than 0. FC1 (second loop) has more "
|
||||
"iterations than the first one. Currently not supported.\n");
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
|
||||
<< "\n");
|
||||
|
||||
return {false, Difference};
|
||||
}
|
||||
|
||||
void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
|
||||
unsigned PeelCount) {
|
||||
assert(FC0.AbleToPeel && "Should be able to peel loop");
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
|
||||
<< " iterations of the first loop. \n");
|
||||
|
||||
FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true);
|
||||
if (FC0.Peeled) {
|
||||
LLVM_DEBUG(dbgs() << "Done Peeling\n");
|
||||
|
||||
#ifndef NDEBUG
|
||||
auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
|
||||
|
||||
assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
|
||||
"Loops should have identical trip counts after peeling");
|
||||
#endif
|
||||
|
||||
FC0.PP.PeelCount = PeelCount;
|
||||
|
||||
// Peeling does not update the PDT
|
||||
PDT.recalculate(*FC0.Preheader->getParent());
|
||||
|
||||
FC0.updateAfterPeeling();
|
||||
|
||||
// In this case the iterations of the loop are constant, so the first
|
||||
// loop will execute completely (will not jump from one of
|
||||
// the peeled blocks to the second loop). Here we are updating the
|
||||
// branch conditions of each of the peeled blocks, such that it will
|
||||
// branch to its successor which is not the Preheader of the second Loop.
|
||||
// Doing this update will ensure that the entry block of the first loop
|
||||
// dominates the entry block of the second loop.
|
||||
BasicBlock *BB =
|
||||
FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
|
||||
SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
|
||||
for (BasicBlock *Pred : predecessors(BB)) {
|
||||
if (Pred != FC0.ExitBlock) {
|
||||
BranchInst *Old = dyn_cast<BranchInst>(Pred->getTerminator());
|
||||
BasicBlock *Succ = Old->getSuccessor(0);
|
||||
if (Succ == BB)
|
||||
Succ = Old->getSuccessor(1);
|
||||
BranchInst *NewBranch = BranchInst::Create(Succ);
|
||||
ReplaceInstWithInst(Old, NewBranch);
|
||||
TreeUpdates.emplace_back(
|
||||
DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB));
|
||||
}
|
||||
}
|
||||
DTU.applyUpdates(TreeUpdates);
|
||||
DTU.flush();
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
|
||||
<< " iterations from the first loop.\n"
|
||||
"Both Loops have the same number of iterations now.\n");
|
||||
}
|
||||
return (TripCount0 == TripCount1);
|
||||
}
|
||||
|
||||
/// Walk each set of control flow equivalent fusion candidates and attempt to
|
||||
|
@ -844,32 +716,7 @@ private:
|
|||
FC0->verify();
|
||||
FC1->verify();
|
||||
|
||||
// Check if the candidates have identical tripcounts (first value of
|
||||
// pair), and if not check the difference in the tripcounts between
|
||||
// the loops (second value of pair). The difference is not equal to
|
||||
// None iff the loops iterate a constant number of times, and have a
|
||||
// single exit.
|
||||
std::pair<bool, Optional<unsigned>> IdenticalTripCountRes =
|
||||
haveIdenticalTripCounts(*FC0, *FC1);
|
||||
bool SameTripCount = IdenticalTripCountRes.first;
|
||||
Optional<unsigned> TCDifference = IdenticalTripCountRes.second;
|
||||
|
||||
// Here we are checking that FC0 (the first loop) can be peeled, and
|
||||
// both loops have different tripcounts.
|
||||
if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
|
||||
if (*TCDifference > FusionPeelMaxCount) {
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "Difference in loop trip counts: " << *TCDifference
|
||||
<< " is greater than maximum peel count specificed: "
|
||||
<< FusionPeelMaxCount << "\n");
|
||||
} else {
|
||||
// Dependent on peeling being performed on the first loop, and
|
||||
// assuming all other conditions for fusion return true.
|
||||
SameTripCount = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!SameTripCount) {
|
||||
if (!identicalTripCounts(*FC0, *FC1)) {
|
||||
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
|
||||
"counts. Not fusing.\n");
|
||||
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
|
||||
|
@ -887,7 +734,7 @@ private:
|
|||
// Ensure that FC0 and FC1 have identical guards.
|
||||
// If one (or both) are not guarded, this check is not necessary.
|
||||
if (FC0->GuardBranch && FC1->GuardBranch &&
|
||||
!haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
|
||||
!haveIdenticalGuards(*FC0, *FC1)) {
|
||||
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
|
||||
"guards. Not Fusing.\n");
|
||||
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
|
||||
|
@ -956,23 +803,13 @@ private:
|
|||
LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
|
||||
<< *FC1 << "\n");
|
||||
|
||||
FusionCandidate FC0Copy = *FC0;
|
||||
// Peel the loop after determining that fusion is legal. The Loops
|
||||
// will still be safe to fuse after the peeling is performed.
|
||||
bool Peel = TCDifference && *TCDifference > 0;
|
||||
if (Peel)
|
||||
peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
|
||||
|
||||
// Report fusion to the Optimization Remarks.
|
||||
// Note this needs to be done *before* performFusion because
|
||||
// performFusion will change the original loops, making it not
|
||||
// possible to identify them after fusion is complete.
|
||||
reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
|
||||
FuseCounter);
|
||||
reportLoopFusion<OptimizationRemark>(*FC0, *FC1, FuseCounter);
|
||||
|
||||
FusionCandidate FusedCand(
|
||||
performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE,
|
||||
FC0Copy.PP);
|
||||
FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE);
|
||||
FusedCand.verify();
|
||||
assert(FusedCand.isEligibleForFusion(SE) &&
|
||||
"Fused candidate should be eligible for fusion!");
|
||||
|
@ -1249,17 +1086,16 @@ private:
|
|||
return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
|
||||
}
|
||||
|
||||
/// Modify the latch branch of FC to be unconditional since successors of the
|
||||
/// branch are the same.
|
||||
/// Simplify the condition of the latch branch of \p FC to true, when both of
|
||||
/// its successors are the same.
|
||||
void simplifyLatchBranch(const FusionCandidate &FC) const {
|
||||
BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
|
||||
if (FCLatchBranch) {
|
||||
assert(FCLatchBranch->isConditional() &&
|
||||
FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
|
||||
"Expecting the two successors of FCLatchBranch to be the same");
|
||||
BranchInst *NewBranch =
|
||||
BranchInst::Create(FCLatchBranch->getSuccessor(0));
|
||||
ReplaceInstWithInst(FCLatchBranch, NewBranch);
|
||||
FCLatchBranch->setCondition(
|
||||
llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1319,8 +1155,7 @@ private:
|
|||
if (FC0.GuardBranch)
|
||||
return fuseGuardedLoops(FC0, FC1);
|
||||
|
||||
assert(FC1.Preheader ==
|
||||
(FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
|
||||
assert(FC1.Preheader == FC0.ExitBlock);
|
||||
assert(FC1.Preheader->size() == 1 &&
|
||||
FC1.Preheader->getSingleSuccessor() == FC1.Header);
|
||||
|
||||
|
@ -1343,7 +1178,7 @@ private:
|
|||
FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
|
||||
|
||||
// Then modify the control flow and update DT and PDT.
|
||||
SmallVector<DominatorTree::UpdateType, 16> TreeUpdates;
|
||||
SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
|
||||
|
||||
// The old exiting block of the first loop (FC0) has to jump to the header
|
||||
// of the second as we need to execute the code in the second header block
|
||||
|
@ -1362,27 +1197,12 @@ private:
|
|||
// to FC1.Header? I think this is basically what the three sequences are
|
||||
// trying to accomplish; however, doing this directly in the CFG may mean
|
||||
// the DT/PDT becomes invalid
|
||||
if (!FC0.Peeled) {
|
||||
FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
|
||||
FC1.Header);
|
||||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
|
||||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
|
||||
} else {
|
||||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
|
||||
|
||||
// Remove the ExitBlock of the first Loop (also not needed)
|
||||
FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
|
||||
FC1.Header);
|
||||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
|
||||
FC0.ExitBlock->getTerminator()->eraseFromParent();
|
||||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
|
||||
new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
|
||||
}
|
||||
FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
|
||||
FC1.Header);
|
||||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
|
||||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
|
||||
|
||||
// The pre-header of L1 is not necessary anymore.
|
||||
assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
|
||||
|
@ -1426,7 +1246,7 @@ private:
|
|||
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
|
||||
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
|
||||
|
||||
// Modify the latch branch of FC0 to be unconditional as both successors of
|
||||
// Change the condition of FC0 latch branch to true, as both successors of
|
||||
// the branch are the same.
|
||||
simplifyLatchBranch(FC0);
|
||||
|
||||
|
@ -1448,11 +1268,6 @@ private:
|
|||
|
||||
LI.removeBlock(FC1.Preheader);
|
||||
DTU.deleteBB(FC1.Preheader);
|
||||
if (FC0.Peeled) {
|
||||
LI.removeBlock(FC0.ExitBlock);
|
||||
DTU.deleteBB(FC0.ExitBlock);
|
||||
}
|
||||
|
||||
DTU.flush();
|
||||
|
||||
// Is there a way to keep SE up-to-date so we don't need to forget the loops
|
||||
|
@ -1549,15 +1364,10 @@ private:
|
|||
BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
|
||||
BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
|
||||
BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
|
||||
BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
|
||||
|
||||
// Move instructions from the exit block of FC0 to the beginning of the exit
|
||||
// block of FC1, in the case that the FC0 loop has not been peeled. In the
|
||||
// case that FC0 loop is peeled, then move the instructions of the successor
|
||||
// of the FC0 Exit block to the beginning of the exit block of FC1.
|
||||
moveInstructionsToTheBeginning(
|
||||
(FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
|
||||
DT, PDT, DI);
|
||||
// block of FC1.
|
||||
moveInstructionsToTheBeginning(*FC0.ExitBlock, *FC1.ExitBlock, DT, PDT, DI);
|
||||
|
||||
// Move instructions from the guard block of FC1 to the end of the guard
|
||||
// block of FC0.
|
||||
|
@ -1577,9 +1387,8 @@ private:
|
|||
// for FC1 (where FC1 guard would have gone if FC1 was not executed).
|
||||
FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock);
|
||||
FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
|
||||
|
||||
BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
|
||||
BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header);
|
||||
FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock,
|
||||
FC1.Header);
|
||||
|
||||
// The guard of FC1 is not necessary anymore.
|
||||
FC1.GuardBranch->eraseFromParent();
|
||||
|
@ -1594,15 +1403,6 @@ private:
|
|||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
|
||||
|
||||
if (FC0.Peeled) {
|
||||
// Remove the Block after the ExitBlock of FC0
|
||||
TreeUpdates.emplace_back(DominatorTree::UpdateType(
|
||||
DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
|
||||
FC0ExitBlockSuccessor->getTerminator()->eraseFromParent();
|
||||
new UnreachableInst(FC0ExitBlockSuccessor->getContext(),
|
||||
FC0ExitBlockSuccessor);
|
||||
}
|
||||
|
||||
assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) &&
|
||||
"Expecting guard block to have no predecessors");
|
||||
assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) &&
|
||||
|
@ -1709,7 +1509,7 @@ private:
|
|||
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
|
||||
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
|
||||
|
||||
// Modify the latch branch of FC0 to be unconditional as both successors of
|
||||
// Change the condition of FC0 latch branch to true, as both successors of
|
||||
// the branch are the same.
|
||||
simplifyLatchBranch(FC0);
|
||||
|
||||
|
@ -1740,10 +1540,6 @@ private:
|
|||
LI.removeBlock(FC1GuardBlock);
|
||||
LI.removeBlock(FC1.Preheader);
|
||||
LI.removeBlock(FC0.ExitBlock);
|
||||
if (FC0.Peeled) {
|
||||
LI.removeBlock(FC0ExitBlockSuccessor);
|
||||
DTU.deleteBB(FC0ExitBlockSuccessor);
|
||||
}
|
||||
DTU.deleteBB(FC1GuardBlock);
|
||||
DTU.deleteBB(FC1.Preheader);
|
||||
DTU.deleteBB(FC0.ExitBlock);
|
||||
|
@ -1810,8 +1606,6 @@ struct LoopFuseLegacy : public FunctionPass {
|
|||
AU.addRequired<PostDominatorTreeWrapperPass>();
|
||||
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
|
||||
AU.addRequired<DependenceAnalysisWrapperPass>();
|
||||
AU.addRequired<AssumptionCacheTracker>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
|
||||
AU.addPreserved<ScalarEvolutionWrapperPass>();
|
||||
AU.addPreserved<LoopInfoWrapperPass>();
|
||||
|
@ -1828,12 +1622,9 @@ struct LoopFuseLegacy : public FunctionPass {
|
|||
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
||||
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
|
||||
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
|
||||
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
|
||||
const TargetTransformInfo &TTI =
|
||||
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
|
||||
LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
|
||||
return LF.fuseLoops(F);
|
||||
}
|
||||
};
|
||||
|
@ -1846,11 +1637,9 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
|
|||
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
|
||||
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
|
||||
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
|
||||
auto &AC = AM.getResult<AssumptionAnalysis>(F);
|
||||
const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
|
||||
LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
|
||||
bool Changed = LF.fuseLoops(F);
|
||||
if (!Changed)
|
||||
return PreservedAnalyses::all();
|
||||
|
@ -1873,8 +1662,6 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|||
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
|
||||
|
||||
FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
|
||||
|
|
|
@ -1,85 +0,0 @@
|
|||
; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
|
||||
|
||||
; This will test if we are able to fuse two guarded loops which have constant
|
||||
; but different trip counts. The first two iterations of the first loop should
|
||||
; be peeled off, and then the loops should be fused together.
|
||||
|
||||
@B = common global [1024 x i32] zeroinitializer, align 16
|
||||
|
||||
; CHECK: void @main
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK: br i1 %cmp4, label %for.first.entry, label %for.end
|
||||
; CHECK: for.first.entry
|
||||
; CHECK-NEXT: br label %for.first.peel.begin
|
||||
; CHECK: for.first.peel.begin:
|
||||
; CHECK-NEXT: br label %for.first.peel
|
||||
; CHECK: for.first.peel:
|
||||
; CHECK: br label %for.first.peel.next
|
||||
; CHECK: for.first.peel.next:
|
||||
; CHECK-NEXT: br label %for.first.peel2
|
||||
; CHECK: for.first.peel2:
|
||||
; CHECK: br label %for.first.peel.next1
|
||||
; CHECK: for.first.peel.next1:
|
||||
; CHECK-NEXT: br label %for.first.peel.next11
|
||||
; CHECK: for.first.peel.next11:
|
||||
; CHECK-NEXT: br label %for.first.entry.peel.newph
|
||||
; CHECK: for.first.entry.peel.newph:
|
||||
; CHECK: br label %for.first
|
||||
; CHECK: for.first:
|
||||
; CHECK: br i1 %cmp3, label %for.first, label %for.second.exit
|
||||
; CHECK: for.second.exit:
|
||||
; CHECK: br label %for.end
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
|
||||
define void @main(i32* noalias %A) {
|
||||
entry:
|
||||
%cmp4 = icmp slt i64 0, 45
|
||||
br i1 %cmp4, label %for.first.entry, label %for.second.guard
|
||||
|
||||
for.first.entry: ; preds = %entry
|
||||
br label %for.first
|
||||
|
||||
for.first: ; preds = %for.first.entry, %for.first
|
||||
%i.05 = phi i64 [ %inc, %for.first ], [ 0, %for.first.entry ]
|
||||
%sub = sub nsw i64 %i.05, 3
|
||||
%add = add nsw i64 %i.05, 3
|
||||
%mul = mul nsw i64 %sub, %add
|
||||
%rem = srem i64 %mul, %i.05
|
||||
%conv = trunc i64 %rem to i32
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.05
|
||||
store i32 %conv, i32* %arrayidx, align 4
|
||||
%inc = add nsw i64 %i.05, 1
|
||||
%cmp = icmp slt i64 %inc, 45
|
||||
br i1 %cmp, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit: ; preds = %for.first
|
||||
br label %for.second.guard
|
||||
|
||||
for.second.guard: ; preds = %for.first.exit, %entry
|
||||
%cmp31 = icmp slt i64 2, 45
|
||||
br i1 %cmp31, label %for.second.entry, label %for.end
|
||||
|
||||
for.second.entry: ; preds = %for.second.guard
|
||||
br label %for.second
|
||||
|
||||
for.second: ; preds = %for.second.entry, %for.second
|
||||
%i1.02 = phi i64 [ %inc14, %for.second ], [ 2, %for.second.entry ]
|
||||
%sub7 = sub nsw i64 %i1.02, 3
|
||||
%add8 = add nsw i64 %i1.02, 3
|
||||
%mul9 = mul nsw i64 %sub7, %add8
|
||||
%rem10 = srem i64 %mul9, %i1.02
|
||||
%conv11 = trunc i64 %rem10 to i32
|
||||
%arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.02
|
||||
store i32 %conv11, i32* %arrayidx12, align 4
|
||||
%inc14 = add nsw i64 %i1.02, 1
|
||||
%cmp3 = icmp slt i64 %inc14, 45
|
||||
br i1 %cmp3, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit: ; preds = %for.second
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.second.exit, %for.second.guard
|
||||
ret void
|
||||
}
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
|
||||
|
||||
; This will test that we do not fuse two guarded loops together.
|
||||
; These loops do not have the same trip count, fusing should be possible after
|
||||
; peeling the loops. However, the exit block of the first loop makes the loops
|
||||
; unsafe to peel.
|
||||
; The expected output of this test is the function as below.
|
||||
|
||||
; CHECK: void @unsafe_exitblock
|
||||
; CHECK: for.first.guard
|
||||
; CHECK: br i1 %cmp3, label %for.first.preheader, label %for.second.guard
|
||||
; CHECK: for.first.preheader:
|
||||
; CHECK-NEXT: br label %for.first
|
||||
; CHECK: for.first:
|
||||
; CHECK: br i1 %cmp, label %for.first, label %for.first.exit
|
||||
; CHECK: for.first.exit:
|
||||
; CHECK-NEXT: call void @bar()
|
||||
; CHECK-NEXT: br label %for.second.guard
|
||||
; CHECK: for.second.guard:
|
||||
; CHECK: br i1 %cmp21, label %for.second.preheader, label %for.end
|
||||
; CHECK: for.second.preheader:
|
||||
; CHECK-NEXT: br label %for.second
|
||||
; CHECK: for.second:
|
||||
; CHECK: br i1 %cmp2, label %for.second, label %for.second.exit
|
||||
; CHECK: for.second.exit:
|
||||
; CHECK-NEXT: br label %for.end
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
|
||||
define void @unsafe_exitblock(i32* noalias %A, i32* noalias %B) {
|
||||
for.first.guard:
|
||||
%cmp3 = icmp slt i64 0, 45
|
||||
br i1 %cmp3, label %for.first.preheader, label %for.second.guard
|
||||
|
||||
for.first.preheader:
|
||||
br label %for.first
|
||||
|
||||
for.first:
|
||||
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
%inc = add nsw i64 %i.04, 1
|
||||
%cmp = icmp slt i64 %inc, 45
|
||||
br i1 %cmp, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit:
|
||||
call void @bar()
|
||||
br label %for.second.guard
|
||||
|
||||
for.second.guard:
|
||||
%cmp21 = icmp slt i64 2,45
|
||||
br i1 %cmp21, label %for.second.preheader, label %for.end
|
||||
|
||||
for.second.preheader:
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j.02 = phi i64 [ %inc6, %for.second ], [ 2, %for.second.preheader ]
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
|
||||
store i32 0, i32* %arrayidx4, align 4
|
||||
%inc6 = add nsw i64 %j.02, 1
|
||||
%cmp2 = icmp slt i64 %inc6, 45
|
||||
br i1 %cmp2, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit:
|
||||
br label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @bar()
|
|
@ -1,83 +0,0 @@
|
|||
; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
|
||||
|
||||
; This will check that we do not fuse these two loops together. These loops are
|
||||
; valid cadidates for peeling, however they are not adjacent.
|
||||
; The expected output of this test is the function below.
|
||||
|
||||
; CHECK: void @function
|
||||
; CHECK-NEXT: for.first.preheader:
|
||||
; CHECK-NEXT: br label %for.first
|
||||
; CHECK: for.first:
|
||||
; CHECK: br label %for.first.latch
|
||||
; CHECK: for.first.latch:
|
||||
; CHECK: br i1 %exitcond4, label %for.first, label %for.first.exit
|
||||
; CHECK: for.first.exit:
|
||||
; CHECK-NEXT: br label %for.next
|
||||
; CHECK: for.next:
|
||||
; CHECK-NEXT: br label %for.second.preheader
|
||||
; CHECK: for.second.preheader:
|
||||
; CHECK: br label %for.second
|
||||
; CHECK: for.second:
|
||||
; CHECK: br label %for.second.latch
|
||||
; CHECK: for.second.latch:
|
||||
; CHECK: br i1 %exitcond, label %for.second, label %for.end
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
|
||||
@B = common global [1024 x i32] zeroinitializer, align 16
|
||||
|
||||
define void @function(i32* noalias %arg) {
|
||||
for.first.preheader:
|
||||
br label %for.first
|
||||
|
||||
for.first: ; preds = %for.first.preheader, %for.first.latch
|
||||
%.014 = phi i32 [ 0, %for.first.preheader ], [ %tmp15, %for.first.latch ]
|
||||
%indvars.iv23 = phi i64 [ 0, %for.first.preheader ], [ %indvars.iv.next3, %for.first.latch ]
|
||||
%tmp = add nsw i32 %.014, -3
|
||||
%tmp8 = add nuw nsw i64 %indvars.iv23, 3
|
||||
%tmp9 = trunc i64 %tmp8 to i32
|
||||
%tmp10 = mul nsw i32 %tmp, %tmp9
|
||||
%tmp11 = trunc i64 %indvars.iv23 to i32
|
||||
%tmp12 = srem i32 %tmp10, %tmp11
|
||||
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23
|
||||
store i32 %tmp12, i32* %tmp13, align 4
|
||||
br label %for.first.latch
|
||||
|
||||
for.first.latch: ; preds = %for.first
|
||||
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
|
||||
%tmp15 = add nuw nsw i32 %.014, 1
|
||||
%exitcond4 = icmp ne i64 %indvars.iv.next3, 100
|
||||
br i1 %exitcond4, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit: ; preds: %for.first.latch
|
||||
br label %for.next
|
||||
|
||||
for.next: ; preds = %for.first.exit
|
||||
br label %for.second.preheader
|
||||
|
||||
for.second.preheader: ; preds = %for.next
|
||||
br label %for.second
|
||||
|
||||
for.second: ; preds = %for.second.preheader, %for.second.latch
|
||||
%.02 = phi i32 [ 0, %for.second.preheader ], [ %tmp28, %for.second.latch ]
|
||||
%indvars.iv1 = phi i64 [ 3, %for.second.preheader ], [ %indvars.iv.next, %for.second.latch ]
|
||||
%tmp20 = add nsw i32 %.02, -3
|
||||
%tmp21 = add nuw nsw i64 %indvars.iv1, 3
|
||||
%tmp22 = trunc i64 %tmp21 to i32
|
||||
%tmp23 = mul nsw i32 %tmp20, %tmp22
|
||||
%tmp24 = trunc i64 %indvars.iv1 to i32
|
||||
%tmp25 = srem i32 %tmp23, %tmp24
|
||||
%tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
|
||||
store i32 %tmp25, i32* %tmp26, align 4
|
||||
br label %for.second.latch
|
||||
|
||||
for.second.latch: ; preds = %for.second
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
|
||||
%tmp28 = add nuw nsw i32 %.02, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, 100
|
||||
br i1 %exitcond, label %for.second, label %for.end
|
||||
|
||||
for.end: ; preds = %for.second.latch
|
||||
ret void
|
||||
}
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
|
||||
|
||||
; This will test whether we can fuse two loops together if they have constant
|
||||
; but a different tripcount.
|
||||
; The first three iterations of the first loop should be peeled, and then the
|
||||
; two loops should be fused together in this example.
|
||||
|
||||
; C Code
|
||||
;
|
||||
; int B[1024];
|
||||
;
|
||||
; void function(int *arg) {
|
||||
; for (int i = 0; i != 100; ++i)
|
||||
; arg[i] = ((i - 3)*(i+3)) % i;
|
||||
;
|
||||
; for (int i = 3; i != 100; ++i)
|
||||
; B[i] = ((i-6)*(i+3)) % i;
|
||||
; }
|
||||
|
||||
; CHECK: void @function
|
||||
; CHECK-NEXT: for.first.preheader:
|
||||
; CHECK-NEXT: br label %for.first.peel.begin
|
||||
; CHECK: for.first.peel.begin:
|
||||
; CHECK-NEXT: br label %for.first.peel
|
||||
; CHECK: for.first.peel
|
||||
; CHECK: br label %for.first.latch.peel
|
||||
; CHECK: for.first.latch.peel:
|
||||
; CHECK: br label %for.first.peel.next
|
||||
; CHECK: for.first.peel.next:
|
||||
; CHECK-NEXT: br label %for.first.peel2
|
||||
; CHECK: for.first.peel2:
|
||||
; CHECK: br label %for.first.latch.peel10
|
||||
; CHECK: for.first.latch.peel10:
|
||||
; CHECK: br label %for.first.peel.next1
|
||||
; CHECK: for.first.peel.next1:
|
||||
; CHECK-NEXT: br label %for.first.peel15
|
||||
; CHECK: for.first.peel15:
|
||||
; CHECK: br label %for.first.latch.peel23
|
||||
; CHECK: for.first.latch.peel23:
|
||||
; CHECK: br label %for.first.peel.next14
|
||||
; CHECK: for.first.peel.next14:
|
||||
; CHECK-NEXT: br label %for.first.peel.next27
|
||||
; CHECK: for.first.peel.next27:
|
||||
; CHECK-NEXT: br label %for.first.preheader.peel.newph
|
||||
; CHECK: for.first.preheader.peel.newph:
|
||||
; CHECK-NEXT: br label %for.first
|
||||
; CHECK: for.first:
|
||||
; CHECK: br label %for.first.latch
|
||||
; CHECK: for.first.latch:
|
||||
; CHECK: br label %for.second.latch
|
||||
; CHECK: for.second.latch:
|
||||
; CHECK: br i1 %exitcond, label %for.first, label %for.end
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
|
||||
@B = common global [1024 x i32] zeroinitializer, align 16
|
||||
|
||||
define void @function(i32* noalias %arg) {
|
||||
for.first.preheader:
|
||||
br label %for.first
|
||||
|
||||
for.first: ; preds = %for.first.preheader, %for.first.latch
|
||||
%.014 = phi i32 [ 0, %for.first.preheader ], [ %tmp15, %for.first.latch ]
|
||||
%indvars.iv23 = phi i64 [ 0, %for.first.preheader ], [ %indvars.iv.next3, %for.first.latch ]
|
||||
%tmp = add nsw i32 %.014, -3
|
||||
%tmp8 = add nuw nsw i64 %indvars.iv23, 3
|
||||
%tmp9 = trunc i64 %tmp8 to i32
|
||||
%tmp10 = mul nsw i32 %tmp, %tmp9
|
||||
%tmp11 = trunc i64 %indvars.iv23 to i32
|
||||
%tmp12 = srem i32 %tmp10, %tmp11
|
||||
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23
|
||||
store i32 %tmp12, i32* %tmp13, align 4
|
||||
br label %for.first.latch
|
||||
|
||||
for.first.latch: ; preds = %for.first
|
||||
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
|
||||
%tmp15 = add nuw nsw i32 %.014, 1
|
||||
%exitcond4 = icmp ne i64 %indvars.iv.next3, 100
|
||||
br i1 %exitcond4, label %for.first, label %for.second.preheader
|
||||
|
||||
for.second.preheader: ; preds = %for.first.latch
|
||||
br label %for.second
|
||||
|
||||
for.second: ; preds = %for.second.preheader, %for.second.latch
|
||||
%.02 = phi i32 [ 0, %for.second.preheader ], [ %tmp28, %for.second.latch ]
|
||||
%indvars.iv1 = phi i64 [ 3, %for.second.preheader ], [ %indvars.iv.next, %for.second.latch ]
|
||||
%tmp20 = add nsw i32 %.02, -3
|
||||
%tmp21 = add nuw nsw i64 %indvars.iv1, 3
|
||||
%tmp22 = trunc i64 %tmp21 to i32
|
||||
%tmp23 = mul nsw i32 %tmp20, %tmp22
|
||||
%tmp24 = trunc i64 %indvars.iv1 to i32
|
||||
%tmp25 = srem i32 %tmp23, %tmp24
|
||||
%tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
|
||||
store i32 %tmp25, i32* %tmp26, align 4
|
||||
br label %for.second.latch
|
||||
|
||||
for.second.latch: ; preds = %for.second
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
|
||||
%tmp28 = add nuw nsw i32 %.02, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, 100
|
||||
br i1 %exitcond, label %for.second, label %for.end
|
||||
|
||||
for.end: ; preds = %for.second.latch
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue