[OpenMPIRBuilder] Implement tileLoops.

The tileLoops method implements the code generation part of the tile directive introduced in OpenMP 5.1. It takes a list of loops forming a loop nest, tiles it, and returns the CanonicalLoopInfo objects representing the generated loops.

The implementation takes n CanonicalLoopInfos and n tile size Values, and returns 2*n new CanonicalLoopInfos. The input CanonicalLoopInfos are invalidated, and any of their BBs that are not reused in the new loop nest are removed from the function.

In a modified version of D76342, I was able to correctly compile and execute a tiled loop nest.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D92974
This commit is contained in:
Michael Kruse 2021-01-23 13:10:44 -06:00
parent c7d5d8fa33
commit b7dee667b6
4 changed files with 784 additions and 1 deletions

View File

@ -300,6 +300,53 @@ public:
bool NeedsBarrier,
Value *Chunk = nullptr);
/// Tile a loop nest.
///
/// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
/// \p Loops must be perfectly nested, from outermost to innermost loop
/// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
/// of every loop and every tile size must be usable in the outermost
/// loop's preheader. This implies that the loop nest is rectangular.
///
/// Example:
/// \code
/// for (int i = 0; i < 15; ++i) // Canonical loop "i"
/// for (int j = 0; j < 14; ++j) // Canonical loop "j"
/// body(i, j);
/// \endcode
///
/// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
/// \code
/// for (int i1 = 0; i1 < 3; ++i1)
/// for (int j1 = 0; j1 < 2; ++j1)
/// for (int i2 = 0; i2 < 5; ++i2)
/// for (int j2 = 0; j2 < 7; ++j2)
/// body(i1*5+i2, j1*7+j2);
/// \endcode
///
/// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
/// are referred to as the floor loops, and the loops i2 and j2 are the tile
/// loops. Each original induction variable is recomputed as
/// "tile size * floor induction variable + tile induction variable". Tiling
/// also handles non-constant trip counts, non-constant tile sizes and trip
/// counts that are not multiples of the tile size. In the latter case the
/// tile loop of the last floor-loop iteration will have fewer iterations than
/// specified as its tile size.
///
///
/// @param DL Debug location for instructions added by tiling, for
/// instance the floor- and tile trip count computation.
/// @param Loops Loops to tile. The CanonicalLoopInfo objects are
/// invalidated by this method, i.e. should not be used after
/// tiling.
/// @param TileSizes For each loop in \p Loops, the tile size for that
/// dimension. Must contain exactly as many entries as
/// \p Loops.
///
/// \returns A list of generated loops. Contains twice as many loops as the
/// input loop nest; the first half are the floor loops and the
/// second half are the tile loops.
std::vector<CanonicalLoopInfo *>
tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
ArrayRef<Value *> TileSizes);
/// Generator for '#omp flush'
///
/// \param Loc The location where the flush directive was encountered
@ -729,6 +776,12 @@ private:
BasicBlock *Exit;
BasicBlock *After;
/// Add the control blocks of this loop to \p BBs.
///
/// The blocks appended are the preheader, header, cond, latch, exit and
/// after blocks, in that order. Existing entries of \p BBs are kept.
///
/// This does not include any block from the body, including the one returned
/// by getBody().
void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
public:
/// The preheader ensures that there is only a single edge entering the loop.
/// Code that must be execute before any loop iteration can be emitted here,
@ -781,6 +834,14 @@ public:
return IndVarPHI;
}
/// Type of this loop's induction variable; the trip count uses the same type.
Type *getIndVarType() const {
  Instruction *IndVar = getIndVar();
  return IndVar->getType();
}
/// Return the insertion point for user code before the loop.
///
/// The returned position is directly in front of the last instruction of the
/// preheader block, so instructions inserted there stay inside the preheader.
/// NOTE(review): this presumes the preheader is non-empty (ends in its
/// terminator); std::prev on an empty block would be invalid.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
  return {Preheader, std::prev(Preheader->end())};
}
/// Return the insertion point for user code in the body.
OpenMPIRBuilder::InsertPointTy getBodyIP() const {
return {Body, Body->begin()};

View File

@ -1164,6 +1164,252 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
return CLI;
}
/// Let control flow leaving \p Source continue at \p Target.
///
/// Two shapes of \p Source are supported:
///  * \p Source has no terminator yet (it is the block currently under
///    construction): a fresh unconditional branch carrying \p DL is appended.
///  * \p Source ends in an unconditional branch: the branch is retargeted in
///    place and its former successor is notified that it lost \p Source as a
///    predecessor. The existing branch keeps its own debug location.
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
  Instruction *Term = Source->getTerminator();

  // Degenerate block: simply terminate it with a branch to the target.
  if (!Term) {
    BranchInst *FreshBr = BranchInst::Create(Target, Source);
    FreshBr->setDebugLoc(DL);
    return;
  }

  auto *ExistingBr = cast<BranchInst>(Term);
  assert(ExistingBr->isUnconditional() &&
         "BB's terminator must be an unconditional branch (or degenerate)");

  // Detach from the old successor before pointing the branch elsewhere.
  BasicBlock *FormerSucc = ExistingBr->getSuccessor(0);
  FormerSucc->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
  ExistingBr->setSuccessor(0, Target);
}
/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
/// after this \p OldTarget will be orphaned.
///
/// Every predecessor is rewritten with redirectTo(), so each of them must
/// either end in an unconditional branch or have no terminator. \p DL is only
/// used by redirectTo() when it has to create a new branch.
static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
BasicBlock *NewTarget, DebugLoc DL) {
// redirectTo() retargets the branch in Pred, which changes the predecessor
// list of OldTarget while it is being walked; hence the early-increment range.
for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
redirectTo(Pred, NewTarget, DL);
}
/// Delete those blocks of \p BBs that are no longer used by the rest of the
/// function.
///
/// All blocks of \p BBs start out as deletion candidates. A candidate is kept
/// if an instruction located in a non-candidate block still uses it (for
/// instance branches to it). Rescuing a block turns it into a non-candidate,
/// which can expose uses that rescue further blocks, so the filtering is
/// repeated until a pass rescues nothing. The remaining candidates are passed
/// to DeleteDeadBlocks.
static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};

  // True if BB is used by an instruction outside the current candidate set.
  // Uses by non-instructions are ignored.
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (!UseInst)
        continue;
      if (BBsToErase.count(UseInst->getParent()))
        continue;
      return true;
    }
    return false;
  };

  // Collect the blocks to rescue first and erase them afterwards, so the set
  // is never modified while it is being iterated. The candidate set only ever
  // shrinks, so this reaches the same fixed point as erasing on the fly.
  SmallVector<BasicBlock *, 6> StillUsed;
  do {
    StillUsed.clear();
    for (BasicBlock *BB : BBsToErase)
      if (HasRemainingUses(BB))
        StillUsed.push_back(BB);
    for (BasicBlock *BB : StillUsed)
      BBsToErase.erase(BB);
  } while (!StillUsed.empty());

  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
  DeleteDeadBlocks(BBVec);
}
/// Tile the perfectly nested loops in \p Loops by \p TileSizes.
///
/// For n input loops, n floor loops followed by n tile loops are generated and
/// returned in that order. The original loop nest body (including any code
/// between the original loop headers) is moved into the innermost tile loop,
/// the original induction variables are replaced by
/// "tile size * floor iv + tile iv", and control blocks of the original loops
/// that end up unused are removed from the function.
///
/// \param DL        Debug location for the instructions created here.
/// \param Loops     Loop nest to tile, outermost loop first; must not be empty.
/// \param TileSizes One tile size per loop in \p Loops.
///
/// \returns The floor loops followed by the tile loops.
std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                           ArrayRef<Value *> TileSizes) {
  // Compare the two sizes directly so no signed/unsigned comparison occurs.
  assert(TileSizes.size() == Loops.size() &&
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");

  CanonicalLoopInfo *OutermostLoop = Loops.front();
  CanonicalLoopInfo *InnermostLoop = Loops.back();
  Function *F = OutermostLoop->getBody()->getParent();
  BasicBlock *InnerEnter = InnermostLoop->getBody();
  BasicBlock *InnerLatch = InnermostLoop->getLatch();

  // Collect original trip counts and induction variable to be accessible by
  // index. Also, the structure of the original loops is not preserved during
  // the construction of the tiled loops, so do it before we scavenge the BBs of
  // any original CanonicalLoopInfo.
  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
  for (CanonicalLoopInfo *L : Loops) {
    OrigTripCounts.push_back(L->getTripCount());
    OrigIndVars.push_back(L->getIndVar());
  }

  // Collect the code between loop headers. These may contain SSA definitions
  // that are used in the loop nest body. To be usable within the innermost
  // body, these BasicBlocks will be sunk into the loop nest body. That is,
  // these instructions may be executed more often than before the tiling.
  // TODO: It would be sufficient to only sink them into body of the
  // corresponding tile loop.
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
  for (int i = 0; i < NumLoops - 1; ++i) {
    CanonicalLoopInfo *Surrounding = Loops[i];
    CanonicalLoopInfo *Nested = Loops[i + 1];

    BasicBlock *EnterBB = Surrounding->getBody();
    BasicBlock *ExitBB = Nested->getHeader();
    InbetweenCode.emplace_back(EnterBB, ExitBB);
  }

  // Compute the trip counts of the floor loops.
  Builder.SetCurrentDebugLocation(DL);
  Builder.restoreIP(OutermostLoop->getPreheaderIP());
  SmallVector<Value *, 4> FloorCompleteCount, FloorCount, FloorRems;
  for (int i = 0; i < NumLoops; ++i) {
    Value *TileSize = TileSizes[i];
    Value *OrigTripCount = OrigTripCounts[i];
    Type *IVType = OrigTripCount->getType();

    // Number of tiles that are completely filled, and the number of
    // iterations left over for a trailing partial tile.
    Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
    Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);

    // 0 if tripcount divides the tilesize, 1 otherwise.
    // 1 means we need an additional iteration for a partial tile.
    //
    // Unfortunately we cannot just use the roundup-formula
    //   (tripcount + tilesize - 1)/tilesize
    // because the summation might overflow. We do not want to introduce
    // undefined behavior when the untiled loop nest did not.
    Value *FloorTripOverflow =
        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));

    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
    Value *FloorTripCount =
        Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
                          "omp_floor" + Twine(i) + ".tripcount", true);

    // Remember some values for later use.
    FloorCompleteCount.push_back(FloorCompleteTripCount);
    FloorCount.push_back(FloorTripCount);
    FloorRems.push_back(FloorTripRem);
  }

  // Generate the new loop nest, from the outermost to the innermost.
  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);

  // The basic block of the surrounding loop that enters the generated loop
  // nest.
  BasicBlock *Enter = OutermostLoop->getPreheader();

  // The basic block of the surrounding loop where the inner code should
  // continue.
  BasicBlock *Continue = OutermostLoop->getAfter();

  // Where the next loop basic block should be inserted.
  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();

  // Create one loop skeleton and splice it between Enter and Continue; then
  // advance Enter/Continue so that the next loop is nested inside this one.
  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
    redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
    redirectTo(EmbeddedLoop->getAfter(), Continue, DL);

    // Setup the position where the next embedded loop connects to this loop.
    Enter = EmbeddedLoop->getBody();
    Continue = EmbeddedLoop->getLatch();
    OutroInsertBefore = EmbeddedLoop->getLatch();
    return EmbeddedLoop;
  };

  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
                                                  const Twine &NameBase) {
    for (auto P : enumerate(TripCounts)) {
      CanonicalLoopInfo *EmbeddedLoop =
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);
    }
  };

  EmbeddNewLoops(FloorCount, "floor");

  // Within the innermost floor loop, emit the code that computes the tile
  // sizes.
  Builder.SetInsertPoint(Enter->getTerminator());
  SmallVector<Value *, 4> TileCounts;
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    Value *TileSize = TileSizes[i];

    // The floor induction variable runs over [0, FloorCount). A partial tile,
    // if there is one, is the iteration whose index equals the number of
    // complete tiles. Comparing against the rounded-up FloorCount instead
    // would never match, making the partial tile run a full TileSize.
    Value *FloorIsEpilogue =
        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
    Value *TileTripCount =
        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);

    TileCounts.push_back(TileTripCount);
  }

  // Create the tile loops.
  EmbeddNewLoops(TileCounts, "tile");

  // Insert the inbetween code into the body.
  BasicBlock *BodyEnter = Enter;
  BasicBlock *BodyEntered = nullptr;
  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    BasicBlock *EnterBB = P.first;
    BasicBlock *ExitBB = P.second;

    if (BodyEnter)
      redirectTo(BodyEnter, EnterBB, DL);
    else
      redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);

    BodyEnter = nullptr;
    BodyEntered = ExitBB;
  }

  // Append the original loop nest body into the generated loop nest body.
  if (BodyEnter)
    redirectTo(BodyEnter, InnerEnter, DL);
  else
    redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
  redirectAllPredecessorsTo(InnerLatch, Continue, DL);

  // Replace the original induction variable with an induction variable computed
  // from the tile and floor induction variables.
  Builder.restoreIP(Result.back()->getBodyIP());
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
    Value *OrigIndVar = OrigIndVars[i];
    Value *Size = TileSizes[i];

    Value *Scale =
        Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
    Value *Shift =
        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
    OrigIndVar->replaceAllUsesWith(Shift);
  }

  // Remove unused parts of the original loops.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

#ifndef NDEBUG
  for (CanonicalLoopInfo *GenL : Result)
    GenL->assertOK();
#endif
  return Result;
}
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
llvm::Value *BufSize, llvm::Value *CpyBuf,
@ -1570,6 +1816,16 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks(
}
}
/// Append the six skeleton blocks of this loop to \p BBs.
void CanonicalLoopInfo::collectControlBlocks(
    SmallVectorImpl<BasicBlock *> &BBs) {
  // Only the blocks with a fixed role in the loop skeleton are listed. The
  // loop body may contain arbitrary control flow and is therefore left out;
  // for consistency the Body block, which is merely the entry into the body
  // code, is left out as well.
  BasicBlock *ControlBlocks[] = {Preheader, Header, Cond, Latch, Exit, After};
  BBs.reserve(BBs.size() + 6);
  BBs.append(std::begin(ControlBlocks), std::end(ControlBlocks));
}
void CanonicalLoopInfo::assertOK() const {
#ifndef NDEBUG
if (!IsValid)
@ -1604,11 +1860,16 @@ void CanonicalLoopInfo::assertOK() const {
assert(Body);
assert(Body->getSinglePredecessor() == Cond &&
"Body only reachable from exiting block");
assert(!isa<PHINode>(Body->front()));
assert(Latch);
assert(isa<BranchInst>(Latch->getTerminator()) &&
"Latch must terminate with unconditional branch");
assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
// TODO: To support simple redirecting of the end of the body code that has
// multiple; introduce another auxiliary basic block like preheader and after.
assert(Latch->getSinglePredecessor() != nullptr);
assert(!isa<PHINode>(Latch->front()));
assert(Exit);
assert(isa<BranchInst>(Exit->getTerminator()) &&
@ -1619,6 +1880,7 @@ void CanonicalLoopInfo::assertOK() const {
assert(After);
assert(After->getSinglePredecessor() == Exit &&
"After block only reachable from exit block");
assert(After->empty() || !isa<PHINode>(After->front()));
Instruction *IndVar = getIndVar();
assert(IndVar && "Canonical induction variable not found?");
@ -1626,6 +1888,17 @@ void CanonicalLoopInfo::assertOK() const {
"Induction variable must be an integer");
assert(cast<PHINode>(IndVar)->getParent() == Header &&
"Induction variable must be a PHI in the loop header");
assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
assert(
cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
->isOne());
Value *TripCount = getTripCount();
assert(TripCount && "Loop trip count not found?");

View File

@ -325,7 +325,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
"Pred is not a predecessor!");
// Return early if there are no PHI nodes to update.
if (!isa<PHINode>(begin()))
if (empty() || !isa<PHINode>(begin()))
return;
unsigned NumPreds = cast<PHINode>(front()).getNumIncomingValues();

View File

@ -23,6 +23,95 @@ using namespace omp;
namespace {
/// Emit a call to "printf" at the current insertion point of \p Builder that
/// takes \p FormatStr followed by \p Values as arguments.
///
/// The only purpose of the call is to act as a user of \p Values; "printf" is
/// chosen because test code commonly does so, and the generated code is never
/// executed. A variadic "printf" declaration returning i32 is added to the
/// module if it does not have one yet.
static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
                                  ArrayRef<Value *> Values) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();

  // Materialize the format string and form a pointer to its first character.
  GlobalVariable *FormatGV = Builder.CreateGlobalString(FormatStr, "", 0, M);
  Constant *ZeroIdx = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
  Constant *GEPIndices[] = {ZeroIdx, ZeroIdx};
  Constant *FormatPtr = ConstantExpr::getInBoundsGetElementPtr(
      FormatGV->getValueType(), FormatGV, GEPIndices);

  // Reuse an existing declaration, otherwise declare "i32 printf(...)".
  Function *Callee = M->getFunction("printf");
  if (Callee == nullptr) {
    FunctionType *CalleeTy =
        FunctionType::get(Builder.getInt32Ty(), /*isVarArg=*/true);
    Callee = Function::Create(CalleeTy, Function::ExternalLinkage, "printf", M);
  }

  // The format string goes first, followed by the values to consume.
  SmallVector<Value *, 4> CallArgs;
  CallArgs.push_back(FormatPtr);
  for (Value *V : Values)
    CallArgs.push_back(V);
  return Builder.CreateCall(Callee, CallArgs);
}
/// Check that the blocks of \p RefOrder are encountered, in that relative
/// order, by a depth-first walk over the control flow of \p F starting at its
/// entry block. Blocks not listed in \p RefOrder may be visited in between.
///
/// This is an easy way to verify the branching structure of the CFG without
/// checking every branch instruction individually. For the CFG of a
/// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
/// the body, i.e. the DFS order corresponds to the execution order with one
/// loop iteration.
static testing::AssertionResult
verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
  // Index of the next block of RefOrder that has not been seen yet.
  size_t NextIdx = 0;
  BasicBlock *LastMatched = nullptr;

  df_iterator_default_set<BasicBlock *, 16> Visited;
  for (BasicBlock *BB : llvm::depth_first_ext(&F->getEntryBlock(), Visited)) {
    if (NextIdx == RefOrder.size() || RefOrder[NextIdx] != BB)
      continue;
    LastMatched = BB;
    ++NextIdx;
  }

  if (NextIdx == RefOrder.size())
    return testing::AssertionSuccess();

  // RefOrder[NextIdx] was not reached after everything listed before it.
  BasicBlock *Missing = RefOrder[NextIdx];
  if (LastMatched)
    return testing::AssertionFailure()
           << "Expected " << LastMatched->getName() << " before "
           << Missing->getName() << " in control flow";
  return testing::AssertionFailure()
         << "Did not find " << Missing->getName() << " in control flow";
}
/// Check that the blocks of \p RefOrder appear, in that relative order, in the
/// linked list of blocks of \p F. Other blocks may be interleaved.
///
/// While the order in the linked list is not relevant for semantics, keeping
/// the order roughly in execution order makes its printout easier to read.
static testing::AssertionResult
verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
  // Suffix of RefOrder that still has to be found.
  ArrayRef<BasicBlock *> Remaining = RefOrder;
  BasicBlock *LastMatched = nullptr;

  for (BasicBlock &BB : *F) {
    if (Remaining.empty() || Remaining.front() != &BB)
      continue;
    LastMatched = &BB;
    Remaining = Remaining.drop_front();
  }

  if (Remaining.empty())
    return testing::AssertionSuccess();

  BasicBlock *Missing = Remaining.front();
  if (LastMatched)
    return testing::AssertionFailure()
           << "Expected " << LastMatched->getName() << " before "
           << Missing->getName() << " in function " << F->getName();
  return testing::AssertionFailure() << "Did not find " << Missing->getName()
                                     << " in function " << F->getName();
}
class OpenMPIRBuilderTest : public testing::Test {
protected:
void SetUp() override {
@ -1071,6 +1160,366 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
EXPECT_FALSE(verifyModule(*M, &errs()));
}
// Tile a single canonical loop with a constant tile size and check the
// resulting CFG structure and the rewritten induction variable.
TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");

  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
  Value *TripCount = F->getArg(0);

  BasicBlock *BodyCode = nullptr;
  Instruction *Call = nullptr;
  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
    Builder.restoreIP(CodeGenIP);
    BodyCode = Builder.GetInsertBlock();

    // Add something that consumes the induction variable to the body.
    Call = createPrintfCall(Builder, "%d\\n", {LC});
  };
  CanonicalLoopInfo *Loop =
      OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);

  // Finalize the function.
  Builder.restoreIP(Loop->getAfterIP());
  Builder.CreateRetVoid();

  // Before tiling, the call consumes the loop's induction variable directly.
  Instruction *OrigIndVar = Loop->getIndVar();
  EXPECT_EQ(Call->getOperand(1), OrigIndVar);

  // Tile the loop.
  Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
  std::vector<CanonicalLoopInfo *> GenLoops =
      OMPBuilder.tileLoops(DL, {Loop}, {TileSize});

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // One floor loop and one tile loop. Use an unsigned literal to match the
  // type of size().
  EXPECT_EQ(GenLoops.size(), 2u);
  CanonicalLoopInfo *Floor = GenLoops[0];
  CanonicalLoopInfo *Tile = GenLoops[1];

  BasicBlock *RefOrder[] = {
      Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
      Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
      Tile->getCond(),       Tile->getBody(),      BodyCode,
      Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
      Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
  };
  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));

  // Check the induction variable: the call must now consume
  // "TileSize * floor iv + tile iv", computed in the tile loop's body block.
  EXPECT_EQ(Call->getParent(), BodyCode);
  auto *Shift = cast<AddOperator>(Call->getOperand(1));
  EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
  EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
  auto *Scale = cast<MulOperator>(Shift->getOperand(0));
  EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
  EXPECT_EQ(Scale->getOperand(0), TileSize);
  EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
}
// Tile a two-deep loop nest and check that the floor loops enclose the tile
// loops, which in turn enclose the original body.
TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");

  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
  Value *TripCount = F->getArg(0);
  Type *LCTy = TripCount->getType();

  BasicBlock *BodyCode = nullptr;
  CanonicalLoopInfo *InnerLoop = nullptr;
  // The outer loop's body consists solely of the inner loop.
  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
                                llvm::Value *OuterLC) {
    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
                                  llvm::Value *InnerLC) {
      Builder.restoreIP(InnerCodeGenIP);
      BodyCode = Builder.GetInsertBlock();

      // Add something that consumes the induction variables to the body.
      createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
    };
    InnerLoop = OMPBuilder.createCanonicalLoop(
        OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
  };
  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
      Loc, OuterLoopBodyGenCB, TripCount, "outer");

  // Finalize the function.
  Builder.restoreIP(OuterLoop->getAfterIP());
  Builder.CreateRetVoid();

  // Tile the loop nest.
  Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
  Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
  std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
      DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Two floor loops followed by two tile loops. Use an unsigned literal to
  // match the type of size().
  EXPECT_EQ(GenLoops.size(), 4u);
  CanonicalLoopInfo *Floor1 = GenLoops[0];
  CanonicalLoopInfo *Floor2 = GenLoops[1];
  CanonicalLoopInfo *Tile1 = GenLoops[2];
  CanonicalLoopInfo *Tile2 = GenLoops[3];

  BasicBlock *RefOrder[] = {
      Floor1->getPreheader(),
      Floor1->getHeader(),
      Floor1->getCond(),
      Floor1->getBody(),
      Floor2->getPreheader(),
      Floor2->getHeader(),
      Floor2->getCond(),
      Floor2->getBody(),
      Tile1->getPreheader(),
      Tile1->getHeader(),
      Tile1->getCond(),
      Tile1->getBody(),
      Tile2->getPreheader(),
      Tile2->getHeader(),
      Tile2->getCond(),
      Tile2->getBody(),
      BodyCode,
      Tile2->getLatch(),
      Tile2->getExit(),
      Tile2->getAfter(),
      Tile1->getLatch(),
      Tile1->getExit(),
      Tile1->getAfter(),
      Floor2->getLatch(),
      Floor2->getExit(),
      Floor2->getAfter(),
      Floor1->getLatch(),
      Floor1->getExit(),
      Floor1->getAfter(),
  };
  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));
}
// Tile a two-deep loop nest whose loops have explicit start/stop/step values
// and check both the CFG structure and how the user-visible loop counters are
// derived from the floor and tile induction variables.
TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");

  IRBuilder<> Builder(BB);
  Value *TripCount = F->getArg(0);
  Type *LCTy = TripCount->getType();

  Value *OuterStartVal = ConstantInt::get(LCTy, 2);
  Value *OuterStopVal = TripCount;
  Value *OuterStep = ConstantInt::get(LCTy, 5);
  Value *InnerStartVal = ConstantInt::get(LCTy, 13);
  Value *InnerStopVal = TripCount;
  Value *InnerStep = ConstantInt::get(LCTy, 3);

  // Fix an insertion point for ComputeIP.
  BasicBlock *LoopNextEnter =
      BasicBlock::Create(M->getContext(), "loopnest.enter", F,
                         Builder.GetInsertBlock()->getNextNode());
  BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
  InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};

  InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
  OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});

  BasicBlock *BodyCode = nullptr;
  CanonicalLoopInfo *InnerLoop = nullptr;
  CallInst *Call = nullptr;
  // The outer loop's body consists solely of the inner loop. Both loops are
  // created with the same ComputeIP.
  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
                                llvm::Value *OuterLC) {
    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
                                  llvm::Value *InnerLC) {
      Builder.restoreIP(InnerCodeGenIP);
      BodyCode = Builder.GetInsertBlock();

      // Add something that consumes the induction variable to the body.
      Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
    };
    InnerLoop = OMPBuilder.createCanonicalLoop(
        OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
        InnerStep, false, false, ComputeIP, "inner");
  };
  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
      Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
      false, ComputeIP, "outer");

  // Finalize the function
  Builder.restoreIP(OuterLoop->getAfterIP());
  Builder.CreateRetVoid();

  // Tile the loop nest.
  Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
  Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
  std::vector<CanonicalLoopInfo *> GenLoops =
      OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Two floor loops followed by two tile loops. Use an unsigned literal to
  // match the type of size().
  EXPECT_EQ(GenLoops.size(), 4u);
  CanonicalLoopInfo *Floor0 = GenLoops[0];
  CanonicalLoopInfo *Floor1 = GenLoops[1];
  CanonicalLoopInfo *Tile0 = GenLoops[2];
  CanonicalLoopInfo *Tile1 = GenLoops[3];

  BasicBlock *RefOrder[] = {
      Floor0->getPreheader(),
      Floor0->getHeader(),
      Floor0->getCond(),
      Floor0->getBody(),
      Floor1->getPreheader(),
      Floor1->getHeader(),
      Floor1->getCond(),
      Floor1->getBody(),
      Tile0->getPreheader(),
      Tile0->getHeader(),
      Tile0->getCond(),
      Tile0->getBody(),
      Tile1->getPreheader(),
      Tile1->getHeader(),
      Tile1->getCond(),
      Tile1->getBody(),
      BodyCode,
      Tile1->getLatch(),
      Tile1->getExit(),
      Tile1->getAfter(),
      Tile0->getLatch(),
      Tile0->getExit(),
      Tile0->getAfter(),
      Floor1->getLatch(),
      Floor1->getExit(),
      Floor1->getAfter(),
      Floor0->getLatch(),
      Floor0->getExit(),
      Floor0->getAfter(),
  };
  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));

  EXPECT_EQ(Call->getParent(), BodyCode);

  // Outer loop counter: (TileSize0 * floor0 iv + tile0 iv) * OuterStep
  //                     + OuterStartVal
  auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
  EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
  auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
  EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
  auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
  auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
  EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());

  // Inner loop counter: (TileSize1 * floor1 iv + tile1 iv) * InnerStep
  //                     + InnerStartVal
  auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
  EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
  EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
  auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
  EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
  auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
  auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
  EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
}
// Check the trip count computed for the floor loop over a range of loop
// bounds, steps and tile sizes.
TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);

  // Create a loop, tile it, and extract its trip count. All input values are
  // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
  // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
  // do the same for the tile loop.
  auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
                           bool IsSigned, bool InclusiveStop,
                           int64_t TileSize) -> uint64_t {
    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
    Type *LCTy = Type::getInt16Ty(Ctx);
    Value *StartVal = ConstantInt::get(LCTy, Start);
    Value *StopVal = ConstantInt::get(LCTy, Stop);
    Value *StepVal = ConstantInt::get(LCTy, Step);

    // Generate a loop.
    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
    CanonicalLoopInfo *Loop =
        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
                                       StepVal, IsSigned, InclusiveStop);

    // tileLoops invalidates Loop, so remember where code following the loop
    // has to be emitted before tiling it.
    InsertPointTy AfterIP = Loop->getAfterIP();

    // Tile the loop.
    Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
    std::vector<CanonicalLoopInfo *> GenLoops =
        OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});

    // Set the insertion pointer to after loop, where the next loop will be
    // emitted.
    Builder.restoreIP(AfterIP);

    // Extract the trip count.
    CanonicalLoopInfo *FloorLoop = GenLoops[0];
    Value *FloorTripCount = FloorLoop->getTripCount();
    return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
  };

  // The expected values are unsigned to match GetFloorCount's return type.

  // Empty iteration domain.
  EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
  EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
  EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
  EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
  EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);

  // Only complete tiles.
  // NOTE(review): the first two checks are identical; possibly one of them was
  // meant to cover a different configuration.
  EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);

  // Only a partial tile.
  EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
  EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
  EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
  EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
  EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);

  // Complete and partial tiles.
  EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
  EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
  EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);

  // Close to 16-bit integer range.
  EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
  EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
  EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
  EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
  EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
  EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
  EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);

  // Finalize the function.
  Builder.CreateRetVoid();
  OMPBuilder.finalize();

  EXPECT_FALSE(verifyModule(*M, &errs()));
}
TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);