2018-11-07 03:58:42 +08:00
|
|
|
//===- LoopTiling.cpp --- Loop tiling pass ------------------------------*-===//
|
|
|
|
//
|
|
|
|
// Copyright 2019 The MLIR Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
// =============================================================================
|
|
|
|
//
|
|
|
|
// This file implements a pass to tile loop nests.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "mlir/Analysis/AffineAnalysis.h"
|
|
|
|
#include "mlir/Analysis/AffineStructures.h"
|
|
|
|
#include "mlir/Analysis/LoopAnalysis.h"
|
|
|
|
#include "mlir/IR/Builders.h"
|
|
|
|
#include "mlir/Pass.h"
|
|
|
|
#include "mlir/Transforms/LoopUtils.h"
|
|
|
|
#include "mlir/Transforms/Passes.h"
|
|
|
|
#include "mlir/Transforms/Utils.h"
|
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
|
|
|
|
using namespace mlir;
|
|
|
|
|
2019-01-26 14:14:04 +08:00
|
|
|
#define DEBUG_TYPE "loop-tile"
|
|
|
|
|
|
|
|
// Category under which the options of this pass are grouped.
static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");

// Single tile size applied to every loop of every band. The option is hidden;
// when it is not given, the pass falls back to LoopTiling::kDefaultTileSize.
static llvm::cl::opt<unsigned>
    clTileSize("tile-size",
               llvm::cl::desc("Use this tile size for all loops"),
               llvm::cl::Hidden, llvm::cl::cat(clOptionsCategory));
|
2018-11-07 03:58:42 +08:00
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2018-12-29 00:48:09 +08:00
|
|
|
/// A pass to perform loop tiling on all suitable loop nests of a Function.
|
2018-11-07 03:58:42 +08:00
|
|
|
struct LoopTiling : public FunctionPass {
|
2018-11-08 02:24:03 +08:00
|
|
|
LoopTiling() : FunctionPass(&LoopTiling::passID) {}
|
2018-12-31 15:10:35 +08:00
|
|
|
PassResult runOnFunction(Function *f) override;
|
2018-11-17 12:12:06 +08:00
|
|
|
constexpr static unsigned kDefaultTileSize = 4;
|
2018-11-07 10:34:18 +08:00
|
|
|
|
|
|
|
static char passID;
|
2018-11-07 03:58:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
2018-11-07 10:34:18 +08:00
|
|
|
// Out-of-class definition of the static pass identifier declared in
// LoopTiling; its address is what the LoopTiling constructor passes to
// FunctionPass.
char LoopTiling::passID = 0;
|
|
|
|
|
2018-11-07 03:58:42 +08:00
|
|
|
/// Creates a pass to perform loop tiling on all suitable loop nests of an
|
2018-12-29 00:48:09 +08:00
|
|
|
/// Function.
|
2018-11-07 03:58:42 +08:00
|
|
|
FunctionPass *mlir::createLoopTilingPass() { return new LoopTiling(); }
|
|
|
|
|
2018-12-29 08:05:35 +08:00
|
|
|
// Move the loop body of ForInst 'src' from 'src' into the specified location in
|
2018-11-07 03:58:42 +08:00
|
|
|
// destination's body.
|
2018-12-29 08:05:35 +08:00
|
|
|
static inline void moveLoopBody(ForInst *src, ForInst *dest,
|
2018-12-29 05:07:39 +08:00
|
|
|
Block::iterator loc) {
|
|
|
|
dest->getBody()->getInstructions().splice(loc,
|
|
|
|
src->getBody()->getInstructions());
|
2018-11-07 03:58:42 +08:00
|
|
|
}
|
|
|
|
|
2018-12-29 08:05:35 +08:00
|
|
|
// Move the loop body of ForInst 'src' from 'src' to the start of dest's body.
|
|
|
|
static inline void moveLoopBody(ForInst *src, ForInst *dest) {
|
2018-12-24 00:17:48 +08:00
|
|
|
moveLoopBody(src, dest, dest->getBody()->begin());
|
2018-11-07 03:58:42 +08:00
|
|
|
}
|
|
|
|
|
2018-12-08 09:35:49 +08:00
|
|
|
/// Constructs and sets new loop bounds after tiling for the case of
|
2018-11-07 03:58:42 +08:00
|
|
|
/// hyper-rectangular index sets, where the bounds of one dimension do not
|
|
|
|
/// depend on other dimensions. Bounds of each dimension can thus be treated
|
|
|
|
/// independently, and deriving the new bounds is much simpler and faster
|
|
|
|
/// than for the case of tiling arbitrary polyhedral shapes.
|
2018-12-29 08:05:35 +08:00
|
|
|
static void constructTiledIndexSetHyperRect(ArrayRef<ForInst *> origLoops,
|
|
|
|
ArrayRef<ForInst *> newLoops,
|
2018-12-08 09:35:49 +08:00
|
|
|
ArrayRef<unsigned> tileSizes) {
|
2018-11-07 03:58:42 +08:00
|
|
|
assert(!origLoops.empty());
|
|
|
|
assert(origLoops.size() == tileSizes.size());
|
|
|
|
|
2018-12-28 07:06:22 +08:00
|
|
|
FuncBuilder b(origLoops[0]);
|
2018-11-07 03:58:42 +08:00
|
|
|
unsigned width = origLoops.size();
|
|
|
|
|
|
|
|
// Bounds for tile space loops.
|
|
|
|
for (unsigned i = 0; i < width; i++) {
|
|
|
|
auto lbOperands = origLoops[i]->getLowerBoundOperands();
|
|
|
|
auto ubOperands = origLoops[i]->getUpperBoundOperands();
|
2019-01-08 07:06:32 +08:00
|
|
|
SmallVector<Value *, 4> newLbOperands(lbOperands);
|
|
|
|
SmallVector<Value *, 4> newUbOperands(ubOperands);
|
2018-11-07 03:58:42 +08:00
|
|
|
newLoops[i]->setLowerBound(newLbOperands, origLoops[i]->getLowerBoundMap());
|
|
|
|
newLoops[i]->setUpperBound(newUbOperands, origLoops[i]->getUpperBoundMap());
|
|
|
|
newLoops[i]->setStep(tileSizes[i]);
|
|
|
|
}
|
|
|
|
// Bounds for intra-tile loops.
|
|
|
|
for (unsigned i = 0; i < width; i++) {
|
|
|
|
int64_t largestDiv = getLargestDivisorOfTripCount(*origLoops[i]);
|
|
|
|
auto mayBeConstantCount = getConstantTripCount(*origLoops[i]);
|
2018-12-08 09:35:49 +08:00
|
|
|
// The lower bound is just the tile-space loop.
|
|
|
|
AffineMap lbMap = b.getDimIdentityMap();
|
2019-01-27 04:40:12 +08:00
|
|
|
newLoops[width + i]->setLowerBound(
|
|
|
|
/*operands=*/newLoops[i]->getInductionVar(), lbMap);
|
2018-11-09 03:46:18 +08:00
|
|
|
|
|
|
|
// Set the upper bound.
|
2018-11-07 03:58:42 +08:00
|
|
|
if (mayBeConstantCount.hasValue() &&
|
|
|
|
mayBeConstantCount.getValue() < tileSizes[i]) {
|
2018-11-09 03:46:18 +08:00
|
|
|
// Trip count is less than tile size; upper bound is the trip count.
|
2018-12-08 09:35:49 +08:00
|
|
|
auto ubMap = b.getConstantAffineMap(mayBeConstantCount.getValue());
|
2018-11-07 03:58:42 +08:00
|
|
|
newLoops[width + i]->setUpperBoundMap(ubMap);
|
2018-11-09 03:46:18 +08:00
|
|
|
} else if (largestDiv % tileSizes[i] != 0) {
|
|
|
|
// Intra-tile loop ii goes from i to min(i + tileSize, ub_i).
|
2018-12-08 09:35:49 +08:00
|
|
|
// Construct the upper bound map; the operands are the original operands
|
|
|
|
// with 'i' (tile-space loop) appended to it. The new upper bound map is
|
|
|
|
// the original one with an additional expression i + tileSize appended.
|
2018-12-28 06:35:10 +08:00
|
|
|
SmallVector<Value *, 4> ubOperands(origLoops[i]->getUpperBoundOperands());
|
2019-01-27 04:40:12 +08:00
|
|
|
ubOperands.push_back(newLoops[i]->getInductionVar());
|
2018-12-08 09:35:49 +08:00
|
|
|
|
|
|
|
auto origUbMap = origLoops[i]->getUpperBoundMap();
|
|
|
|
SmallVector<AffineExpr, 4> boundExprs;
|
|
|
|
boundExprs.reserve(1 + origUbMap.getNumResults());
|
|
|
|
auto dim = b.getAffineDimExpr(origUbMap.getNumInputs());
|
|
|
|
// The new upper bound map is the original one with an additional
|
|
|
|
// expression i + tileSize appended.
|
|
|
|
boundExprs.push_back(dim + tileSizes[i]);
|
2019-01-08 07:06:32 +08:00
|
|
|
boundExprs.append(origUbMap.getResults().begin(),
|
2018-12-08 09:35:49 +08:00
|
|
|
origUbMap.getResults().end());
|
|
|
|
auto ubMap =
|
|
|
|
b.getAffineMap(origUbMap.getNumInputs() + 1, 0, boundExprs, {});
|
|
|
|
newLoops[width + i]->setUpperBound(/*operands=*/ubOperands, ubMap);
|
2018-11-09 03:46:18 +08:00
|
|
|
} else {
|
|
|
|
// No need of the min expression.
|
2018-12-08 09:35:49 +08:00
|
|
|
auto dim = b.getAffineDimExpr(0);
|
|
|
|
auto ubMap = b.getAffineMap(1, 0, dim + tileSizes[i], {});
|
2019-01-27 04:40:12 +08:00
|
|
|
newLoops[width + i]->setUpperBound(newLoops[i]->getInductionVar(), ubMap);
|
2018-11-07 03:58:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tiles the specified band of perfectly nested loops creating tile-space loops
|
|
|
|
/// and intra-tile loops. A band is a contiguous set of loops.
|
|
|
|
// TODO(bondhugula): handle non hyper-rectangular spaces.
|
2018-12-29 08:05:35 +08:00
|
|
|
UtilResult mlir::tileCodeGen(ArrayRef<ForInst *> band,
|
2018-11-07 03:58:42 +08:00
|
|
|
ArrayRef<unsigned> tileSizes) {
|
|
|
|
assert(!band.empty());
|
|
|
|
assert(band.size() == tileSizes.size());
|
2018-12-29 08:05:35 +08:00
|
|
|
// Check if the supplied for inst's are all successively nested.
|
2018-11-07 03:58:42 +08:00
|
|
|
for (unsigned i = 1, e = band.size(); i < e; i++) {
|
2018-12-29 08:05:35 +08:00
|
|
|
assert(band[i]->getParentInst() == band[i - 1]);
|
2018-11-07 03:58:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
auto origLoops = band;
|
|
|
|
|
2018-12-29 08:05:35 +08:00
|
|
|
ForInst *rootForInst = origLoops[0];
|
|
|
|
auto loc = rootForInst->getLoc();
|
2018-11-07 03:58:42 +08:00
|
|
|
// Note that width is at least one since band isn't empty.
|
|
|
|
unsigned width = band.size();
|
|
|
|
|
2018-12-29 08:05:35 +08:00
|
|
|
SmallVector<ForInst *, 12> newLoops(2 * width);
|
|
|
|
ForInst *innermostPointLoop;
|
2018-11-07 03:58:42 +08:00
|
|
|
|
|
|
|
// The outermost among the loops as we add more..
|
2018-12-29 08:05:35 +08:00
|
|
|
auto *topLoop = rootForInst;
|
2018-11-07 03:58:42 +08:00
|
|
|
|
|
|
|
// Add intra-tile (or point) loops.
|
|
|
|
for (unsigned i = 0; i < width; i++) {
|
2018-12-28 07:06:22 +08:00
|
|
|
FuncBuilder b(topLoop);
|
2018-11-07 03:58:42 +08:00
|
|
|
// Loop bounds will be set later.
|
|
|
|
auto *pointLoop = b.createFor(loc, 0, 0);
|
2018-12-29 05:07:39 +08:00
|
|
|
pointLoop->getBody()->getInstructions().splice(
|
|
|
|
pointLoop->getBody()->begin(), topLoop->getBlock()->getInstructions(),
|
2018-12-24 00:17:48 +08:00
|
|
|
topLoop);
|
2018-11-07 03:58:42 +08:00
|
|
|
newLoops[2 * width - 1 - i] = pointLoop;
|
|
|
|
topLoop = pointLoop;
|
|
|
|
if (i == 0)
|
|
|
|
innermostPointLoop = pointLoop;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add tile space loops;
|
|
|
|
for (unsigned i = width; i < 2 * width; i++) {
|
2018-12-28 07:06:22 +08:00
|
|
|
FuncBuilder b(topLoop);
|
2018-11-07 03:58:42 +08:00
|
|
|
// Loop bounds will be set later.
|
|
|
|
auto *tileSpaceLoop = b.createFor(loc, 0, 0);
|
2018-12-29 05:07:39 +08:00
|
|
|
tileSpaceLoop->getBody()->getInstructions().splice(
|
|
|
|
tileSpaceLoop->getBody()->begin(),
|
|
|
|
topLoop->getBlock()->getInstructions(), topLoop);
|
2018-11-07 03:58:42 +08:00
|
|
|
newLoops[2 * width - i - 1] = tileSpaceLoop;
|
|
|
|
topLoop = tileSpaceLoop;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Move the loop body of the original nest to the new one.
|
|
|
|
moveLoopBody(origLoops[origLoops.size() - 1], innermostPointLoop);
|
|
|
|
|
2019-01-27 04:40:12 +08:00
|
|
|
SmallVector<Value *, 8> origLoopIVs = extractForInductionVars(band);
|
|
|
|
SmallVector<Optional<Value *>, 6> ids(origLoopIVs.begin(), origLoopIVs.end());
|
2018-12-19 08:38:24 +08:00
|
|
|
FlatAffineConstraints cst;
|
|
|
|
getIndexSet(band, &cst);
|
2018-12-08 09:35:49 +08:00
|
|
|
|
|
|
|
if (!cst.isHyperRectangular(0, width)) {
|
2018-12-29 08:05:35 +08:00
|
|
|
rootForInst->emitError("tiled code generation unimplemented for the"
|
2018-12-08 09:35:49 +08:00
|
|
|
"non-hyperrectangular case");
|
2018-11-07 03:58:42 +08:00
|
|
|
return UtilResult::Failure;
|
|
|
|
}
|
|
|
|
|
2018-12-08 09:35:49 +08:00
|
|
|
constructTiledIndexSetHyperRect(origLoops, newLoops, tileSizes);
|
|
|
|
// In this case, the point loop IVs just replace the original ones.
|
|
|
|
for (unsigned i = 0; i < width; i++) {
|
2019-01-27 04:40:12 +08:00
|
|
|
origLoopIVs[i]->replaceAllUsesWith(newLoops[i + width]->getInductionVar());
|
2018-12-08 09:35:49 +08:00
|
|
|
}
|
|
|
|
|
2018-11-07 03:58:42 +08:00
|
|
|
// Erase the old loop nest.
|
2018-12-29 08:05:35 +08:00
|
|
|
rootForInst->erase();
|
2018-11-07 03:58:42 +08:00
|
|
|
|
|
|
|
return UtilResult::Success;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Identify valid and profitable bands of loops to tile. This is currently just
|
|
|
|
// a temporary placeholder to test the mechanics of tiled code generation.
|
|
|
|
// Returns all maximal outermost perfect loop nests to tile.
|
2018-12-29 00:48:09 +08:00
|
|
|
static void getTileableBands(Function *f,
|
2018-12-29 08:05:35 +08:00
|
|
|
std::vector<SmallVector<ForInst *, 6>> *bands) {
|
|
|
|
// Get maximal perfect nest of 'for' insts starting from root (inclusive).
|
|
|
|
auto getMaximalPerfectLoopNest = [&](ForInst *root) {
|
|
|
|
SmallVector<ForInst *, 6> band;
|
|
|
|
ForInst *currInst = root;
|
2018-11-30 07:25:40 +08:00
|
|
|
do {
|
2018-12-29 08:05:35 +08:00
|
|
|
band.push_back(currInst);
|
|
|
|
} while (currInst->getBody()->getInstructions().size() == 1 &&
|
2018-12-30 07:33:43 +08:00
|
|
|
(currInst = dyn_cast<ForInst>(&currInst->getBody()->front())));
|
2018-11-07 03:58:42 +08:00
|
|
|
bands->push_back(band);
|
|
|
|
};
|
|
|
|
|
2018-12-30 07:33:43 +08:00
|
|
|
for (auto &block : *f) {
|
|
|
|
for (auto &inst : block) {
|
|
|
|
auto *forInst = dyn_cast<ForInst>(&inst);
|
|
|
|
if (!forInst)
|
|
|
|
continue;
|
|
|
|
getMaximalPerfectLoopNest(forInst);
|
|
|
|
}
|
2018-11-07 03:58:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-31 15:10:35 +08:00
|
|
|
/// Tiles every band returned by getTileableBands for 'f', using one uniform
/// tile size for all loops. Stops and returns failure() as soon as code
/// generation fails for a band; returns success() otherwise.
PassResult LoopTiling::runOnFunction(Function *f) {
  std::vector<SmallVector<ForInst *, 6>> bands;
  getTileableBands(f, &bands);

  // Temporary tile sizes: the command-line value if the option was given,
  // the built-in default otherwise.
  unsigned tileSize = kDefaultTileSize;
  if (clTileSize.getNumOccurrences() > 0)
    tileSize = clTileSize;

  for (const auto &loopBand : bands) {
    SmallVector<unsigned, 6> bandTileSizes(loopBand.size(), tileSize);
    if (tileCodeGen(loopBand, bandTileSizes))
      return failure();
  }
  return success();
}
|
2018-11-07 10:34:18 +08:00
|
|
|
|
|
|
|
// Static registration object for the LoopTiling pass, constructed with the
// strings "loop-tile" and "Tile loop nests".
// NOTE(review): these are presumably the pass's command-line name and its
// help description -- confirm against PassRegistration.
static PassRegistration<LoopTiling> pass("loop-tile", "Tile loop nests");
|