forked from OSchip/llvm-project
Introduce loop tiling code generation (hyper-rectangular case)
- simple perfectly nested band tiling with fixed tile sizes. - only the hyper-rectangular case is handled, with other limitations of getIndexSet applying (constant loop bounds, etc.); once the latter utility is extended, tiled code generation should become more general. - Add FlatAffineConstraints::isHyperRectangular() PiperOrigin-RevId: 220324933
This commit is contained in:
parent
5e01000d46
commit
6cd5d5c544
|
@ -473,6 +473,10 @@ public:
|
|||
SmallVectorImpl<AffineMap> *ubs,
|
||||
MLIRContext *context);
|
||||
|
||||
/// Returns true if the set is hyper-rectangular on the specified contiguous
|
||||
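/// set of identifiers, i.e., those at positions [pos, pos + num). The check
/// returns false as soon as a single equality or inequality has non-zero
/// coefficients on two or more of these identifiers.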
|
||||
bool isHyperRectangular(unsigned pos, unsigned num) const;
|
||||
|
||||
// More expensive ones.
|
||||
void removeDuplicates();
|
||||
|
||||
|
|
|
@ -86,6 +86,9 @@ AffineMap getUnrolledLoopUpperBound(const ForStmt &forStmt,
|
|||
UtilResult stmtBodySkew(ForStmt *forStmt, ArrayRef<uint64_t> delays,
|
||||
bool unrollPrologueEpilogue = false);
|
||||
|
||||
/// Tiles the specified band of perfectly nested loops creating tile-space loops
|
||||
/// and intra-tile loops. A band is a contiguous set of loops. 'tileSizes' must
/// provide exactly one tile size per loop in 'band'.
|
||||
UtilResult tileCodeGen(ArrayRef<ForStmt *> band, ArrayRef<unsigned> tileSizes);
|
||||
|
||||
} // end namespace mlir
|
||||
|
||||
|
|
|
@ -67,6 +67,9 @@ FunctionPass *createComposeAffineMapsPass();
|
|||
/// generated CFG functions.
|
||||
ModulePass *createConvertToCFGPass();
|
||||
|
||||
/// Creates a pass to perform tiling on loop nests.
|
||||
FunctionPass *createLoopTilingPass();
|
||||
|
||||
} // end namespace mlir
|
||||
|
||||
#endif // MLIR_TRANSFORMS_PASSES_H
|
||||
|
|
|
@ -1100,7 +1100,6 @@ bool FlatAffineConstraints::getDimensionBounds(unsigned pos, unsigned num,
|
|||
MLIRContext *context) {
|
||||
assert(pos + num < getNumCols());
|
||||
|
||||
// Only constant dim bounds for now.
|
||||
projectOut(0, pos);
|
||||
projectOut(pos + num, getNumIds() - num);
|
||||
|
||||
|
@ -1108,6 +1107,7 @@ bool FlatAffineConstraints::getDimensionBounds(unsigned pos, unsigned num,
|
|||
ubs->resize(num, AffineMap::Null());
|
||||
|
||||
for (int i = static_cast<int>(num) - 1; i >= 0; i--) {
|
||||
// Only constant dim bounds for now.
|
||||
auto lb = getConstantLowerBound(i);
|
||||
auto ub = getConstantUpperBound(i);
|
||||
// TODO(mlir-team): handle arbitrary bounds.
|
||||
|
@ -1168,10 +1168,37 @@ Optional<int64_t> FlatAffineConstraints::getConstantUpperBound(unsigned pos) {
|
|||
return ub;
|
||||
}
|
||||
|
||||
// A simple (naive and conservative) check for hyper-rectangularlity.
|
||||
bool FlatAffineConstraints::isHyperRectangular(unsigned pos,
|
||||
unsigned num) const {
|
||||
assert(pos < getNumCols() - 1);
|
||||
// Check for two non-zero coefficients in the range [pos, pos + sum).
|
||||
for (unsigned r = 0; r < getNumInequalities(); r++) {
|
||||
unsigned sum = 0;
|
||||
for (unsigned c = pos; c < pos + num; c++) {
|
||||
if (atIneq(r, c) != 0)
|
||||
sum++;
|
||||
}
|
||||
if (sum > 1)
|
||||
return false;
|
||||
}
|
||||
for (unsigned r = 0; r < getNumEqualities(); r++) {
|
||||
unsigned sum = 0;
|
||||
for (unsigned c = pos; c < pos + num; c++) {
|
||||
if (atEq(r, c) != 0)
|
||||
sum++;
|
||||
}
|
||||
if (sum > 1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void FlatAffineConstraints::print(raw_ostream &os) const {
|
||||
assert(inequalities.size() == getNumInequalities() * numReservedCols);
|
||||
assert(equalities.size() == getNumEqualities() * numReservedCols);
|
||||
os << "\nConstraints:\n";
|
||||
os << "\nConstraints (" << getNumDimIds() << " dims, " << getNumSymbolIds()
|
||||
<< " symbols, " << getNumLocalIds() << " locals): \n";
|
||||
for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) {
|
||||
for (unsigned j = 0; j < getNumCols(); ++j) {
|
||||
os << atEq(i, j) << " ";
|
||||
|
|
|
@ -0,0 +1,240 @@
|
|||
//===- LoopTiling.cpp --- Loop tiling pass ------------------------------*-===//
|
||||
//
|
||||
// Copyright 2019 The MLIR Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
// =============================================================================
|
||||
//
|
||||
// This file implements a pass to tile loop nests.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Analysis/AffineAnalysis.h"
|
||||
#include "mlir/Analysis/AffineStructures.h"
|
||||
#include "mlir/Analysis/LoopAnalysis.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/Pass.h"
|
||||
#include "mlir/Transforms/LoopUtils.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
#include "mlir/Transforms/Utils.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
// Tile size for all loops.
|
||||
static llvm::cl::opt<unsigned>
|
||||
clTileSize("tile-size", llvm::cl::Hidden,
|
||||
llvm::cl::desc("Use this tile size for all loops"));
|
||||
|
||||
namespace {
|
||||
|
||||
/// A pass to perform loop tiling on all suitable loop nests of an MLFunction.
|
||||
struct LoopTiling : public FunctionPass {
|
||||
PassResult runOnMLFunction(MLFunction *f) override;
|
||||
constexpr static unsigned kDefaultTileSize = 32;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// Creates a pass to perform loop tiling on all suitable loop nests of an
|
||||
/// MLFunction.
|
||||
FunctionPass *mlir::createLoopTilingPass() { return new LoopTiling(); }
|
||||
|
||||
// Move the loop body of ForStmt 'src' from 'src' into the specified location in
|
||||
// destination's body.
|
||||
static inline void moveLoopBody(ForStmt *src, ForStmt *dest,
|
||||
StmtBlock::iterator loc) {
|
||||
dest->getStatements().splice(loc, src->getStatements());
|
||||
}
|
||||
|
||||
// Move the loop body of ForStmt 'src' from 'src' to the start of dest's body.
|
||||
static inline void moveLoopBody(ForStmt *src, ForStmt *dest) {
|
||||
moveLoopBody(src, dest, dest->begin());
|
||||
}
|
||||
|
||||
/// Constructs/sets new loop bounds after tiling for the case of
|
||||
/// hyper-rectangular index sets, where the bounds of one dimension do not
|
||||
/// depend on other dimensions. Bounds of each dimension can thus be treated
|
||||
/// independently, and deriving the new bounds is much simpler and faster
|
||||
/// than for the case of tiling arbitrary polyhedral shapes.
|
||||
static bool setTiledIndexSetHyperRect(ArrayRef<ForStmt *> origLoops,
|
||||
ArrayRef<ForStmt *> newLoops,
|
||||
ArrayRef<unsigned> tileSizes) {
|
||||
assert(!origLoops.empty());
|
||||
assert(origLoops.size() == tileSizes.size());
|
||||
|
||||
MLFuncBuilder b(origLoops[0]);
|
||||
unsigned width = origLoops.size();
|
||||
|
||||
// Bounds for tile space loops.
|
||||
for (unsigned i = 0; i < width; i++) {
|
||||
auto lbOperands = origLoops[i]->getLowerBoundOperands();
|
||||
auto ubOperands = origLoops[i]->getUpperBoundOperands();
|
||||
SmallVector<MLValue *, 4> newLbOperands(lbOperands.begin(),
|
||||
lbOperands.end());
|
||||
SmallVector<MLValue *, 4> newUbOperands(ubOperands.begin(),
|
||||
ubOperands.end());
|
||||
newLoops[i]->setLowerBound(newLbOperands, origLoops[i]->getLowerBoundMap());
|
||||
newLoops[i]->setUpperBound(newUbOperands, origLoops[i]->getUpperBoundMap());
|
||||
newLoops[i]->setStep(tileSizes[i]);
|
||||
}
|
||||
// Bounds for intra-tile loops.
|
||||
for (unsigned i = 0; i < width; i++) {
|
||||
// TODO(bondhugula): Keep it simple for now - constant upper bound.
|
||||
if (!origLoops[i]->hasConstantUpperBound())
|
||||
return false;
|
||||
int64_t largestDiv = getLargestDivisorOfTripCount(*origLoops[i]);
|
||||
auto mayBeConstantCount = getConstantTripCount(*origLoops[i]);
|
||||
AffineMap lbMap, ubMap;
|
||||
auto dim = b.getAffineDimExpr(0);
|
||||
lbMap = b.getAffineMap(1, 0, dim, {});
|
||||
newLoops[width + i]->setLowerBound(newLoops[i], lbMap);
|
||||
if (mayBeConstantCount.hasValue() &&
|
||||
mayBeConstantCount.getValue() < tileSizes[i]) {
|
||||
ubMap = b.getConstantAffineMap(mayBeConstantCount.getValue() - 1);
|
||||
newLoops[width + i]->setUpperBoundMap(ubMap);
|
||||
} else if (largestDiv % tileSizes[i] == 0) {
|
||||
// No need of min.
|
||||
ubMap = b.getAffineMap(1, 0, dim + tileSizes[i] - 1, {});
|
||||
newLoops[width + i]->setUpperBound(newLoops[i], ubMap);
|
||||
} else {
|
||||
auto ubMax =
|
||||
b.getAffineConstantExpr(origLoops[i]->getConstantUpperBound());
|
||||
ubMap = b.getAffineMap(1, 0, {dim + tileSizes[i] - 1, ubMax}, {});
|
||||
newLoops[width + i]->setUpperBound(newLoops[i], ubMap);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Tiles the specified band of perfectly nested loops creating tile-space loops
|
||||
/// and intra-tile loops. A band is a contiguous set of loops.
|
||||
// TODO(bondhugula): handle non-constant bounds.
|
||||
// TODO(bondhugula): handle non hyper-rectangular spaces.
|
||||
UtilResult mlir::tileCodeGen(ArrayRef<ForStmt *> band,
|
||||
ArrayRef<unsigned> tileSizes) {
|
||||
assert(!band.empty());
|
||||
assert(band.size() == tileSizes.size());
|
||||
// Check if the supplied for stmt's are all successively nested.
|
||||
for (unsigned i = 1, e = band.size(); i < e; i++) {
|
||||
assert(band[i]->getParentStmt() == band[i - 1]);
|
||||
}
|
||||
|
||||
auto origLoops = band;
|
||||
|
||||
ForStmt *rootForStmt = origLoops[0];
|
||||
auto *loc = rootForStmt->getLoc();
|
||||
// Note that width is at least one since band isn't empty.
|
||||
unsigned width = band.size();
|
||||
|
||||
SmallVector<ForStmt *, 12> newLoops(2 * width);
|
||||
ForStmt *innermostPointLoop;
|
||||
|
||||
// The outermost among the loops as we add more..
|
||||
auto *topLoop = rootForStmt;
|
||||
|
||||
// Add intra-tile (or point) loops.
|
||||
for (unsigned i = 0; i < width; i++) {
|
||||
MLFuncBuilder b(topLoop);
|
||||
// Loop bounds will be set later.
|
||||
auto *pointLoop = b.createFor(loc, 0, 0);
|
||||
pointLoop->getStatements().splice(
|
||||
pointLoop->begin(), topLoop->getBlock()->getStatements(), topLoop);
|
||||
newLoops[2 * width - 1 - i] = pointLoop;
|
||||
topLoop = pointLoop;
|
||||
if (i == 0)
|
||||
innermostPointLoop = pointLoop;
|
||||
}
|
||||
|
||||
// Add tile space loops;
|
||||
for (unsigned i = width; i < 2 * width; i++) {
|
||||
MLFuncBuilder b(topLoop);
|
||||
// Loop bounds will be set later.
|
||||
auto *tileSpaceLoop = b.createFor(loc, 0, 0);
|
||||
tileSpaceLoop->getStatements().splice(
|
||||
tileSpaceLoop->begin(), topLoop->getBlock()->getStatements(), topLoop);
|
||||
newLoops[2 * width - i - 1] = tileSpaceLoop;
|
||||
topLoop = tileSpaceLoop;
|
||||
}
|
||||
|
||||
// Move the loop body of the original nest to the new one.
|
||||
moveLoopBody(origLoops[origLoops.size() - 1], innermostPointLoop);
|
||||
|
||||
SmallVector<MLValue *, 6> origLoopIVs(band.begin(), band.end());
|
||||
|
||||
FlatAffineConstraints cst(width, 0);
|
||||
addIndexSet(origLoopIVs, &cst);
|
||||
if (cst.isHyperRectangular(0, width)) {
|
||||
if (!setTiledIndexSetHyperRect(origLoops, newLoops, tileSizes)) {
|
||||
rootForStmt->emitError(
|
||||
"tiled code generation unimplemented for this case");
|
||||
return UtilResult::Failure;
|
||||
}
|
||||
// In this case, the point loop IVs just replace the original ones.
|
||||
for (unsigned i = 0; i < width; i++) {
|
||||
origLoopIVs[i]->replaceAllUsesWith(newLoops[i + width]);
|
||||
}
|
||||
} else {
|
||||
rootForStmt->emitError("tiled code generation unimplemented for this case");
|
||||
return UtilResult::Failure;
|
||||
}
|
||||
|
||||
// Erase the old loop nest.
|
||||
rootForStmt->erase();
|
||||
|
||||
return UtilResult::Success;
|
||||
}
|
||||
|
||||
// Identify valid and profitable bands of loops to tile. This is currently just
|
||||
// a temporary placeholder to test the mechanics of tiled code generation.
|
||||
// Returns all maximal outermost perfect loop nests to tile.
|
||||
static void getTileableBands(MLFunction *f,
|
||||
std::vector<SmallVector<ForStmt *, 6>> *bands) {
|
||||
auto getMaximalPerfectLoopNest = [&](ForStmt *root) {
|
||||
SmallVector<ForStmt *, 6> band;
|
||||
band.push_back(root);
|
||||
|
||||
ForStmt *currStmt = root;
|
||||
ForStmt *nestedFor;
|
||||
while (currStmt->getStatements().size() == 1 &&
|
||||
(nestedFor = dyn_cast<ForStmt>(&*currStmt->begin()))) {
|
||||
band.push_back(nestedFor);
|
||||
currStmt = nestedFor;
|
||||
}
|
||||
bands->push_back(band);
|
||||
};
|
||||
|
||||
for (auto &stmt : *f) {
|
||||
ForStmt *forStmt = dyn_cast<ForStmt>(&stmt);
|
||||
if (!forStmt)
|
||||
continue;
|
||||
getMaximalPerfectLoopNest(forStmt);
|
||||
}
|
||||
}
|
||||
|
||||
/// Tiles every band returned by getTileableBands for 'f', using one tile size
/// for all loops. Returns failure() as soon as tiling of a band fails, and
/// success() otherwise.
PassResult LoopTiling::runOnMLFunction(MLFunction *f) {
  // Temporary tile sizes: the command-line value if one was given, else the
  // default.
  unsigned tileSize = kDefaultTileSize;
  if (clTileSize.getNumOccurrences() > 0)
    tileSize = clTileSize;

  std::vector<SmallVector<ForStmt *, 6>> bands;
  getTileableBands(f, &bands);

  for (const auto &band : bands) {
    SmallVector<unsigned, 6> tileSizes(band.size(), tileSize);
    // A non-zero result from tileCodeGen is treated as failure.
    if (tileCodeGen(band, tileSizes))
      return failure();
  }
  return success();
}
|
|
@ -1,4 +1,4 @@
|
|||
//===- Unroll.cpp - Code to perform loop unrolling ------------------------===//
|
||||
//===- LoopUnroll.cpp - Code to perform loop unrolling --------------------===//
|
||||
//
|
||||
// Copyright 2019 The MLIR Authors.
|
||||
//
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
// RUN: mlir-opt %s -loop-tile | FileCheck %s
|
||||
|
||||
// CHECK: #map0 = (d0) -> (d0 + 31)
|
||||
// CHECK: #map1 = (d0) -> (d0 + 31, 50)
|
||||
// CHECK-LABEL: mlfunc @loop_tiling()
|
||||
// CHECK-NEXT: for %i0 = 0 to 255 step 32 {
|
||||
// CHECK-NEXT: for %i1 = 0 to 511 step 32 {
|
||||
// CHECK-NEXT: for %i2 = 0 to 1023 step 32 {
|
||||
// CHECK-NEXT: for %i3 = (d0) -> (d0)(%i0) to #map0(%i0) {
|
||||
// CHECK-NEXT: for %i4 = (d0) -> (d0)(%i1) to #map0(%i1) {
|
||||
// CHECK-NEXT: for %i5 = (d0) -> (d0)(%i2) to #map0(%i2) {
|
||||
// CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i6 = 0 to 50 step 32 {
|
||||
// CHECK-NEXT: for %i7 = (d0) -> (d0)(%i6) to min #map1(%i6) {
|
||||
// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i8 = 0 to 20 step 32 {
|
||||
// CHECK-NEXT: for %i9 = (d0) -> (d0)(%i8) to 20 {
|
||||
// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
mlfunc @loop_tiling() {
  // Perfect nest of depth three; the CHECK lines expect intra-tile loops
  // without a min in their upper bounds.
  for %i = 0 to 255 {
    for %j = 0 to 511 {
      for %k = 0 to 1023 {
        "foo"(%i, %j, %k) : (index, index, index) -> ()
      }
    }
  }

  // Single loop; the CHECK lines expect a min in the intra-tile upper bound.
  for %x = 0 to 50 {
    "bar"(%x, %x) : (index, index) -> ()
  }

  // Intra-tile loop won't need a min expression.
  for %y = 0 to 20 {
    "foobar"(%y) : (index) -> ()
  }

  return
}
|
|
@ -74,6 +74,7 @@ enum Passes {
|
|||
ConvertToCFG,
|
||||
TFLiteLegaize,
|
||||
LoopFusion,
|
||||
LoopTiling,
|
||||
LoopUnroll,
|
||||
LoopUnrollAndJam,
|
||||
MemRefBoundCheck,
|
||||
|
@ -97,6 +98,7 @@ static cl::list<Passes> passList(
|
|||
clEnumValN(ConvertToCFG, "convert-to-cfg",
|
||||
"Convert all ML functions in the module to CFG ones"),
|
||||
clEnumValN(LoopFusion, "loop-fusion", "Fuse loop nests"),
|
||||
clEnumValN(LoopTiling, "loop-tile", "Tile loop nests"),
|
||||
clEnumValN(LoopUnroll, "loop-unroll", "Unroll loops"),
|
||||
clEnumValN(LoopUnrollAndJam, "loop-unroll-jam", "Unroll and jam loops"),
|
||||
clEnumValN(MemRefBoundCheck, "memref-bound-check",
|
||||
|
@ -207,6 +209,9 @@ static OptResult performActions(SourceMgr &sourceMgr, MLIRContext *context) {
|
|||
case LoopFusion:
|
||||
pass = createLoopFusionPass();
|
||||
break;
|
||||
case LoopTiling:
|
||||
pass = createLoopTilingPass();
|
||||
break;
|
||||
case LoopUnroll:
|
||||
pass = createLoopUnrollPass();
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue