[mlir] Make ViewLikeInterface Range work with attributes

While most methods in ViewLikeInterface accept an `OpFoldResult` for
each offset/size/stride, which may be static (represented as an
`Attribute`) or dynamic (represented as a `Value`), the `Range`
abstraction only accepted `Value`s. This can lead to known-constant
offsets/sizes/strides being materialized into constant operations,
hindering further constant propagation unless the constant folding
pass is run explicitly, and thus producing more complicated addressing
code than necessary. Switch `Range` to use `OpFoldResult` (a short
sketch follows the commit metadata below). Code that uses `Range`
currently keeps materializing the constants to minimize the effect of
this change on the IR; further commits will make use of this.

Reviewed By: nicolasvasilache, mravishankar

Differential Revision: https://reviews.llvm.org/D129633
Alex Zinenko 2022-07-13 11:11:16 +00:00
parent 08a1b07e7c
commit 70e99f387a
11 changed files with 89 additions and 70 deletions
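For illustration only (not part of the commit; the helper name `makeFullRange` below is hypothetical), a minimal sketch of what the `OpFoldResult`-based `Range` permits: statically known offsets and strides stay as attributes and never force `arith.constant` operations into the IR, while genuinely dynamic components remain SSA values.

// Sketch only, not part of this commit. `dimSize` stands for a dynamic SSA
// value of index type obtained elsewhere, e.g. from memref.dim/tensor.dim.
#include "mlir/IR/Builders.h"
#include "mlir/Interfaces/ViewLikeInterface.h"

static mlir::Range makeFullRange(mlir::OpBuilder &b, mlir::Value dimSize) {
  // Before this change all three members had to be Values, so the constant
  // offset and stride were materialized as arith.constant ops up front.
  return mlir::Range{/*offset=*/b.getIndexAttr(0),
                     /*size=*/dimSize,
                     /*stride=*/b.getIndexAttr(1)};
}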

View File

@@ -30,20 +30,6 @@ namespace linalg {
class LinalgOp;
// TODO: allow an extra ValueRange to specify an indexing and allow
// non-hyperrectangular shapes.
using LoopRangeBuilder =
std::function<SmallVector<Range, 4>(ImplicitLocOpBuilder)>;
/// Provide a very simple inference procedure to build the loop ranges from the
/// op and its operands. This only works with permutation affine maps and
/// patterns of the form `(m, n)[s] -> (m + n - s floordiv 2)`.
/// A more advanced Tensor-Comprehension like inference is possible but has
/// proven to be ambiguous in unfavorable case.
/// As a consequence, we relax the default behavior very conservatively and
/// provide an op-specified hook so that Linalg ops may override the behavior.
LoopRangeBuilder defaultLoopRangesBuilder(LinalgOp op);
/// Returns the name mangled library call name to disambiguate between different
/// overloads at the C level. The name mangling scheme is basic and uses MLIR
/// type names:

View File

@@ -208,6 +208,8 @@ SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
/// necessary.
Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
OpFoldResult opFoldResult);
Value materializeOpFoldResult(OpBuilder &b, Location loc,
OpFoldResult opFoldResult);
/// Creates an extract_slice/subview op for a single `valueToTile` with
/// `builder`. This new operation extracts a tile of `valueToTile`, starting
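A hedged usage sketch of the overload declared above, `materializeOpFoldResult(OpBuilder &, Location, OpFoldResult)`, whose definition appears later in this commit. The wrapper function below is hypothetical; it shows the intended use: create an `arith.constant` only when the operand is a static attribute and pass a dynamic value through untouched.

// Sketch only. Turns the possibly-static size of a Range into an SSA value.
#include "mlir/Dialect/Linalg/Utils/Utils.h"

static mlir::Value getSizeAsValue(mlir::OpBuilder &b, mlir::Location loc,
                                  const mlir::Range &range) {
  // Folds a static size to an arith.constant index op; returns the existing
  // dynamic Value unchanged otherwise.
  return mlir::linalg::materializeOpFoldResult(b, loc, range.size);
}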

View File

@@ -24,9 +24,9 @@ namespace mlir {
/// operands into a list of triples. Such a list can be more convenient to
/// manipulate.
struct Range {
Value offset;
Value size;
Value stride;
OpFoldResult offset;
OpFoldResult size;
OpFoldResult stride;
};
class OffsetSizeAndStrideOpInterface;
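With `OpFoldResult` members, consumers can also inspect static components directly instead of pattern-matching the defining `arith.constant` ops. A small sketch of such a check (the helper name is hypothetical; it assumes `getConstantIntValue` from `mlir/Dialect/Utils/StaticValueUtils.h`, which handles both the attribute and the constant-`Value` cases):

// Sketch only: true if the range is known to start at 0 with unit stride,
// whether those components are attributes or constant-defined SSA values.
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Interfaces/ViewLikeInterface.h"

static bool isZeroOffsetUnitStride(const mlir::Range &r) {
  llvm::Optional<int64_t> offset = mlir::getConstantIntValue(r.offset);
  llvm::Optional<int64_t> stride = mlir::getConstantIntValue(r.stride);
  return offset && *offset == 0 && stride && *stride == 1;
}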

View File

@@ -1346,6 +1346,26 @@ static FailureOr<SmallVector<Value>> collapseGenericOpIterationDims(
genericOp, "illegal to collapse specified dimensions");
}
// Bail on non-canonical ranges.
SmallVector<Range> loopRanges =
cast<LinalgOp>(genericOp.getOperation())
.createLoopRanges(rewriter, genericOp.getLoc());
auto opFoldIsConstantValue = [](OpFoldResult ofr, int64_t value) {
if (auto attr = ofr.dyn_cast<Attribute>())
return attr.cast<IntegerAttr>().getInt() == value;
llvm::APInt actual;
return matchPattern(ofr.get<Value>(), m_ConstantInt(&actual)) &&
actual.getSExtValue() == value;
};
if (!llvm::all_of(loopRanges, [&](Range range) {
return opFoldIsConstantValue(range.offset, 0) &&
opFoldIsConstantValue(range.stride, 1);
})) {
return rewriter.notifyMatchFailure(
genericOp,
"expected all loop ranges to have zero start and unit stride");
}
// Get the iterator types for the operand.
SmallVector<StringRef> iteratorTypes = getCollapsedOpIteratorTypes(
genericOp.iterator_types().getValue(), collapsingInfo);
@@ -1390,17 +1410,10 @@ static FailureOr<SmallVector<Value>> collapseGenericOpIterationDims(
// Collect the loop range of the generic op.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(collapsedGenericOp);
SmallVector<Range> loopRanges =
cast<LinalgOp>(genericOp.getOperation())
.createLoopRanges(rewriter, genericOp.getLoc());
assert(llvm::all_of(loopRanges,
[](Range range) {
return matchPattern(range.offset, m_Zero()) &&
matchPattern(range.stride, m_One());
}) &&
"expected all loop ranges to have zero start and unit stride");
SmallVector<Value> loopBound = llvm::to_vector(
llvm::map_range(loopRanges, [](Range range) { return range.size; }));
SmallVector<Value> loopBound =
llvm::to_vector(llvm::map_range(loopRanges, [&](Range range) {
return materializeOpFoldResult(rewriter, loc, range.size);
}));
generateCollapsedIndexingRegion(loc,
&collapsedGenericOp->getRegion(0).front(),
collapsingInfo, loopBound, rewriter);

View File

@@ -117,7 +117,6 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
SmallVector<Range, 8> loopRanges;
Location loc = producer.getLoc();
auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
auto one = b.create<arith::ConstantIndexOp>(loc, 1);
for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
auto shapeDim = getShapeDefiningLoopRange(producer, i);
@@ -125,14 +124,14 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
sizeBounds.push_back(dim);
auto it = fusedLoopsAndRanges.find(i);
if (it != fusedLoopsAndRanges.end()) {
ivs.push_back(it->second.offset);
tileSizes.push_back(it->second.size);
ivs.push_back(materializeOpFoldResult(b, loc, it->second.offset));
tileSizes.push_back(materializeOpFoldResult(b, loc, it->second.size));
loopRanges.push_back(it->second);
LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << i << " with LoopRange "
<< loopRanges.back() << "\n");
} else {
tileSizes.push_back(zero);
loopRanges.push_back(Range{zero, dim, one});
loopRanges.push_back(Range{b.getIndexAttr(0), dim, b.getIndexAttr(1)});
LLVM_DEBUG(llvm::dbgs() << "full loop#" << i << " with LoopRange "
<< loopRanges.back() << "\n");
}
@@ -168,8 +167,9 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
// Shift all IndexOp results by the tile offset.
SmallVector<Value> allIvs;
llvm::transform(loopRanges, std::back_inserter(allIvs),
[](Range range) { return range.offset; });
llvm::transform(loopRanges, std::back_inserter(allIvs), [&](Range range) {
return materializeOpFoldResult(b, loc, range.offset);
});
offsetIndices(b, clonedOp, allIvs);
return clonedOp;

View File

@@ -143,8 +143,9 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
// Obtain the `producerOp` loop bounds and the `sliceOp` ranges.
SmallVector<Value> producerLoopBounds;
llvm::transform(producerOp.createLoopRanges(b, loc),
std::back_inserter(producerLoopBounds),
[](Range range) { return range.size; });
std::back_inserter(producerLoopBounds), [&](Range range) {
return materializeOpFoldResult(b, loc, range.size);
});
SmallVector<Range> sliceOpRanges = sliceOp.getOrCreateRanges(b, loc);
// Tile the producer operands given the `sliceOp` ranges. Iterate the
@@ -157,8 +158,10 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
for (auto it : zip(tiledSliceDimIndices, tiledProducerLoopIndices)) {
int64_t tiledSliceDim = std::get<0>(it);
int64_t tiledProducerLoop = std::get<1>(it);
tileIvs[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].offset;
tileSizes[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].size;
tileIvs[tiledProducerLoop] =
materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].offset);
tileSizes[tiledProducerLoop] =
materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].size);
allIvs[tiledProducerLoop] = tileIvs[tiledProducerLoop];
}
erase_value(tileIvs, nullptr);

View File

@@ -223,14 +223,20 @@ FailureOr<PromotionInfo> mlir::linalg::promoteSubviewAsNewBuffer(
if (droppedDims[en.index()])
continue;
auto rangeValue = en.value();
// Try to extract a tight constant.
// Try to extract a tight constant. If the size is known statically, no need
// to look for the bound.
LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n");
FailureOr<int64_t> upperBound =
getConstantUpperBoundForIndex(rangeValue.size);
Value size =
failed(upperBound)
? rangeValue.size
: b.create<arith::ConstantIndexOp>(loc, upperBound.value());
Value size;
if (auto attr = rangeValue.size.dyn_cast<Attribute>()) {
size = materializeOpFoldResult(b, loc, rangeValue.size);
} else {
Value materializedSize = materializeOpFoldResult(b, loc, rangeValue.size);
FailureOr<int64_t> upperBound =
getConstantUpperBoundForIndex(materializedSize);
size = failed(upperBound)
? materializedSize
: b.create<arith::ConstantIndexOp>(loc, upperBound.getValue());
}
LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n");
fullSizes.push_back(size);
partialSizes.push_back(

View File

@@ -74,12 +74,10 @@ linalg::splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension,
if (dimension >= iterationSpace.size())
return std::make_pair(op, TilingInterface());
SmallVector<OpFoldResult> offsets =
getAsOpFoldResult(llvm::to_vector(llvm::map_range(
iterationSpace, [](const Range &range) { return range.offset; })));
SmallVector<OpFoldResult> sizes =
getAsOpFoldResult(llvm::to_vector(llvm::map_range(
iterationSpace, [](const Range &range) { return range.size; })));
SmallVector<OpFoldResult> offsets = llvm::to_vector(llvm::map_range(
iterationSpace, [](const Range &range) { return range.offset; }));
SmallVector<OpFoldResult> sizes = llvm::to_vector(llvm::map_range(
iterationSpace, [](const Range &range) { return range.size; }));
// Adjust the split point so that it doesn't overflow the size.
AffineExpr d0, d1, d2;
@@ -105,7 +103,7 @@ linalg::splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension,
TilingInterface firstPart = createSplitPart(
rewriter, op.getLoc(), op, offsets, sizes,
op.getDestinationOperands(rewriter), dimension, minSplitPoint,
getAsOpFoldResult(iterationSpace[dimension].offset), firstResults);
iterationSpace[dimension].offset, firstResults);
// Need to pretend that the original op now takes as operands firstResults,
// otherwise tiling interface implementation will take the wrong value to

View File

@@ -66,8 +66,7 @@ mlir::linalg::makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map,
// Create a new range with the applied tile sizes.
SmallVector<Range, 4> res;
for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
res.push_back(Range{b.create<arith::ConstantIndexOp>(loc, 0),
shapeSizes[idx], tileSizes[idx]});
res.push_back(Range{b.getIndexAttr(0), shapeSizes[idx], tileSizes[idx]});
return std::make_tuple(res, loopIndexToRangeIndex);
}
@@ -567,10 +566,12 @@ static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op,
SmallVector<Range> ranges = tilingInterface.getIterationDomain(builder);
SmallVector<Value> lbs, dims, allDims, steps;
for (int64_t i = 0; i < rank; ++i) {
allDims.push_back(ranges[i].size);
Value materializedSize =
materializeOpFoldResult(builder, loc, ranges[i].size);
allDims.push_back(materializedSize);
if (!isZero(tileSizes[i])) {
lbs.push_back(ranges[i].offset);
dims.push_back(ranges[i].size);
lbs.push_back(materializeOpFoldResult(builder, loc, ranges[i].offset));
dims.push_back(materializedSize);
steps.push_back(tileSizes[i]);
}
}

View File

@@ -129,13 +129,14 @@ template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
/// Given a list of subview ranges, extract individual values for lower, upper
/// bounds and steps and put them into the corresponding vectors.
static void unpackRanges(ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
static void unpackRanges(OpBuilder &builder, Location loc,
ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
SmallVectorImpl<Value> &ubs,
SmallVectorImpl<Value> &steps) {
for (Range range : ranges) {
lbs.emplace_back(range.offset);
ubs.emplace_back(range.size);
steps.emplace_back(range.stride);
lbs.emplace_back(materializeOpFoldResult(builder, loc, range.offset));
ubs.emplace_back(materializeOpFoldResult(builder, loc, range.size));
steps.emplace_back(materializeOpFoldResult(builder, loc, range.stride));
}
}
@@ -524,7 +525,7 @@ void GenerateLoopNest<scf::ForOp>::doit(
}
SmallVector<Value, 4> lbs, ubs, steps;
unpackRanges(loopRanges, lbs, ubs, steps);
unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
LoopNest loopNest = mlir::scf::buildLoopNest(
b, loc, lbs, ubs, steps, iterArgInitValues,
[&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
@@ -567,7 +568,7 @@ void GenerateLoopNest<AffineForOp>::doit(
SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
SmallVector<Value, 4> lbs, ubs, steps;
unpackRanges(loopRanges, lbs, ubs, steps);
unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
// Affine loops require constant steps.
SmallVector<int64_t, 4> constantSteps;
@@ -744,7 +745,7 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
stepsStorage.reserve(numLoops);
// Get the loop lb, ub, and step.
unpackRanges(loopRanges, lbsStorage, ubsStorage, stepsStorage);
unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
// Modify the lb, ub, and step based on the distribution options.
SmallVector<DistributionMethod, 0> distributionMethod;
@@ -986,6 +987,12 @@ Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
return builder.create<arith::ConstantIndexOp>(attr.getValue().getSExtValue());
}
Value materializeOpFoldResult(OpBuilder &builder, Location loc,
OpFoldResult opFoldResult) {
ImplicitLocOpBuilder b(loc, builder);
return materializeOpFoldResult(b, opFoldResult);
}
SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
LinalgOp linalgOp,
ArrayRef<Value> valuesToTile,

View File

@@ -14,6 +14,7 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -117,23 +118,25 @@ generateTileLoopNest(OpBuilder &builder, Location loc,
AffineMap minMap = AffineMap::get(1, 2, {s0, s1 - d0}, builder.getContext());
for (auto loopRange : llvm::enumerate(loopRanges)) {
Value offset =
getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().offset);
Value size =
getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().size);
// No loops if tile size is zero. Set offset and size to the loop
// offset and size.
if (matchPattern(tileSizeVals[loopRange.index()], m_Zero())) {
offsets[loopRange.index()] = loopRange.value().offset;
sizes[loopRange.index()] = loopRange.value().size;
offsets[loopRange.index()] = offset;
sizes[loopRange.index()] = size;
continue;
}
auto loop = builder.create<scf::ForOp>(
loc, loopRange.value().offset, loopRange.value().size,
tileSizeVals[loopRange.index()], ValueRange{},
loc, offset, size, tileSizeVals[loopRange.index()], ValueRange{},
[&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
ValueRange /*iterArgs*/) {
Value boundedTileSize = builder.create<AffineMinOp>(
bodyLoc, minMap,
ValueRange{iv, tileSizeVals[loopRange.index()],
loopRange.value().size});
ValueRange{iv, tileSizeVals[loopRange.index()], size});
sizes[loopRange.index()] = boundedTileSize;
builder.create<scf::YieldOp>(loc);
});