forked from OSchip/llvm-project
[mlir] Make ViewLikeInterface Range work with attributes
While most of the methods in ViewLikeInterface accept an `OpFoldResult` for the offset/size/stride, which may be static (represented as an `Attribute`) or dynamic (represented as a `Value`), the `Range` abstraction only accepted `Value`s. This can often lead to known-constant offsets/sizes/strides being materialized into constant operations, which hinders further constant propagation unless the constant folding pass is run explicitly. This often leads to more complicated addressing code than necessary being emitted. Switch `Range` to use `OpFoldResult`. Code that uses `Range` currently keeps materializing the constants to minimize the effect of this change on the IR. Further commits will make use of this. Reviewed By: nicolasvasilache, mravishankar Differential Revision: https://reviews.llvm.org/D129633
This commit is contained in:
parent
08a1b07e7c
commit
70e99f387a
|
@ -30,20 +30,6 @@ namespace linalg {
|
|||
|
||||
class LinalgOp;
|
||||
|
||||
// TODO: allow an extra ValueRange to specify an indexing and allow
|
||||
// non-hyperrectangular shapes.
|
||||
using LoopRangeBuilder =
|
||||
std::function<SmallVector<Range, 4>(ImplicitLocOpBuilder)>;
|
||||
|
||||
/// Provide a very simple inference procedure to build the loop ranges from the
|
||||
/// op and its operands. This only works with permutation affine maps and
|
||||
/// patterns of the form `(m, n)[s] -> (m + n - s floordiv 2)`.
|
||||
/// A more advanced Tensor-Comprehension like inference is possible but has
|
||||
/// proven to be ambiguous in unfavorable cases.
|
||||
/// As a consequence, we relax the default behavior very conservatively and
|
||||
/// provide an op-specified hook so that Linalg ops may override the behavior.
|
||||
LoopRangeBuilder defaultLoopRangesBuilder(LinalgOp op);
|
||||
|
||||
/// Returns the name-mangled library call name to disambiguate between different
|
||||
/// overloads at the C level. The name mangling scheme is basic and uses MLIR
|
||||
/// type names:
|
||||
|
|
|
@ -208,6 +208,8 @@ SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
|
|||
/// necessary.
|
||||
Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
|
||||
OpFoldResult opFoldResult);
|
||||
Value materializeOpFoldResult(OpBuilder &b, Location loc,
|
||||
OpFoldResult opFoldResult);
|
||||
|
||||
/// Creates an extract_slice/subview op for a single `valueToTile` with
|
||||
/// `builder`. This new operation extracts a tile of `valueToTile`, starting
|
||||
|
|
|
@ -24,9 +24,9 @@ namespace mlir {
|
|||
/// operands into a list of triples. Such a list can be more convenient to
|
||||
/// manipulate.
|
||||
struct Range {
|
||||
Value offset;
|
||||
Value size;
|
||||
Value stride;
|
||||
OpFoldResult offset;
|
||||
OpFoldResult size;
|
||||
OpFoldResult stride;
|
||||
};
|
||||
|
||||
class OffsetSizeAndStrideOpInterface;
|
||||
|
|
|
@ -1346,6 +1346,26 @@ static FailureOr<SmallVector<Value>> collapseGenericOpIterationDims(
|
|||
genericOp, "illegal to collapse specified dimensions");
|
||||
}
|
||||
|
||||
// Bail on non-canonical ranges.
|
||||
SmallVector<Range> loopRanges =
|
||||
cast<LinalgOp>(genericOp.getOperation())
|
||||
.createLoopRanges(rewriter, genericOp.getLoc());
|
||||
auto opFoldIsConstantValue = [](OpFoldResult ofr, int64_t value) {
|
||||
if (auto attr = ofr.dyn_cast<Attribute>())
|
||||
return attr.cast<IntegerAttr>().getInt() == value;
|
||||
llvm::APInt actual;
|
||||
return matchPattern(ofr.get<Value>(), m_ConstantInt(&actual)) &&
|
||||
actual.getSExtValue() == value;
|
||||
};
|
||||
if (!llvm::all_of(loopRanges, [&](Range range) {
|
||||
return opFoldIsConstantValue(range.offset, 0) &&
|
||||
opFoldIsConstantValue(range.stride, 1);
|
||||
})) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
genericOp,
|
||||
"expected all loop ranges to have zero start and unit stride");
|
||||
}
|
||||
|
||||
// Get the iterator types for the operand.
|
||||
SmallVector<StringRef> iteratorTypes = getCollapsedOpIteratorTypes(
|
||||
genericOp.iterator_types().getValue(), collapsingInfo);
|
||||
|
@ -1390,17 +1410,10 @@ static FailureOr<SmallVector<Value>> collapseGenericOpIterationDims(
|
|||
// Collect the loop range of the generic op.
|
||||
OpBuilder::InsertionGuard g(rewriter);
|
||||
rewriter.setInsertionPoint(collapsedGenericOp);
|
||||
SmallVector<Range> loopRanges =
|
||||
cast<LinalgOp>(genericOp.getOperation())
|
||||
.createLoopRanges(rewriter, genericOp.getLoc());
|
||||
assert(llvm::all_of(loopRanges,
|
||||
[](Range range) {
|
||||
return matchPattern(range.offset, m_Zero()) &&
|
||||
matchPattern(range.stride, m_One());
|
||||
}) &&
|
||||
"expected all loop ranges to have zero start and unit stride");
|
||||
SmallVector<Value> loopBound = llvm::to_vector(
|
||||
llvm::map_range(loopRanges, [](Range range) { return range.size; }));
|
||||
SmallVector<Value> loopBound =
|
||||
llvm::to_vector(llvm::map_range(loopRanges, [&](Range range) {
|
||||
return materializeOpFoldResult(rewriter, loc, range.size);
|
||||
}));
|
||||
generateCollapsedIndexingRegion(loc,
|
||||
&collapsedGenericOp->getRegion(0).front(),
|
||||
collapsingInfo, loopBound, rewriter);
|
||||
|
|
|
@ -117,7 +117,6 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
|
|||
SmallVector<Range, 8> loopRanges;
|
||||
Location loc = producer.getLoc();
|
||||
auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
|
||||
auto one = b.create<arith::ConstantIndexOp>(loc, 1);
|
||||
|
||||
for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
|
||||
auto shapeDim = getShapeDefiningLoopRange(producer, i);
|
||||
|
@ -125,14 +124,14 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
|
|||
sizeBounds.push_back(dim);
|
||||
auto it = fusedLoopsAndRanges.find(i);
|
||||
if (it != fusedLoopsAndRanges.end()) {
|
||||
ivs.push_back(it->second.offset);
|
||||
tileSizes.push_back(it->second.size);
|
||||
ivs.push_back(materializeOpFoldResult(b, loc, it->second.offset));
|
||||
tileSizes.push_back(materializeOpFoldResult(b, loc, it->second.size));
|
||||
loopRanges.push_back(it->second);
|
||||
LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << i << " with LoopRange "
|
||||
<< loopRanges.back() << "\n");
|
||||
} else {
|
||||
tileSizes.push_back(zero);
|
||||
loopRanges.push_back(Range{zero, dim, one});
|
||||
loopRanges.push_back(Range{b.getIndexAttr(0), dim, b.getIndexAttr(1)});
|
||||
LLVM_DEBUG(llvm::dbgs() << "full loop#" << i << " with LoopRange "
|
||||
<< loopRanges.back() << "\n");
|
||||
}
|
||||
|
@ -168,8 +167,9 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
|
|||
|
||||
// Shift all IndexOp results by the tile offset.
|
||||
SmallVector<Value> allIvs;
|
||||
llvm::transform(loopRanges, std::back_inserter(allIvs),
|
||||
[](Range range) { return range.offset; });
|
||||
llvm::transform(loopRanges, std::back_inserter(allIvs), [&](Range range) {
|
||||
return materializeOpFoldResult(b, loc, range.offset);
|
||||
});
|
||||
offsetIndices(b, clonedOp, allIvs);
|
||||
|
||||
return clonedOp;
|
||||
|
|
|
@ -143,8 +143,9 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
|
|||
// Obtain the `producerOp` loop bounds and the `sliceOp` ranges.
|
||||
SmallVector<Value> producerLoopBounds;
|
||||
llvm::transform(producerOp.createLoopRanges(b, loc),
|
||||
std::back_inserter(producerLoopBounds),
|
||||
[](Range range) { return range.size; });
|
||||
std::back_inserter(producerLoopBounds), [&](Range range) {
|
||||
return materializeOpFoldResult(b, loc, range.size);
|
||||
});
|
||||
SmallVector<Range> sliceOpRanges = sliceOp.getOrCreateRanges(b, loc);
|
||||
|
||||
// Tile the producer operands given the `sliceOp` ranges. Iterate the
|
||||
|
@ -157,8 +158,10 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
|
|||
for (auto it : zip(tiledSliceDimIndices, tiledProducerLoopIndices)) {
|
||||
int64_t tiledSliceDim = std::get<0>(it);
|
||||
int64_t tiledProducerLoop = std::get<1>(it);
|
||||
tileIvs[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].offset;
|
||||
tileSizes[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].size;
|
||||
tileIvs[tiledProducerLoop] =
|
||||
materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].offset);
|
||||
tileSizes[tiledProducerLoop] =
|
||||
materializeOpFoldResult(b, loc, sliceOpRanges[tiledSliceDim].size);
|
||||
allIvs[tiledProducerLoop] = tileIvs[tiledProducerLoop];
|
||||
}
|
||||
erase_value(tileIvs, nullptr);
|
||||
|
|
|
@ -223,14 +223,20 @@ FailureOr<PromotionInfo> mlir::linalg::promoteSubviewAsNewBuffer(
|
|||
if (droppedDims[en.index()])
|
||||
continue;
|
||||
auto rangeValue = en.value();
|
||||
// Try to extract a tight constant.
|
||||
// Try to extract a tight constant. If the size is known statically, no need
|
||||
// to look for the bound.
|
||||
LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n");
|
||||
FailureOr<int64_t> upperBound =
|
||||
getConstantUpperBoundForIndex(rangeValue.size);
|
||||
Value size =
|
||||
failed(upperBound)
|
||||
? rangeValue.size
|
||||
: b.create<arith::ConstantIndexOp>(loc, upperBound.value());
|
||||
Value size;
|
||||
if (auto attr = rangeValue.size.dyn_cast<Attribute>()) {
|
||||
size = materializeOpFoldResult(b, loc, rangeValue.size);
|
||||
} else {
|
||||
Value materializedSize = materializeOpFoldResult(b, loc, rangeValue.size);
|
||||
FailureOr<int64_t> upperBound =
|
||||
getConstantUpperBoundForIndex(materializedSize);
|
||||
size = failed(upperBound)
|
||||
? materializedSize
|
||||
: b.create<arith::ConstantIndexOp>(loc, upperBound.getValue());
|
||||
}
|
||||
LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n");
|
||||
fullSizes.push_back(size);
|
||||
partialSizes.push_back(
|
||||
|
|
|
@ -74,12 +74,10 @@ linalg::splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension,
|
|||
if (dimension >= iterationSpace.size())
|
||||
return std::make_pair(op, TilingInterface());
|
||||
|
||||
SmallVector<OpFoldResult> offsets =
|
||||
getAsOpFoldResult(llvm::to_vector(llvm::map_range(
|
||||
iterationSpace, [](const Range &range) { return range.offset; })));
|
||||
SmallVector<OpFoldResult> sizes =
|
||||
getAsOpFoldResult(llvm::to_vector(llvm::map_range(
|
||||
iterationSpace, [](const Range &range) { return range.size; })));
|
||||
SmallVector<OpFoldResult> offsets = llvm::to_vector(llvm::map_range(
|
||||
iterationSpace, [](const Range &range) { return range.offset; }));
|
||||
SmallVector<OpFoldResult> sizes = llvm::to_vector(llvm::map_range(
|
||||
iterationSpace, [](const Range &range) { return range.size; }));
|
||||
|
||||
// Adjust the split point so that it doesn't overflow the size.
|
||||
AffineExpr d0, d1, d2;
|
||||
|
@ -105,7 +103,7 @@ linalg::splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension,
|
|||
TilingInterface firstPart = createSplitPart(
|
||||
rewriter, op.getLoc(), op, offsets, sizes,
|
||||
op.getDestinationOperands(rewriter), dimension, minSplitPoint,
|
||||
getAsOpFoldResult(iterationSpace[dimension].offset), firstResults);
|
||||
iterationSpace[dimension].offset, firstResults);
|
||||
|
||||
// Need to pretend that the original op now takes as operands firstResults,
|
||||
// otherwise tiling interface implementation will take the wrong value to
|
||||
|
|
|
@ -66,8 +66,7 @@ mlir::linalg::makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map,
|
|||
// Create a new range with the applied tile sizes.
|
||||
SmallVector<Range, 4> res;
|
||||
for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
|
||||
res.push_back(Range{b.create<arith::ConstantIndexOp>(loc, 0),
|
||||
shapeSizes[idx], tileSizes[idx]});
|
||||
res.push_back(Range{b.getIndexAttr(0), shapeSizes[idx], tileSizes[idx]});
|
||||
return std::make_tuple(res, loopIndexToRangeIndex);
|
||||
}
|
||||
|
||||
|
@ -567,10 +566,12 @@ static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op,
|
|||
SmallVector<Range> ranges = tilingInterface.getIterationDomain(builder);
|
||||
SmallVector<Value> lbs, dims, allDims, steps;
|
||||
for (int64_t i = 0; i < rank; ++i) {
|
||||
allDims.push_back(ranges[i].size);
|
||||
Value materializedSize =
|
||||
materializeOpFoldResult(builder, loc, ranges[i].size);
|
||||
allDims.push_back(materializedSize);
|
||||
if (!isZero(tileSizes[i])) {
|
||||
lbs.push_back(ranges[i].offset);
|
||||
dims.push_back(ranges[i].size);
|
||||
lbs.push_back(materializeOpFoldResult(builder, loc, ranges[i].offset));
|
||||
dims.push_back(materializedSize);
|
||||
steps.push_back(tileSizes[i]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -129,13 +129,14 @@ template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
|
|||
|
||||
/// Given a list of subview ranges, extract individual values for lower, upper
|
||||
/// bounds and steps and put them into the corresponding vectors.
|
||||
static void unpackRanges(ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
|
||||
static void unpackRanges(OpBuilder &builder, Location loc,
|
||||
ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
|
||||
SmallVectorImpl<Value> &ubs,
|
||||
SmallVectorImpl<Value> &steps) {
|
||||
for (Range range : ranges) {
|
||||
lbs.emplace_back(range.offset);
|
||||
ubs.emplace_back(range.size);
|
||||
steps.emplace_back(range.stride);
|
||||
lbs.emplace_back(materializeOpFoldResult(builder, loc, range.offset));
|
||||
ubs.emplace_back(materializeOpFoldResult(builder, loc, range.size));
|
||||
steps.emplace_back(materializeOpFoldResult(builder, loc, range.stride));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -524,7 +525,7 @@ void GenerateLoopNest<scf::ForOp>::doit(
|
|||
}
|
||||
|
||||
SmallVector<Value, 4> lbs, ubs, steps;
|
||||
unpackRanges(loopRanges, lbs, ubs, steps);
|
||||
unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
|
||||
LoopNest loopNest = mlir::scf::buildLoopNest(
|
||||
b, loc, lbs, ubs, steps, iterArgInitValues,
|
||||
[&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
|
||||
|
@ -567,7 +568,7 @@ void GenerateLoopNest<AffineForOp>::doit(
|
|||
SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
|
||||
assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
|
||||
SmallVector<Value, 4> lbs, ubs, steps;
|
||||
unpackRanges(loopRanges, lbs, ubs, steps);
|
||||
unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
|
||||
|
||||
// Affine loops require constant steps.
|
||||
SmallVector<int64_t, 4> constantSteps;
|
||||
|
@ -744,7 +745,7 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
|
|||
stepsStorage.reserve(numLoops);
|
||||
|
||||
// Get the loop lb, ub, and step.
|
||||
unpackRanges(loopRanges, lbsStorage, ubsStorage, stepsStorage);
|
||||
unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
|
||||
|
||||
// Modify the lb, ub, and step based on the distribution options.
|
||||
SmallVector<DistributionMethod, 0> distributionMethod;
|
||||
|
@ -986,6 +987,12 @@ Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
|
|||
return builder.create<arith::ConstantIndexOp>(attr.getValue().getSExtValue());
|
||||
}
|
||||
|
||||
/// Convenience overload: wraps `builder` and `loc` in an ImplicitLocOpBuilder
/// and forwards to the ImplicitLocOpBuilder overload of
/// materializeOpFoldResult, returning whatever Value that overload produces.
Value materializeOpFoldResult(OpBuilder &builder, Location loc,
|
||||
OpFoldResult opFoldResult) {
|
||||
ImplicitLocOpBuilder b(loc, builder);
|
||||
return materializeOpFoldResult(b, opFoldResult);
|
||||
}
|
||||
|
||||
SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
|
||||
LinalgOp linalgOp,
|
||||
ArrayRef<Value> valuesToTile,
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
|
||||
#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/Dialect/SCF/Utils/Utils.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
|
@ -117,23 +118,25 @@ generateTileLoopNest(OpBuilder &builder, Location loc,
|
|||
AffineMap minMap = AffineMap::get(1, 2, {s0, s1 - d0}, builder.getContext());
|
||||
|
||||
for (auto loopRange : llvm::enumerate(loopRanges)) {
|
||||
Value offset =
|
||||
getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().offset);
|
||||
Value size =
|
||||
getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().size);
|
||||
// No loops if tile size is zero. Set offset and size to the loop
|
||||
// offset and size.
|
||||
if (matchPattern(tileSizeVals[loopRange.index()], m_Zero())) {
|
||||
offsets[loopRange.index()] = loopRange.value().offset;
|
||||
sizes[loopRange.index()] = loopRange.value().size;
|
||||
offsets[loopRange.index()] = offset;
|
||||
sizes[loopRange.index()] = size;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto loop = builder.create<scf::ForOp>(
|
||||
loc, loopRange.value().offset, loopRange.value().size,
|
||||
tileSizeVals[loopRange.index()], ValueRange{},
|
||||
loc, offset, size, tileSizeVals[loopRange.index()], ValueRange{},
|
||||
[&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
|
||||
ValueRange /*iterArgs*/) {
|
||||
Value boundedTileSize = builder.create<AffineMinOp>(
|
||||
bodyLoc, minMap,
|
||||
ValueRange{iv, tileSizeVals[loopRange.index()],
|
||||
loopRange.value().size});
|
||||
ValueRange{iv, tileSizeVals[loopRange.index()], size});
|
||||
sizes[loopRange.index()] = boundedTileSize;
|
||||
builder.create<scf::YieldOp>(loc);
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue