[mlir] Add an interface to allow operations to specify how they can be tiled.

An interface to allow for tiling of operations is introduced. The tiling of the linalg.pad_tensor operation is modified to use this interface.

Differential Revision: https://reviews.llvm.org/D108611

This commit is contained in:
parent 3fefebabe5
commit ba72cfe734
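For orientation before the diff: a transformation that consumes the new interface queries the op's destination operands and iteration domain, then asks the op for the implementation of a single tile. The sketch below is hypothetical (the helper `tileWithInterface` and its parameters are illustrative assumptions, not part of this commit):

#include "mlir/IR/Builders.h"
#include "mlir/Interfaces/TilingInterface.h"

using namespace mlir;

// Hypothetical consumer of the TilingInterface (not part of this commit).
// A real driver would also build the loop nest described by getLoopBounds()
// and insert the tiled results into `dest`.
static Operation *tileWithInterface(OpBuilder &b, TilingInterface op,
                                    ArrayRef<OpFoldResult> tileOffsets,
                                    ArrayRef<OpFoldResult> tileSizes) {
  // Destination tensors into which the tiled results are eventually inserted.
  SmallVector<Value> dest = op.getDestinationOperands(b);
  // Full iteration space; a driver would clamp the tile to these bounds.
  SmallVector<Range> loopBounds = op.getLoopBounds(b);
  (void)loopBounds;
  // Ask the op to emit the computation for this one tile.
  return op.getTiledImplementation(b, dest, tileOffsets, tileSizes);
}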
@@ -25,6 +25,7 @@
 #include "mlir/Interfaces/CopyOpInterface.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/TilingInterface.h"
 #include "mlir/Interfaces/ViewLikeInterface.h"
 #include "mlir/Support/LLVM.h"
@@ -18,6 +18,7 @@ include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/TilingInterface.td"
 include "mlir/Interfaces/ViewLikeInterface.td"

 // Base class for Linalg dialect ops that do not correspond to library calls.
@@ -130,7 +131,10 @@ def Linalg_InitTensorOp : Linalg_Op<"init_tensor",

 def Linalg_PadTensorOp : Linalg_Op<"pad_tensor",
     [AttrSizedOperandSegments, NoSideEffect,
-     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
+     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
+     DeclareOpInterfaceMethods<TilingInterface,
+         ["getDestinationOperands", "getLoopIteratorTypes", "getLoopBounds",
+          "getTiledImplementation"]>]> {
   let summary = "tensor pad operation";
   let description = [{
     `linalg.pad_tensor` is an operation that pads the `source` tensor
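Declaring `DeclareOpInterfaceMethods<TilingInterface, ...>` as above makes mlir-tblgen inject the four method declarations into the generated PadTensorOp class. Their approximate shape (a sketch of generated code, shown here for readability only; not text from the diff) is:

SmallVector<Value> getDestinationOperands(OpBuilder &b);
SmallVector<StringRef> getLoopIteratorTypes();
SmallVector<Range> getLoopBounds(OpBuilder &b);
Operation *getTiledImplementation(OpBuilder &b, ValueRange dest,
                                  ArrayRef<OpFoldResult> offsets,
                                  ArrayRef<OpFoldResult> sizes);

The out-of-line definitions for linalg.pad_tensor appear in the LinalgOps.cpp hunk further down.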
@@ -6,6 +6,7 @@ add_mlir_interface(DerivedAttributeOpInterface)
 add_mlir_interface(InferTypeOpInterface)
 add_mlir_interface(LoopLikeInterface)
 add_mlir_interface(SideEffectInterfaces)
+add_mlir_interface(TilingInterface)
 add_mlir_interface(VectorInterfaces)
 add_mlir_interface(ViewLikeInterface)
@@ -0,0 +1,26 @@ (new file: mlir/include/mlir/Interfaces/TilingInterface.h)
+//===- TilingInterface.h - Interface for tiling operations ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the TilingInterface defined in
+// `TilingInterface.td`.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_TILINGINTERFACE_H_
+#define MLIR_INTERFACES_TILINGINTERFACE_H_
+
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
+#include "mlir/Support/LLVM.h"
+
+/// Include the ODS generated interface header files.
+#include "mlir/Interfaces/TilingInterface.h.inc"
+
+#endif // MLIR_INTERFACES_TILINGINTERFACE_H_
@@ -0,0 +1,95 @@ (new file: mlir/include/mlir/Interfaces/TilingInterface.td)
+//===- TilingInterface.td - Interface for tiling operations *- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an interface to allow operations to generate a tiled
+// implementation of themselves.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TILINGINTERFACE
+#define MLIR_TILINGINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def TilingInterface : OpInterface<"TilingInterface"> {
+  let description = [{
+    Interface for allowing operations to expose information needed to
+    tile them (similar to LinalgOp, but without having access to
+    indexing maps)
+  }];
+  let cppNamespace = "::mlir";
+  let methods = [
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of operands into which the result of the
+          tiled implementation is written. With `tensor`
+          operands, this is the initial tensor into which
+          the tiled results are inserted. With `memref` operands,
+          this is the operand into which the result of the tiled
+          operation is written.
+        }],
+        /*retType=*/"SmallVector<Value>",
+        /*methodName=*/"getDestinationOperands",
+        /*args=*/(ins "OpBuilder &":$b),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/"return ValueRange{};"
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of `StringRef`s that describe the number of
+          loops and the iterator types of the operation. The list is
+          expected to use
+          `getParallelIteratorTypeName()`/`getReductionIteratorTypeName()`
+          from MLIR Structured Op Utils.
+        }],
+        /*retType=*/"SmallVector<StringRef>",
+        /*methodName=*/"getLoopIteratorTypes"
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of ranges that describe the loop bounds and
+          step for the loops of the operation.
+        }],
+        /*retType=*/"SmallVector<Range>",
+        /*methodName=*/"getLoopBounds",
+        /*args=*/(ins "OpBuilder &":$b)
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Method to generate the tiled implementation of an operation.
+
+          The iteration space of the operation is returned by
+          `getLoopBounds`. The caller provides the information of the
+          tile within this iteration space whose implementation the
+          caller needs.
+          - `dest` are the Values into which the result of the tiled
+            operation is to be inserted. The types of the `dest`
+            Values are the same as the types returned by the
+            `getDestinationOperands` method.
+          - `offsets` provides the offset of the tile within the
+            iteration space.
+          - `sizes` provides the size of the tile.
+
+          The method returns the operation that is the tiled
+          implementation.
+        }],
+        /*retType=*/"Operation *",
+        /*methodName=*/"getTiledImplementation",
+        /*args=*/(ins
+            "OpBuilder &":$b,
+            "ValueRange ":$dest,
+            "ArrayRef<OpFoldResult> ":$offsets,
+            "ArrayRef<OpFoldResult> ":$sizes),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          return nullptr;
+        }]
+      >
+  ];
+}
+#endif // MLIR_TILINGINTERFACE
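Note that both `getDestinationOperands` and `getTiledImplementation` carry default implementations (`ValueRange{}` and `nullptr`), so a caller must tolerate ops that implement the interface but decline to tile; PadTensorOp, for instance, returns nullptr when the padding value is not a constant. A defensive-use sketch (hypothetical `tryTile` helper, assuming the includes from the earlier sketch; not part of this commit):

// Sketch: tile `op` only when it opts in and actually produces a tiled body.
static LogicalResult tryTile(OpBuilder &b, Operation *op, ValueRange dest,
                             ArrayRef<OpFoldResult> offsets,
                             ArrayRef<OpFoldResult> sizes) {
  auto tilingOp = dyn_cast<TilingInterface>(op);
  if (!tilingOp)
    return failure();
  // May be nullptr, e.g. for a PadTensorOp with a non-constant padding value.
  Operation *tiled = tilingOp.getTiledImplementation(b, dest, offsets, sizes);
  return tiled ? success() : failure();
}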
@@ -18,9 +18,11 @@ add_mlir_dialect_library(MLIRLinalg
   MLIRIR
   MLIRParser
   MLIRSideEffectInterfaces
-  MLIRViewLikeInterface
+  MLIRSCF
   MLIRStandard
   MLIRMath
   MLIRMemRef
   MLIRTensor
+  MLIRTilingInterface
+  MLIRViewLikeInterface
 )
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
@@ -1203,6 +1204,253 @@ LogicalResult PadTensorOp::reifyResultShapes(
   return success();
 }
+
+//===----------------------------------------------------------------------===//
+// Methods related to PadTensor tiling.
+//===----------------------------------------------------------------------===//
+
+/// Given an OpFoldResult, return a Value. If the OpFoldResult is an Attribute,
+/// it must be of type Integer.
+static Value getAsValue(OpBuilder &builder, Location loc, OpFoldResult ofr) {
+  if (auto val = ofr.dyn_cast<Value>())
+    return val;
+  auto intVal = getConstantIntValue(ofr);
+  assert(intVal && "expected Value or IntegerAttr");
+  return builder.create<ConstantIndexOp>(loc, *intVal);
+}
+
+SmallVector<Value> PadTensorOp::getDestinationOperands(OpBuilder &b) {
+  ReifiedRankedShapedTypeDims reifiedShapes;
+  (void)reifyResultShapes(b, reifiedShapes);
+  Value initTensor = b.create<InitTensorOp>(getLoc(), reifiedShapes[0],
+                                            getResultType().getElementType());
+  return {initTensor};
+}
+
+SmallVector<StringRef> PadTensorOp::getLoopIteratorTypes() {
+  SmallVector<StringRef> iteratorTypes(getResultType().getRank(),
+                                       getParallelIteratorTypeName());
+  return iteratorTypes;
+}
+
+SmallVector<Range> PadTensorOp::getLoopBounds(OpBuilder &b) {
+  ReifiedRankedShapedTypeDims reifiedShapes;
+  (void)reifyResultShapes(b, reifiedShapes);
+  Value zero = b.create<ConstantIndexOp>(getLoc(), 0);
+  Value one = b.create<ConstantIndexOp>(getLoc(), 1);
+  // Initialize all the ranges to {zero, one, one}. All the `ub`s are
+  // overwritten.
+  SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
+  for (auto ub : enumerate(reifiedShapes[0]))
+    loopRanges[ub.index()].size = ub.value();
+  return loopRanges;
+}
+
+Operation *PadTensorOp::getTiledImplementation(OpBuilder &b, ValueRange dest,
+                                               ArrayRef<OpFoldResult> offsets,
+                                               ArrayRef<OpFoldResult> sizes) {
+  // Only constant padding value supported.
+  Value padValue = getConstantPaddingValue();
+  if (!padValue)
+    return nullptr;
+
+  // Helper variables and functions for various arithmetic operations. These are
+  // used extensively for computing new offset/length and padding values.
+  Location loc = getLoc();
+  AffineExpr dim0, dim1;
+  bindDims(b.getContext(), dim0, dim1);
+  // Add two integers.
+  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
+  auto add = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
+  };
+  // Subtract two integers.
+  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
+  auto sub = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
+  };
+  // Take the minimum of two integers.
+  auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
+  auto min = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
+  };
+  // Take the maximum of two integers.
+  auto max = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
+  };
+  // Zero index-typed integer.
+  auto zero = b.create<ConstantIndexOp>(loc, 0);
+
+  // Helper function for filling static/dynamic low/high padding indices vectors
+  // of PadTensorOp.
+  auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
+                         SmallVector<int64_t> &staticIndices) {
+    if (auto constInt = getConstantIntValue(val)) {
+      staticIndices.push_back(*constInt);
+    } else {
+      staticIndices.push_back(ShapedType::kDynamicSize);
+      dynIndices.push_back(val);
+    }
+  };
+
+  // Compute new offsets, lengths, low padding, high padding.
+  SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
+  SmallVector<Value> newLows, newHighs;
+  SmallVector<int64_t> staticNewLows, staticNewHighs;
+  // Set to true if the original data source is not read at all.
+  bool hasZeroLen = false;
+  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
+  // is true if the original data source turns out to be unused at runtime.
+  Value dynHasZeroLenCond;
+
+  int64_t rank = getSourceType().getRank();
+  for (unsigned dim = 0; dim < rank; ++dim) {
+    auto low = getAsValue(b, loc, getMixedLowPad()[dim]);
+    bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
+    auto high = getAsValue(b, loc, getMixedHighPad()[dim]);
+    bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
+    auto offset = getAsValue(b, loc, offsets[dim]);
+    auto length = getAsValue(b, loc, sizes[dim]);
+    auto srcSize = b.createOrFold<tensor::DimOp>(loc, source(), dim);
+
+    // The new amount of low padding is `low - offset`, except for the case
+    // where none of the low padding is read. In that case, the new amount of
+    // low padding is zero.
+    //
+    // Optimization: If low = 0, then newLow = 0.
+    Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+    appendIndex(newLow, newLows, staticNewLows);
+
+    // Start reading the data from position `offset - low`. Since the original
+    // read may have started in the low padding zone, this value could be
+    // negative. Therefore, start reading from:
+    //
+    //   max(offset - low, 0)
+    //
+    // The original read could also have started in the high padding zone.
+    // In that case, set the offset to the end of the source tensor. The new
+    // ExtractSliceOp length will be zero in that case. (Effectively reading no
+    // data from the source.)
+    //
+    // Optimization: If low = 0, then the formula can be simplified.
+    Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
+                                : min(offset, srcSize);
+    newOffsets.push_back(getAsOpFoldResult(newOffset));
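+    // Worked example (hypothetical numbers, added for exposition; not part
+    // of the commit): with low = 2, srcSize = 10 and a tile at offset = 1,
+    // newLow = max(0, 2 - 1) = 1 and newOffset = min(max(1 - 2, 0), 10) = 0,
+    // i.e. the tile begins with one padded element and then reads from the
+    // start of the source.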
+
+    // The original ExtractSliceOp was reading until position `offset + length`.
+    // Therefore, the corresponding position within the source tensor is:
+    //
+    //   offset + length - low
+    //
+    // In case the original ExtractSliceOp stopped reading within the low
+    // padding zone, this value can be negative. In that case, the end position
+    // of the read should be zero. (Similar to newOffset.)
+    //
+    // The original read could also have stopped in the high padding zone.
+    // In that case, the end position of the read should be the end of the
+    // source tensor. (Similar to newOffset.)
+    //
+    //   endLoc = min(max(offset - low + length, 0), srcSize)
+    //
+    // The new ExtractSliceOp length is `endLoc - newOffset`.
+    //
+    // Optimization: If low = 0, then the formula can be simplified.
+    Value endLoc = hasLowPad
+                       ? min(max(add(sub(offset, low), length), zero), srcSize)
+                       : min(add(offset, length), srcSize);
+    Value newLength = sub(endLoc, newOffset);
+    newLengths.push_back(getAsOpFoldResult(newLength));
+
+    // Check if newLength is zero. In that case, no SubTensorOp should be
+    // executed.
+    if (auto newLengthInt = getConstantIntValue(newLength)) {
+      hasZeroLen |= *newLengthInt == 0;
+    } else {
+      Value check = b.create<CmpIOp>(loc, CmpIPredicate::eq, newLength, zero);
+      dynHasZeroLenCond = dynHasZeroLenCond
+                              ? b.create<OrOp>(loc, check, dynHasZeroLenCond)
+                              : check;
+    }
+
+    // The amount of high padding is simply the number of elements remaining,
+    // so that the result has the same length as the original ExtractSliceOp.
+    // As an optimization, if the original high padding is zero, then the new
+    // high padding must also be zero.
+    Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
+    appendIndex(newHigh, newHighs, staticNewHighs);
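+    // Worked example, continued (hypothetical numbers; not part of the
+    // commit): with length = 4, endLoc = min(max(1 - 2 + 4, 0), 10) = 3 and
+    // newLength = 3 - 0 = 3, so newHigh = (4 - 3) - 1 = 0: the four result
+    // elements are one padded element followed by three source elements.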
+
+    // Only unit stride supported.
+    newStrides.push_back(b.getIndexAttr(1));
+  }
+
+  // The shape of the result can be obtained from the sizes passed in.
+  SmallVector<Value> dynDims;
+  SmallVector<int64_t> shape;
+  dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize);
+  RankedTensorType resultType =
+      RankedTensorType::get(shape, getResultType().getElementType());
+
+  // Insert cast to ensure that types match. (May be folded away.)
+  auto castResult = [&](Value val) -> Operation * {
+    auto castOp = b.create<tensor::CastOp>(loc, resultType, val);
+    return castOp;
+  };
+
+  // In cases where the original data source is unused: Emit a GenerateOp and
+  // do not generate a SliceOp. (The result shape of the SliceOp would
+  // have a dimension of size 0, the semantics of which is unclear.)
+  auto createGenerateOp = [&]() {
+    // Create GenerateOp.
+    auto generateOp = b.create<tensor::GenerateOp>(
+        loc, resultType, dynDims,
+        [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
+          builder.create<tensor::YieldOp>(gLoc, padValue);
+        });
+    return castResult(generateOp);
+  };
+
+  // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
+  // the result shape of the new SliceOp has a zero dimension.
+  auto createPadTensorOfSubTensor = [&]() {
+    // Create pad_tensor(subtensor(x)).
+    auto newSliceOp = b.create<tensor::ExtractSliceOp>(
+        loc, source(), newOffsets, newLengths, newStrides);
+    auto newPadTensorOp = b.create<PadTensorOp>(
+        loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
+
+    // Copy region to new PadTensorOp.
+    BlockAndValueMapping bvm;
+    region().cloneInto(&newPadTensorOp.getRegion(), bvm);
+
+    // Cast result and return.
+    return castResult(newPadTensorOp);
+  };
+
+  // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically
+  // known that the original data source x is not used.
+  if (hasZeroLen) {
+    return createGenerateOp();
+  }
+
+  // If there are dynamic dimensions: Generate an scf.if check to avoid creating
+  // SliceOps with result dimensions of size 0 at runtime.
+  if (dynHasZeroLenCond) {
+    auto result = b.create<scf::IfOp>(
+        loc, resultType, dynHasZeroLenCond,
+        /*thenBuilder=*/
+        [&](OpBuilder &b, Location loc) {
+          b.create<scf::YieldOp>(loc, createGenerateOp()->getResult(0));
+        },
+        /*elseBuilder=*/
+        [&](OpBuilder &b, Location loc) {
+          b.create<scf::YieldOp>(loc,
+                                 createPadTensorOfSubTensor()->getResult(0));
+        });
+    return result;
+  }
+  return createPadTensorOfSubTensor();
+}
+
 namespace {
 // Folds linalg.pad_tensor when padding is static zeros.
 struct FoldStaticZeroPadding : public OpRewritePattern<PadTensorOp> {
@@ -373,11 +373,12 @@ static LogicalResult tilePadTensorOp(OpBuilder &builder, PadTensorOp op,
       options.tileSizeComputationFunction(builder, op);
   assert(static_cast<int64_t>(tileSizes.size()) == rank);
   // Compute lower and upper bounds of the loop nest.
+  SmallVector<Range> ranges = op.getLoopBounds(builder);
   SmallVector<Value> lbs, dims, steps;
   for (int64_t i = 0; i < rank; ++i) {
     if (!isZero(tileSizes[i])) {
-      lbs.push_back(builder.create<ConstantIndexOp>(loc, 0));
-      dims.push_back(builder.create<tensor::DimOp>(loc, op.output(), i));
+      lbs.push_back(ranges[i].offset);
+      dims.push_back(ranges[i].size);
       steps.push_back(tileSizes[i]);
     }
   }
@@ -648,16 +648,6 @@ GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp,
   return success();
 }
-
-/// Given an OpFoldResult, return a Value. If the OpFoldResult is an Attribute,
-/// it must be of type Integer.
-static Value asValue(OpBuilder &builder, Location loc, OpFoldResult ofr) {
-  if (auto val = ofr.dyn_cast<Value>())
-    return val;
-  auto intVal = getConstantIntValue(ofr);
-  assert(intVal && "expected Value or IntegerAttr");
-  return builder.create<ConstantIndexOp>(loc, *intVal);
-}

 LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
     tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
   auto padOp = sliceOp.source().getDefiningOp<PadTensorOp>();
@@ -666,227 +656,12 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
   // Only unit stride supported.
   if (!sliceOp.hasUnitStride())
     return failure();
-  // Only constant padding value supported.
-  Value padValue = padOp.getConstantPaddingValue();
-  if (!padValue)
-    return failure();
-
-  // Helper variables and functions for various arithmetic operations. These are
-  // used extensively for computing new offset/length and padding values.
-  Location loc = sliceOp.getLoc();
-  AffineExpr dim0, dim1;
-  bindDims(rewriter.getContext(), dim0, dim1);
-  // Add two integers.
-  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
-  auto add = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineApplyOp>(loc, addMap,
-                                                ValueRange{v1, v2});
-  };
-  // Subtract two integers.
-  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
-  auto sub = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineApplyOp>(loc, subMap,
-                                                ValueRange{v1, v2});
-  };
-  // Take the minimum of two integers.
-  auto idMap = AffineMap::getMultiDimIdentityMap(2, rewriter.getContext());
-  auto min = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
-  };
-  // Take the maximum of two integers.
-  auto max = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
-  };
-  // Zero index-typed integer.
-  auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
-
-  // Helper function for filling static/dynamic low/high padding indices vectors
-  // of PadTensorOp.
-  auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
-                         SmallVector<int64_t> &staticIndices) {
-    if (auto constInt = getConstantIntValue(val)) {
-      staticIndices.push_back(*constInt);
-    } else {
-      staticIndices.push_back(ShapedType::kDynamicSize);
-      dynIndices.push_back(val);
-    }
-  };
-
-  // Compute new offsets, lengths, low padding, high padding.
-  SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
-  SmallVector<Value> newLows, newHighs;
-  SmallVector<int64_t> staticNewLows, staticNewHighs;
-  // Set to true if the original data source is not read at all.
-  bool hasZeroLen = false;
-  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
-  // is true if the original data source turns out to be unused at runtime.
-  Value dynHasZeroLenCond;
-
-  int64_t rank = padOp.getSourceType().getRank();
-  for (unsigned dim = 0; dim < rank; ++dim) {
-    auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]);
-    bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
-    auto high = asValue(rewriter, loc, padOp.getMixedHighPad()[dim]);
-    bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
-    auto offset = asValue(rewriter, loc, sliceOp.getMixedOffsets()[dim]);
-    auto length = asValue(rewriter, loc, sliceOp.getMixedSizes()[dim]);
-    auto srcSize =
-        rewriter.createOrFold<tensor::DimOp>(loc, padOp.source(), dim);
-
-    // The new amount of low padding is `low - offset`, except for the case
-    // where none of the low padding is read. In that case, the new amount of
-    // low padding is zero.
-    //
-    // Optimization: If low = 0, then newLow = 0.
-    Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
-    appendIndex(newLow, newLows, staticNewLows);
-
-    // Start reading the data from position `offset - low`. Since the original
-    // read may have started in the low padding zone, this value could be
-    // negative. Therefore, start reading from:
-    //
-    //   max(offset - low, 0)
-    //
-    // The original read could also have started in the high padding zone.
-    // In that case, set the offset to the end of the source tensor. The new
-    // ExtractSliceOp length will be zero in that case. (Effectively reading no
-    // data from the source.)
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
-                                : min(offset, srcSize);
-    newOffsets.push_back(getAsOpFoldResult(newOffset));
-
-    // The original ExtractSliceOp was reading until position `offset + length`.
-    // Therefore, the corresponding position within the source tensor is:
-    //
-    //   offset + length - low
-    //
-    // In case the original ExtractSliceOp stopped reading within the low
-    // padding zone, this value can be negative. In that case, the end position
-    // of the read should be zero. (Similar to newOffset.)
-    //
-    // The original read could also have stopped in the high padding zone.
-    // In that case, the end position of the read should be the end of the
-    // source tensor. (Similar to newOffset.)
-    //
-    //   endLoc = min(max(offset - low + length, 0), srcSize)
-    //
-    // The new ExtractSliceOp length is `endLoc - newOffset`.
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    Value endLoc = hasLowPad
-                       ? min(max(add(sub(offset, low), length), zero), srcSize)
-                       : min(add(offset, length), srcSize);
-    Value newLength = sub(endLoc, newOffset);
-    newLengths.push_back(getAsOpFoldResult(newLength));
-
-    // Check if newLength is zero. In that case, no SubTensorOp should be
-    // executed.
-    if (auto newLengthInt = getConstantIntValue(newLength)) {
-      hasZeroLen |= *newLengthInt == 0;
-    } else {
-      Value check = rewriter.create<CmpIOp>(
-          loc, CmpIPredicate::eq, newLength, zero);
-      dynHasZeroLenCond =
-          dynHasZeroLenCond
-              ? rewriter.create<OrOp>(loc, check, dynHasZeroLenCond)
-              : check;
-    }
-
-    // The amount of high padding is simply the number of elements remaining,
-    // so that the result has the same length as the original ExtractSliceOp.
-    // As an optimization, if the original high padding is zero, then the new
-    // high padding must also be zero.
-    Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
-    appendIndex(newHigh, newHighs, staticNewHighs);
-
-    // Only unit stride supported.
-    newStrides.push_back(rewriter.getIndexAttr(1));
-  }
-
-  // Insert cast to ensure that types match. (May be folded away.)
-  auto castResult = [&](Value val) -> Value {
-    auto castOp = rewriter.create<tensor::CastOp>(loc, sliceOp.getType(), val);
-    return castOp;
-  };
-
-  // In cases where the original data source is unused: Emit a GenerateOp and
-  // do not generate a SliceOp. (The result shape of the SliceOp would
-  // have a dimension of size 0, the semantics of which is unclear.)
-  auto createGenerateOp = [&]() {
-    // The shape of the GenerateOp is the same as the existing SliceOp.
-    RankedTensorType type = sliceOp.getType();
-    SmallVector<Value> dynDims;
-    for (unsigned i = 0; i < type.getRank(); ++i) {
-      if (type.isDynamicDim(i))
-        dynDims.push_back(asValue(rewriter, loc, sliceOp.getMixedSizes()[i]));
-    }
-
-    // Create GenerateOp.
-    auto generateOp = rewriter.create<tensor::GenerateOp>(loc, type, dynDims);
-
-    // Copy region to new op.
-    BlockAndValueMapping bvm;
-    padOp.region().cloneInto(&generateOp.getRegion(), bvm);
-    // Rewrite linalg::YieldOp to tensor::YieldOp.
-    {
-      OpBuilder::InsertionGuard guard(rewriter);
-      auto yieldOp = dyn_cast<linalg::YieldOp>(
-          generateOp.getRegion().front().getTerminator());
-      assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator");
-      assert(yieldOp.values().size() == 1);
-      rewriter.setInsertionPoint(yieldOp);
-      rewriter.replaceOpWithNewOp<tensor::YieldOp>(yieldOp,
-                                                   yieldOp.values()[0]);
-    }
-
-    return castResult(generateOp);
-  };
-
-  // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
-  // the result shape of the new SliceOp has a zero dimension.
-  auto createPadTensorOfSubTensor = [&]() {
-    // Create pad_tensor(subtensor(x)).
-    auto newSliceOp = rewriter.create<tensor::ExtractSliceOp>(
-        loc, padOp.source(), newOffsets, newLengths, newStrides);
-    auto newPadTensorOp = rewriter.create<PadTensorOp>(
-        loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
-
-    // Copy region to new PadTensorOp.
-    BlockAndValueMapping bvm;
-    padOp.region().cloneInto(&newPadTensorOp.getRegion(), bvm);
-
-    // Cast result and return.
-    return castResult(newPadTensorOp);
-  };
-
-  // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically
-  // known that the original data source x is not used.
-  if (hasZeroLen) {
-    rewriter.replaceOp(sliceOp, createGenerateOp());
-    return success();
-  }
-
-  // If there are dynamic dimensions: Generate an scf.if check to avoid creating
-  // SliceOps with result dimensions of size 0 at runtime.
-  if (dynHasZeroLenCond) {
-    auto result = rewriter.create<scf::IfOp>(
-        loc, sliceOp.getType(), dynHasZeroLenCond,
-        /*thenBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          b.create<scf::YieldOp>(loc, createGenerateOp());
-        },
-        /*elseBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          b.create<scf::YieldOp>(loc, createPadTensorOfSubTensor());
-        });
-    rewriter.replaceOp(sliceOp, result.getResult(0));
-    return success();
-  }
-
-  rewriter.replaceOp(sliceOp, createPadTensorOfSubTensor());
+
+  Operation *tiledPadOp = padOp.getTiledImplementation(
+      rewriter, /*dest=*/ValueRange{}, sliceOp.getMixedOffsets(),
+      sliceOp.getMixedSizes());
+  // All shapes are static and the data source is actually used. Rewrite into
+  // pad_tensor(subtensor(x)).
+  rewriter.replaceOp(sliceOp, tiledPadOp->getResults());
   return success();
 }
@@ -90,6 +90,4 @@ bool isEqualConstantIntOrValue(OpFoldResult ofr1, OpFoldResult ofr2) {
   auto v1 = ofr1.dyn_cast<Value>(), v2 = ofr2.dyn_cast<Value>();
   return v1 && v1 == v2;
 }
-
-} // namespace mlir

@@ -8,6 +8,7 @@ set(LLVM_OPTIONAL_SOURCES
   InferTypeOpInterface.cpp
   LoopLikeInterface.cpp
   SideEffectInterfaces.cpp
+  TilingInterface.cpp
   VectorInterfaces.cpp
   ViewLikeInterface.cpp
 )

@@ -37,6 +38,6 @@ add_mlir_interface_library(DerivedAttributeOpInterface)
 add_mlir_interface_library(InferTypeOpInterface)
 add_mlir_interface_library(LoopLikeInterface)
 add_mlir_interface_library(SideEffectInterfaces)
+add_mlir_interface_library(TilingInterface)
 add_mlir_interface_library(VectorInterfaces)
 add_mlir_interface_library(ViewLikeInterface)
-
@@ -0,0 +1,18 @@ (new file: mlir/lib/Interfaces/TilingInterface.cpp)
+//===- TilingInterface.cpp - Tiling interface -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the interface in `TilingInterface.td`.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/TilingInterface.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+
+using namespace mlir;
+
+#include "mlir/Interfaces/TilingInterface.cpp.inc"
@@ -3,14 +3,18 @@
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,3" -cse -split-input-file | \
 // RUN:   FileCheck %s -check-prefix=TILE1

-// TILE2-LABEL: func @dynamic_pad_tensor(
+// TILE2-DAG:  #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// TILE2-DAG:  #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// TILE2:      func @dynamic_pad_tensor(
 // TILE2-SAME:   %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
 // TILE2-DAG:    %[[C0:.*]] = constant 0 : index
 // TILE2-DAG:    %[[C1:.*]] = constant 1 : index
 // TILE2-DAG:    %[[C2:.*]] = constant 2 : index
 // TILE2-DAG:    %[[C3:.*]] = constant 3 : index
-// TILE2:        %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
-// TILE2:        %[[DIM1:.*]] = tensor.dim %[[OUT]], %[[C1]]
+// TILE2:        %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
+// TILE2:        %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]]
+// TILE2:        %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// TILE2:        %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // TILE2:        %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
 // TILE2:          scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
 // TILE2:            %[[SWAP_RESULT:.*]] = scf.if

@@ -21,12 +25,14 @@
 // TILE2:            tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // TILE2:        return %[[RESULT]]

-// TILE1-LABEL: func @dynamic_pad_tensor(
+// TILE1-DAG:  #[[MAP:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// TILE1:      func @dynamic_pad_tensor(
 // TILE1-SAME:   %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
 // TILE1-DAG:    %[[C0:.*]] = constant 0 : index
 // TILE1-DAG:    %[[C1:.*]] = constant 1 : index
 // TILE1-DAG:    %[[C3:.*]] = constant 3 : index
-// TILE1:        %[[DIM1:.*]] = tensor.dim %[[OUT]], %[[C1]]
+// TILE1:        %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// TILE1:        %[[DIM1:.*]] = affine.apply #[[MAP]]()[%[[DIM_IN1]]]
 // TILE1:        %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
 // TILE1:        %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
 // TILE1:        %[[SWAP_RESULT:.*]] = scf.if
@@ -757,6 +757,13 @@ alias(
     actual = ":SideEffectInterfacesTdFiles",
 )

+td_library(
+    name = "TilingInterfaceTdFiles",
+    srcs = ["include/mlir/Interfaces/TilingInterface.td"],
+    includes = ["include"],
+    deps = [":OpBaseTdFiles"],
+)
+
 td_library(
     name = "VectorInterfacesTdFiles",
     srcs = ["include/mlir/Interfaces/VectorInterfaces.td"],

@@ -4603,6 +4610,24 @@ alias(
     actual = "SideEffectInterfaces",
 )

+gentbl_cc_library(
+    name = "TilingInterfaceIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-interface-decls"],
+            "include/mlir/Interfaces/TilingInterface.h.inc",
+        ),
+        (
+            ["-gen-op-interface-defs"],
+            "include/mlir/Interfaces/TilingInterface.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Interfaces/TilingInterface.td",
+    deps = [":TilingInterfaceTdFiles"],
+)
+
 cc_library(
     name = "Analysis",
     srcs = glob(

@@ -5790,6 +5815,7 @@ td_library(
         ":LoopLikeInterfaceTdFiles",
         ":OpBaseTdFiles",
         ":SideEffectInterfacesTdFiles",
+        ":TilingInterfaceTdFiles",
         ":ViewLikeInterfaceTdFiles",
     ],
 )

@@ -6045,10 +6071,12 @@ cc_library(
         ":MathDialect",
         ":MemRefDialect",
        ":Parser",
+        ":SCFDialect",
         ":SideEffectInterfaces",
         ":StandardOps",
         ":Support",
         ":TensorDialect",
+        ":TilingInterface",
         ":ViewLikeInterface",
         "//llvm:Support",
     ],

@@ -6129,6 +6157,21 @@ cc_library(
     ],
 )

+cc_library(
+    name = "TilingInterface",
+    srcs = ["lib/Interfaces/TilingInterface.cpp"],
+    hdrs = ["include/mlir/Interfaces/TilingInterface.h"],
+    includes = ["include"],
+    deps = [
+        ":IR",
+        ":Support",
+        ":TensorDialect",
+        ":TilingInterfaceIncGen",
+        ":ViewLikeInterface",
+        "//llvm:Support",
+    ],
+)
+
 td_library(
     name = "VectorOpsTdFiles",
     srcs = ["include/mlir/Dialect/Vector/VectorOps.td"],