[mlir][linalg] Transform PadTensorOp into InitOp, FillOp, GenericOp
Introduces a test pass that rewrites PadTensorOps with static shapes as a sequence of:

```
linalg.init_tensor // to create output
linalg.fill        // to initialize with padding value
linalg.generic     // to copy the original contents to the padded tensor
```

The pass can be triggered with:

- `--test-linalg-transform-patterns="test-transform-pad-tensor"`

Differential Revision: https://reviews.llvm.org/D102804
parent 3d2c9069dc · commit 0804a88e48
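In IR terms, the rewrite replaces a statically shaped `linalg.pad_tensor` with that three-op sequence. The following sketch is assembled from the `@pad_tensor_detailed` test case added below; the `linalg.fill` operands are elided since the test only checks for its presence, and SSA names are illustrative:

```mlir
%cst = constant 0.000000e+00 : f32

// Before: pad a 1x28x28x1 tensor by 2 on each side of the two spatial dims.
%pad = linalg.pad_tensor %src low[0, 2, 2, 0] high[0, 2, 2, 0] {
^bb0(%i0: index, %i1: index, %i2: index, %i3: index):
  linalg.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>

// After: create the output, fill it with %cst, then copy the source in,
// offset by the low pads via the output indexing map.
%init = linalg.init_tensor [1, 32, 32, 1] : tensor<1x32x32x1xf32>
%fill = linalg.fill ...  // writes %cst into every element of %init
%res = linalg.generic {
    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
                     affine_map<(d0, d1, d2, d3) -> (d0, d1 + 2, d2 + 2, d3)>],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
    ins(%src : tensor<1x28x28x1xf32>) outs(%fill : tensor<1x32x32x1xf32>) {
  ^bb0(%in: f32, %out: f32):
    linalg.yield %in : f32
} -> tensor<1x32x32x1xf32>
```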
```
@@ -871,6 +871,15 @@ void populateLinalgDistributeTiledLoopPattern(
// Op-specific patterns.
//===----------------------------------------------------------------------===//

/// PadTensorOp is not canonicalized away yet, so we provide a transformation to
/// `linalg.generic`.
struct PadTensorOpTransformationPattern : public OpRewritePattern<PadTensorOp> {
  using OpRewritePattern<PadTensorOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(PadTensorOp padOp,
                                PatternRewriter &rewriter) const override;
};

/// PadTensorOp does not implement the LinalgStructuredOpInterface `LinalgOp`,
/// it needs a specific pattern to vectorize.
struct PadTensorOpVectorizationPattern : public OpRewritePattern<PadTensorOp> {
```
```
@@ -637,3 +637,68 @@ LogicalResult AffineMinRangeCanonicalizationPattern::matchAndRewrite(
  return failure();
}

static SmallVector<StringRef> getNParallelLoopsAttrs(unsigned nParallelLoops) {
  return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());
}

/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp (to initialize
/// with pad_val) and GenericOp (to copy contents).
LogicalResult PadTensorOpTransformationPattern::matchAndRewrite(
    linalg::PadTensorOp padOp, PatternRewriter &rewriter) const {

  auto inputShapedType = padOp.source().getType().cast<ShapedType>();
  auto resultShapedType = padOp.result().getType().cast<ShapedType>();

  // Bail on non-static shapes.
  if (!inputShapedType.hasStaticShape())
    return failure();
  if (!resultShapedType.hasStaticShape())
    return failure();

  // Only support padding with a constant for now, i.e. either:
  //   1. A BBarg from a different block.
  //   2. A value defined outside of the current block.
  Block &block = padOp.region().front();
  auto yieldOp = cast<YieldOp>(block.getTerminator());
  assert(yieldOp.getNumOperands() == 1 && "expected single operand yield");
  Value padValue = yieldOp.values().front();
  Operation *definingOp = padValue.getDefiningOp();
  if (definingOp && definingOp->getBlock() == &block)
    return failure();
  if (!definingOp && padValue.cast<BlockArgument>().getOwner() == &block)
    return failure();

  // Create tensor with the padded shape.
  Location loc = padOp.getLoc();
  SmallVector<Value> indices(resultShapedType.getRank(),
                             rewriter.create<ConstantIndexOp>(loc, 0));
  Value initTensor = rewriter.create<InitTensorOp>(
      loc, resultShapedType.getShape(), resultShapedType.getElementType());

  // Initialize tensor with the pad value.
  Value tmpTensor =
      rewriter.create<linalg::FillOp>(loc, initTensor, padValue).result();

  // Copy original contents into new tensor.
  // Uses linalg.generic, but could be done with std.subtensor_insert.
  SmallVector<AffineExpr, 4> outputExprs;
  for (unsigned i = 0; i < resultShapedType.getRank(); ++i) {
    outputExprs.push_back(getAffineDimExpr(i, rewriter.getContext()) +
                          padOp.static_low()[i].cast<IntegerAttr>().getInt());
  }

  SmallVector<AffineMap, 2> transferMaps = {
      rewriter.getMultiDimIdentityMap(inputShapedType.getRank()),
      AffineMap::get(resultShapedType.getRank(),
                     /*symbolCount=*/0, outputExprs, rewriter.getContext())};

  rewriter.replaceOpWithNewOp<linalg::GenericOp>(
      padOp, resultShapedType, padOp.source(), tmpTensor, transferMaps,
      getNParallelLoopsAttrs(resultShapedType.getRank()),
      [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
        nestedBuilder.create<linalg::YieldOp>(nestedLoc, args[0]);
      });

  return success();
}
```
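The output-map loop above shifts each output dimension by its static low pad, while the input map stays the identity over the source rank. For the `low[0, 2, 2, 0]` case exercised in the test file below, the two transfer maps come out as:

```mlir
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>          // input: identity
affine_map<(d0, d1, d2, d3) -> (d0, d1 + 2, d2 + 2, d3)>  // output: shifted by low pads
```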
```
@@ -0,0 +1,63 @@
// RUN: mlir-opt -split-input-file --test-linalg-transform-patterns="test-transform-pad-tensor" %s | FileCheck --check-prefix=CHECK %s

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0 + 1, d1 + 1, d2 + 1, d3 + 2)>
// CHECK-LABEL: func @pad_tensor_with_memrefs
func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3xf32> {
  %cst = constant 0.000000e+00 : f32
  %0 = memref.tensor_load %arg0 : memref<1x28x28x1xf32>
  %1 = linalg.pad_tensor %0 low[1, 1, 1, 2] high[0, 2, 2, 0] {
  ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  // no predecessors
    linalg.yield %cst : f32
  } : tensor<1x28x28x1xf32> to tensor<2x31x31x3xf32>
  %2 = memref.buffer_cast %1 : memref<2x31x31x3xf32>
  return %2 : memref<2x31x31x3xf32>
}

// CHECK: linalg.fill
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]

// -----

// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0 + 1, d1 + 2, d2 + 2)>
// CHECK-LABEL: func @pad_tensor_no_memrefs
func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> {
  %cst = constant 0.000000e+00 : f32
  %0 = linalg.pad_tensor %arg0 low[1, 2, 2] high[0, 2, 2] {
  ^bb0(%arg1: index, %arg2: index, %arg3: index):  // no predecessors
    linalg.yield %cst : f32
  } : tensor<1x28x28xf32> to tensor<2x32x32xf32>
  return %0 : tensor<2x32x32xf32>
}

// CHECK: linalg.fill
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP2]], #[[$MAP3]]]

// -----

// CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK-DAG: #[[$MAP5:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 + 2, d2 + 2, d3)>
// CHECK-LABEL: func @pad_tensor_detailed
func @pad_tensor_detailed(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
  %cst = constant 0.000000e+00 : f32
  %0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
  ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  // no predecessors
    linalg.yield %cst : f32
  } : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
  return %0 : tensor<1x32x32x1xf32>
}

// CHECK: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32>
// CHECK: %[[CTE:.+]] = constant 0.000000e+00 : f32
// CHECK: %[[TMP:.+]] = linalg.init_tensor [1, 32, 32, 1] : tensor<1x32x32x1xf32>
// CHECK: %[[R1c:.+]] = linalg.fill
// CHECK: %[[R2c:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP4]], #[[$MAP5]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]
// CHECK: ins(%arg0 : tensor<1x28x28x1xf32>) outs(%1 : tensor<1x32x32x1xf32>)
// CHECK: ^bb0(%[[VAL:.+]]: f32, %arg2: f32)
// CHECK: linalg.yield %[[VAL]] : f32
// CHECK: return %[[R2c:.+]]
```
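As a quick sanity check on the result types in these tests, every output dimension of `linalg.pad_tensor` is the source extent plus its low and high pads:

```
out[i] = low[i] + src[i] + high[i]
// e.g. @pad_tensor_detailed: 0+1+0 = 1, 2+28+2 = 32, 2+28+2 = 32, 0+1+0 = 1
```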
```
@@ -87,6 +87,10 @@ struct TestLinalgTransforms
  Option<int> testHoistPadding{*this, "test-hoist-padding",
                               llvm::cl::desc("Test hoist padding"),
                               llvm::cl::init(0)};
  Option<bool> testTransformPadTensor{
      *this, "test-transform-pad-tensor",
      llvm::cl::desc("Test transform pad tensor by copying with generic ops"),
      llvm::cl::init(false)};
  ListOption<int64_t> tileSizesForPadding{
      *this, "tile-sizes-for-padding",
      llvm::cl::desc("Linalg tile sizes when tile+pad"), llvm::cl::ZeroOrMore,
```
```
@@ -508,6 +512,12 @@ static void applyLinalgToVectorPatterns(FuncOp funcOp) {
  (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}

static void applyPadTensorToGenericPatterns(FuncOp funcOp) {
  RewritePatternSet patterns(funcOp.getContext());
  patterns.add<PadTensorOpTransformationPattern>(funcOp.getContext());
  (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}

static void applyAffineMinSCFCanonicalizationPatterns(FuncOp funcOp) {
  RewritePatternSet foldPattern(funcOp.getContext());
  foldPattern.add<AffineMinSCFCanonicalizationPattern>(funcOp.getContext());
```
```
@@ -583,6 +593,8 @@ void TestLinalgTransforms::runOnFunction() {
    return applyVectorTransferForwardingPatterns(getFunction());
  if (testGenericToVectorPattern)
    return applyLinalgToVectorPatterns(getFunction());
  if (testTransformPadTensor)
    return applyPadTensorToGenericPatterns(getFunction());
  if (testAffineMinSCFCanonicalizationPatterns)
    return applyAffineMinSCFCanonicalizationPatterns(getFunction());
  if (testTileAndPadPattern)
```