[MLIR][LINALG] Convert Linalg on Tensors to Buffers

This adds a basic pass that converts linalg.GenericOp operations which work on
tensors to use buffers instead.
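
As an illustrative sketch (adapted from the new test cases; the %buf and
block-argument names below are made up), a generic op that produces a tensor
result, such as

  #map0 = affine_map<(d0) -> (d0)>
  %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64,
                       indexing_maps = [#map0, #map0],
                       iterator_types = ["parallel"]} %arg0 {
  ^bb0(%in: f32):
    %tmp = exp %in : f32
    linalg.yield %tmp : f32
  }: tensor<4xf32> -> tensor<4xf32>

is rewritten so that a buffer is allocated for the result, appended to the
operand list, and exposed as an extra block argument of the region:

  %buf = alloc() : memref<4xf32>
  linalg.generic {args_in = 1 : i64, args_out = 1 : i64,
                  indexing_maps = [#map0, #map0],
                  iterator_types = ["parallel"]} %arg0, %buf {
  ^bb0(%in: f32, %out: f32):
    %tmp = exp %in : f32
    linalg.yield %tmp : f32
  }: memref<4xf32>, memref<4xf32>

The subsequent -buffer-placement pass then moves the alloc to the right
position and inserts the matching dealloc.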

Differential Revision: https://reviews.llvm.org/D78996
Author: Ehsan Toosi
Date:   2020-04-28 14:44:21 +02:00
Commit: 6ccaf73887 (parent: fa2783d79a)
5 changed files with 276 additions and 0 deletions

@@ -50,6 +50,11 @@ std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToParallelLoopsPass();
/// Placeholder for now, this is NYI.
std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToAffineLoopsPass();
/// Create a pass to convert Linalg operations which work on tensors to use
/// buffers instead.
std::unique_ptr<OperationPass<ModuleOp>>
createConvertLinalgOnTensorsToBuffersPass();
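// A usage sketch (hedged; only the constructor above and the
// -convert-linalg-on-tensors-to-buffers flag come from this patch, the rest
// is standard pass-manager boilerplate):
//   PassManager pm(module.getContext());
//   pm.addPass(createConvertLinalgOnTensorsToBuffersPass());
// or, from the command line:
//   mlir-opt -convert-linalg-on-tensors-to-buffers input.mlir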
/// Patterns for fusing linalg operation on tensors.
void populateLinalgTensorOpsFusionPatterns(MLIRContext *context,
OwningRewritePatternList &patterns);

@@ -32,6 +32,12 @@ def LinalgLowerToLoops : FunctionPass<"convert-linalg-to-loops"> {
let constructor = "mlir::createConvertLinalgToLoopsPass()";
}
def LinalgOnTensorsToBuffers : Pass<"convert-linalg-on-tensors-to-buffers", "ModuleOp"> {
let summary = "Convert the Linalg operations which work on tensor-type "
"operands or results to use buffers instead";
let constructor = "mlir::createConvertLinalgOnTensorsToBuffersPass()";
}
def LinalgLowerToParallelLoops
: FunctionPass<"convert-linalg-to-parallel-loops"> {
let summary = "Lower the operations from the linalg dialect into parallel "

@@ -3,6 +3,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
Interchange.cpp
Loops.cpp
Promotion.cpp
TensorsToBuffers.cpp
Tiling.cpp
Transforms.cpp
Vectorization.cpp

@@ -0,0 +1,189 @@
//===- TensorsToBuffers.cpp - Transformation from tensors to buffers ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the conversion from tensors to buffers on Linalg
// operations.
//
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Operation.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/BufferPlacement.h"
using namespace mlir;
using ReturnOpConverter =
NonVoidToVoidReturnOpConverter<mlir::ReturnOp, mlir::ReturnOp,
linalg::CopyOp>;
namespace {
/// A pattern that converts Linalg GenericOps which work on tensors to use
/// buffers instead. A buffer is allocated using BufferAssignmentPlacer for
/// each operation result. The BufferPlacement pass should be run afterwards
/// to move the Alloc operations to the correct positions and to insert the
/// missing Dealloc operations in the right places.
class GenericOpConverter
: public BufferAssignmentOpConversionPattern<linalg::GenericOp> {
public:
using BufferAssignmentOpConversionPattern<
linalg::GenericOp>::BufferAssignmentOpConversionPattern;
LogicalResult
matchAndRewrite(linalg::GenericOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const final {
Location loc = op.getLoc();
ResultRange results = op.getOperation()->getResults();
SmallVector<Value, 2> newArgs, newResults;
newArgs.reserve(operands.size() + results.size());
newArgs.append(operands.begin(), operands.end());
newResults.reserve(results.size());
// Update all types to memref types.
for (auto result : results) {
auto type = result.getType().cast<ShapedType>();
assert(type && "tensor to buffer conversion expects ranked results");
if (!type.hasStaticShape())
return rewriter.notifyMatchFailure(
op, "dynamic shapes not currently supported");
auto memrefType = MemRefType::get(type.getShape(), type.getElementType());
// Compute alloc position and insert a custom allocation node.
OpBuilder::InsertionGuard guard(rewriter);
rewriter.restoreInsertionPoint(
bufferAssignment->computeAllocPosition(result));
auto alloc = rewriter.create<AllocOp>(loc, memrefType);
newArgs.push_back(alloc);
newResults.push_back(alloc);
}
// Generate a new linalg operation that works on buffers.
auto linalgOp = rewriter.create<linalg::GenericOp>(
loc, llvm::None, newArgs, rewriter.getI64IntegerAttr(operands.size()),
rewriter.getI64IntegerAttr(results.size()), op.indexing_maps(),
op.iterator_types(), op.docAttr(), op.library_callAttr());
// Create a new block in the region of the new Generic Op.
Block &oldBlock = op.getRegion().front();
Region &newRegion = linalgOp.region();
Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(),
oldBlock.getArgumentTypes());
// Add the result arguments to the new block.
for (auto result : newResults)
newBlock->addArgument(
result.getType().cast<ShapedType>().getElementType());
// Clone the body of the old block to the new block.
BlockAndValueMapping mapping;
for (unsigned i = 0; i < oldBlock.getNumArguments(); i++)
mapping.map(oldBlock.getArgument(i), newBlock->getArgument(i));
rewriter.setInsertionPointToEnd(newBlock);
for (auto &op : oldBlock.getOperations()) {
Operation *clonedOp = rewriter.clone(op, mapping);
mapping.map(op.getResults(), clonedOp->getResults());
}
// Replace the results of the old Generic Op with the results of the new
// one.
rewriter.replaceOp(op, newResults);
return success();
}
};
/// Populate the given list with patterns to convert Linalg operations on
/// tensors to buffers.
static void populateConvertLinalgOnTensorsToBuffersPattern(
MLIRContext *context, BufferAssignmentPlacer *placer,
TypeConverter *converter, OwningRewritePatternList *patterns) {
// clang-format off
patterns->insert<FunctionAndBlockSignatureConverter,
GenericOpConverter,
ReturnOpConverter>(context, placer, converter);
// clang-format on
}
/// Converts Linalg operations that work on tensor-type operands or results to
/// work on buffers.
struct ConvertLinalgOnTensorsToBuffers
: public LinalgOnTensorsToBuffersBase<ConvertLinalgOnTensorsToBuffers> {
void runOnOperation() override {
MLIRContext &context = getContext();
ConversionTarget target(context);
BufferAssignmentTypeConverter converter;
// Mark all Standard operations legal.
target.addLegalDialect<StandardOpsDialect>();
// Mark all Linalg operations illegal as long as they work on tensors.
auto isIllegalType = [&](Type type) { return !converter.isLegal(type); };
auto isLegalOperation = [&](Operation *op) {
return llvm::none_of(op->getOperandTypes(), isIllegalType) &&
llvm::none_of(op->getResultTypes(), isIllegalType);
};
target.addDynamicallyLegalDialect<linalg::LinalgDialect>(
Optional<ConversionTarget::DynamicLegalityCallbackFn>(
isLegalOperation));
// TODO: Considering the following dynamic legality checks, the current
// implementation of FunctionAndBlockSignatureConverter of Buffer Assignment
// will convert the function signature incorrectly. This converter moves
// all the return values of the function to the input argument list without
// considering the return value types and creates a void function. However,
// the NonVoidToVoidReturnOpConverter doesn't change the return operation if
// its operands are not tensors. The following example leaves the IR in a
// broken state.
//
// @function(%arg0: f32, %arg1: tensor<4xf32>) -> (f32, f32) {
// %0 = mulf %arg0, %arg0 : f32
// return %0, %0 : f32, f32
// }
//
// broken IR after conversion:
//
// func @function(%arg0: f32, %arg1: memref<4xf32>, f32, f32) {
// %0 = mulf %arg0, %arg0 : f32
// return %0, %0 : f32, f32
// }
//
// This issue must be fixed in FunctionAndBlockSignatureConverter and
// NonVoidToVoidReturnOpConverter.
// Mark Standard Return operations illegal as long as one operand is a tensor.
target.addDynamicallyLegalOp<mlir::ReturnOp>([&](mlir::ReturnOp returnOp) {
return llvm::none_of(returnOp.getOperandTypes(), isIllegalType);
});
// Mark the function operation illegal as long as an argument is a tensor.
target.addDynamicallyLegalOp<FuncOp>([&](FuncOp funcOp) {
return converter.isSignatureLegal(funcOp.getType()) &&
llvm::none_of(funcOp.getType().getResults(),
[&](Type type) { return type.isa<MemRefType>(); });
});
// Walk over all the functions to apply buffer assignment.
getOperation().walk([&](FuncOp function) {
OwningRewritePatternList patterns;
BufferAssignmentPlacer placer(function);
populateConvertLinalgOnTensorsToBuffersPattern(&context, &placer,
&converter, &patterns);
// Apply the full conversion.
return WalkResult(
applyFullConversion(function, target, patterns, &converter));
});
}
};
} // end anonymous namespace
std::unique_ptr<OperationPass<ModuleOp>>
mlir::createConvertLinalgOnTensorsToBuffersPass() {
return std::make_unique<ConvertLinalgOnTensorsToBuffers>();
}

@@ -0,0 +1,75 @@
// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure
#map0 = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @multiple_results_generic_op
func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
%0, %1 = linalg.generic {args_in = 1 : i64, args_out = 2 : i64, indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} %arg0 {
^bb0(%gen_arg1: f32):
%tmp1 = exp %gen_arg1 : f32
linalg.yield %tmp1, %tmp1 : f32, f32
}: tensor<4xf32> -> (tensor<4xf32>, tensor<4xf32>)
return %0, %1 : tensor<4xf32>, tensor<4xf32>
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]], %[[ARG2_RESULT:.*]]: [[TYPE]])
// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
// CHECK-NEXT: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32
// CHECK-NEXT: %{{.*}} = exp
// CHECK-NEXT: linalg.yield
// CHECK-NEXT: [[TYPE]], [[TYPE]], [[TYPE]]
// CHECK: linalg.copy(%[[FIRST_ALLOC]], %[[ARG1_RESULT]])
// CHECK: dealloc %[[FIRST_ALLOC]]
// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG2_RESULT]])
// CHECK: dealloc %[[SECOND_ALLOC]]
// CHECK: return
// -----
#map0 = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @chained_operations
func @chained_operations(%arg0: tensor<4xf32>) -> tensor<4xf32> {
%0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 {
^bb0(%gen_arg1: f32):
%tmp1 = exp %gen_arg1 : f32
linalg.yield %tmp1 : f32
}: tensor<4xf32> -> tensor<4xf32>
%1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0 {
^bb0(%gen_arg2: f32):
%tmp2 = exp %gen_arg2 : f32
linalg.yield %tmp2 : f32
}: tensor<4xf32> -> tensor<4xf32>
return %1 : tensor<4xf32>
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]])
// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]]
// CHECK: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
// CHECK: [[TYPE]], [[TYPE]]
// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
// CHECK: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
// CHECK: [[TYPE]], [[TYPE]]
// CHECK: dealloc %[[FIRST_ALLOC]]
// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG1_RESULT]])
// CHECK: dealloc %[[SECOND_ALLOC]]
// CHECK: return
// -----
// CHECK-LABEL: func @no_linalg_op
func @no_linalg_op(%arg0: f32) -> (f32, f32) {
%0 = mulf %arg0, %arg0 : f32
return %0, %0 : f32, f32
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]])
// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]]
// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]]