[MLIR][LINALG] Convert Linalg on Tensors to Buffers
This adds a basic pass that converts linalg.GenericOp operations working on tensors to work on buffers instead.

Differential Revision: https://reviews.llvm.org/D78996
commit 6ccaf73887 (parent fa2783d79a)
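In rough terms, the pass rewrites a linalg.generic whose operands and results are tensors into one that operates on memrefs: a buffer is allocated for each result, and function results are returned through appended output arguments. A minimal before/after sketch, distilled from the test cases added below ("..." elides the indexing_maps/iterator_types attributes, names are illustrative, and the final positions of alloc/dealloc are decided by the separate buffer-placement pass):

    // Before: the result is produced as a tensor value.
    func @exp(%arg0: tensor<4xf32>) -> tensor<4xf32> {
      %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, ...} %arg0 {
      ^bb0(%in: f32):
        %e = exp %in : f32
        linalg.yield %e : f32
      }: tensor<4xf32> -> tensor<4xf32>
      return %0 : tensor<4xf32>
    }

    // After: the result is written into a buffer passed in as an extra argument.
    func @exp(%arg0: memref<4xf32>, %result: memref<4xf32>) {
      %0 = alloc() : memref<4xf32>
      linalg.generic {args_in = 1 : i64, args_out = 1 : i64, ...} %arg0, %0 {
      ^bb0(%in: f32, %out: f32):
        %e = exp %in : f32
        linalg.yield %e : f32
      }: memref<4xf32>, memref<4xf32>
      linalg.copy(%0, %result) : memref<4xf32>, memref<4xf32>
      dealloc %0 : memref<4xf32>
      return
    }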
mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -50,6 +50,11 @@ std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToParallelLoopsPass();
 /// Placeholder for now, this is NYI.
 std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToAffineLoopsPass();
 
+/// Create a pass to convert Linalg operations which work on tensors to use
+/// buffers instead.
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertLinalgOnTensorsToBuffersPass();
+
 /// Patterns for fusing linalg operation on tensors.
 void populateLinalgTensorOpsFusionPatterns(MLIRContext *context,
                                            OwningRewritePatternList &patterns);
mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -32,6 +32,12 @@ def LinalgLowerToLoops : FunctionPass<"convert-linalg-to-loops"> {
   let constructor = "mlir::createConvertLinalgToLoopsPass()";
 }
 
+def LinalgOnTensorsToBuffers : Pass<"convert-linalg-on-tensors-to-buffers", "ModuleOp"> {
+  let summary = "Convert the Linalg operations which work on tensor-type "
+                "operands or results to use buffers instead";
+  let constructor = "mlir::createConvertLinalgOnTensorsToBuffersPass()";
+}
+
 def LinalgLowerToParallelLoops
     : FunctionPass<"convert-linalg-to-parallel-loops"> {
   let summary = "Lower the operations from the linalg dialect into parallel "
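This TableGen entry exposes the pass as the -convert-linalg-on-tensors-to-buffers flag used in the new test's RUN line below, where it is driven through mlir-opt and chained with -buffer-placement.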
mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -3,6 +3,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
   Interchange.cpp
   Loops.cpp
   Promotion.cpp
+  TensorsToBuffers.cpp
   Tiling.cpp
   Transforms.cpp
   Vectorization.cpp
mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp (new file)
@@ -0,0 +1,189 @@
//===- TensorsToBuffers.cpp - Transformation from tensors to buffers ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the conversion from tensors to buffers on Linalg
// operations.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Operation.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/BufferPlacement.h"

using namespace mlir;
using ReturnOpConverter =
    NonVoidToVoidReturnOpConverter<mlir::ReturnOp, mlir::ReturnOp,
                                   linalg::CopyOp>;

namespace {
/// A pattern to convert Generic Linalg operations which work on tensors to
/// use buffers. A buffer is allocated using BufferAssignmentPlacer for
/// each operation result. The BufferPlacement pass should be run afterwards
/// to move the Alloc operations to the correct positions and to insert the
/// missing Dealloc operations in the correct places.
class GenericOpConverter
    : public BufferAssignmentOpConversionPattern<linalg::GenericOp> {
public:
  using BufferAssignmentOpConversionPattern<
      linalg::GenericOp>::BufferAssignmentOpConversionPattern;

  LogicalResult
  matchAndRewrite(linalg::GenericOp op, ArrayRef<Value> operands,
                  ConversionPatternRewriter &rewriter) const final {
    Location loc = op.getLoc();
    ResultRange results = op.getOperation()->getResults();
    SmallVector<Value, 2> newArgs, newResults;
    newArgs.reserve(operands.size() + results.size());
    newArgs.append(operands.begin(), operands.end());
    newResults.reserve(results.size());

    // Update all types to memref types.
    for (auto result : results) {
      auto type = result.getType().cast<ShapedType>();
      assert(type && "tensor to buffer conversion expects ranked results");
      if (!type.hasStaticShape())
        return rewriter.notifyMatchFailure(
            op, "dynamic shapes not currently supported");
      auto memrefType = MemRefType::get(type.getShape(), type.getElementType());

      // Compute the alloc position and insert a custom allocation node.
      OpBuilder::InsertionGuard guard(rewriter);
      rewriter.restoreInsertionPoint(
          bufferAssignment->computeAllocPosition(result));
      auto alloc = rewriter.create<AllocOp>(loc, memrefType);
      newArgs.push_back(alloc);
      newResults.push_back(alloc);
    }

    // Generate a new linalg operation that works on buffers.
    auto linalgOp = rewriter.create<linalg::GenericOp>(
        loc, llvm::None, newArgs, rewriter.getI64IntegerAttr(operands.size()),
        rewriter.getI64IntegerAttr(results.size()), op.indexing_maps(),
        op.iterator_types(), op.docAttr(), op.library_callAttr());

    // Create a new block in the region of the new Generic Op.
    Block &oldBlock = op.getRegion().front();
    Region &newRegion = linalgOp.region();
    Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(),
                                           oldBlock.getArgumentTypes());

    // Add the result arguments to the new block.
    for (auto result : newResults)
      newBlock->addArgument(
          result.getType().cast<ShapedType>().getElementType());

    // Clone the body of the old block to the new block.
    BlockAndValueMapping mapping;
    for (unsigned i = 0; i < oldBlock.getNumArguments(); i++)
      mapping.map(oldBlock.getArgument(i), newBlock->getArgument(i));
    rewriter.setInsertionPointToEnd(newBlock);
    for (auto &op : oldBlock.getOperations()) {
      Operation *clonedOp = rewriter.clone(op, mapping);
      mapping.map(op.getResults(), clonedOp->getResults());
    }

    // Replace the results of the old Generic Op with the results of the new
    // one.
    rewriter.replaceOp(op, newResults);
    return success();
  }
};

/// Populate the given list with patterns to convert Linalg operations on
/// tensors to buffers.
static void populateConvertLinalgOnTensorsToBuffersPattern(
    MLIRContext *context, BufferAssignmentPlacer *placer,
    TypeConverter *converter, OwningRewritePatternList *patterns) {
  // clang-format off
  patterns->insert<FunctionAndBlockSignatureConverter,
                   GenericOpConverter,
                   ReturnOpConverter>(context, placer, converter);
  // clang-format on
}

/// Converts Linalg operations that work on tensor-type operands or results to
/// work on buffers.
struct ConvertLinalgOnTensorsToBuffers
    : public LinalgOnTensorsToBuffersBase<ConvertLinalgOnTensorsToBuffers> {
  void runOnOperation() override {
    MLIRContext &context = getContext();
    ConversionTarget target(context);
    BufferAssignmentTypeConverter converter;

    // Mark all Standard operations legal.
    target.addLegalDialect<StandardOpsDialect>();

    // Mark all Linalg operations illegal as long as they work on tensors.
    auto isIllegalType = [&](Type type) { return !converter.isLegal(type); };
    auto isLegalOperation = [&](Operation *op) {
      return llvm::none_of(op->getOperandTypes(), isIllegalType) &&
             llvm::none_of(op->getResultTypes(), isIllegalType);
    };
    target.addDynamicallyLegalDialect<linalg::LinalgDialect>(
        Optional<ConversionTarget::DynamicLegalityCallbackFn>(
            isLegalOperation));

    // TODO: Given the following dynamic legality checks, the current
    // implementation of FunctionAndBlockSignatureConverter in Buffer
    // Assignment converts the function signature incorrectly. That converter
    // moves all the return values of the function to the input argument list
    // without considering the return value types and creates a void function.
    // However, NonVoidToVoidReturnOpConverter does not change the return
    // operation if its operands are not tensors. The following example leaves
    // the IR in a broken state.
    //
    // @function(%arg0: f32, %arg1: tensor<4xf32>) -> (f32, f32) {
    //    %0 = mulf %arg0, %arg0 : f32
    //    return %0, %0 : f32, f32
    // }
    //
    // broken IR after conversion:
    //
    // func @function(%arg0: f32, %arg1: memref<4xf32>, f32, f32) {
    //    %0 = mulf %arg0, %arg0 : f32
    //    return %0, %0 : f32, f32
    // }
    //
    // This issue must be fixed in FunctionAndBlockSignatureConverter and
    // NonVoidToVoidReturnOpConverter.

    // Mark Standard Return operations illegal as long as any operand is a
    // tensor.
    target.addDynamicallyLegalOp<mlir::ReturnOp>([&](mlir::ReturnOp returnOp) {
      return llvm::none_of(returnOp.getOperandTypes(), isIllegalType);
    });

    // Mark the function operation illegal as long as any argument is a
    // tensor.
    target.addDynamicallyLegalOp<FuncOp>([&](FuncOp funcOp) {
      return converter.isSignatureLegal(funcOp.getType()) &&
             llvm::none_of(funcOp.getType().getResults(),
                           [&](Type type) { return type.isa<MemRefType>(); });
    });

    // Walk over all the functions to apply buffer assignment.
    getOperation().walk([&](FuncOp function) {
      OwningRewritePatternList patterns;
      BufferAssignmentPlacer placer(function);
      populateConvertLinalgOnTensorsToBuffersPattern(&context, &placer,
                                                     &converter, &patterns);

      // Apply the full conversion.
      return WalkResult(
          applyFullConversion(function, target, patterns, &converter));
    });
  }
};
} // end anonymous namespace

std::unique_ptr<OperationPass<ModuleOp>>
mlir::createConvertLinalgOnTensorsToBuffersPass() {
  return std::make_unique<ConvertLinalgOnTensorsToBuffers>();
}
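Note that GenericOpConverter only creates the allocations and rewires results; as its doc comment says, placing those allocs correctly and inserting the matching deallocs is left to the subsequent buffer-placement pass, which is what the linalg.copy/dealloc CHECK lines of the new test below rely on.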
(new test file)
@@ -0,0 +1,75 @@
// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @multiple_results_generic_op
func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
  %0, %1 = linalg.generic {args_in = 1 : i64, args_out = 2 : i64, indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} %arg0 {
  ^bb0(%gen_arg1: f32):
    %tmp1 = exp %gen_arg1 : f32
    linalg.yield %tmp1, %tmp1 : f32, f32
  }: tensor<4xf32> -> (tensor<4xf32>, tensor<4xf32>)
  return %0, %1 : tensor<4xf32>, tensor<4xf32>
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]], %[[ARG2_RESULT:.*]]: [[TYPE]])
// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
// CHECK-NEXT: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32
// CHECK-NEXT: %{{.*}} = exp
// CHECK-NEXT: linalg.yield
// CHECK-NEXT: [[TYPE]], [[TYPE]], [[TYPE]]
// CHECK: linalg.copy(%[[FIRST_ALLOC]], %[[ARG1_RESULT]])
// CHECK: dealloc %[[FIRST_ALLOC]]
// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG2_RESULT]])
// CHECK: dealloc %[[SECOND_ALLOC]]
// CHECK: return

// -----

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @chained_operations
func @chained_operations(%arg0: tensor<4xf32>) -> tensor<4xf32> {
  %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 {
  ^bb0(%gen_arg1: f32):
    %tmp1 = exp %gen_arg1 : f32
    linalg.yield %tmp1 : f32
  }: tensor<4xf32> -> tensor<4xf32>
  %1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0 {
  ^bb0(%gen_arg2: f32):
    %tmp2 = exp %gen_arg2 : f32
    linalg.yield %tmp2 : f32
  }: tensor<4xf32> -> tensor<4xf32>
  return %1 : tensor<4xf32>
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]])
// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]]
// CHECK: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
// CHECK: [[TYPE]], [[TYPE]]
// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
// CHECK: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
// CHECK: [[TYPE]], [[TYPE]]
// CHECK: dealloc %[[FIRST_ALLOC]]
// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG1_RESULT]])
// CHECK: dealloc %[[SECOND_ALLOC]]
// CHECK: return

// -----

// CHECK-LABEL: func @no_linalg_op
func @no_linalg_op(%arg0: f32) -> (f32, f32) {
  %0 = mulf %arg0, %arg0 : f32
  return %0, %0 : f32, f32
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]])
// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]]
// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]]