[mlir][linalg][sparse] add linalg optimization passes "upstream"

It is time to compose Linalg related optimizations with SparseTensor
related optimizations. This is a careful first start by adding some
general Linalg optimizations "upstream" of the sparse compiler in the
full sparse compiler pipeline. Some minor changes were needed to make
those optimizations aware of sparsity.

Note that after this, we will add a sparse specific fusion rule,
just to demonstrate the power of the new composition.

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D119971
This commit is contained in:
Aart Bik 2022-02-16 12:56:43 -08:00
parent 129af4daa7
commit 515c617003
15 changed files with 53 additions and 66 deletions

View File

@@ -20,6 +20,7 @@ add_mlir_dialect_library(MLIRLinalg
MLIRIR
MLIRParser
MLIRSideEffectInterfaces
MLIRSparseTensor
MLIRSCF
MLIRMath
MLIRMemRef

View File

@@ -14,6 +14,7 @@
#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineExprVisitor.h"
@@ -819,9 +820,18 @@ struct EraseIdentityGenericOp : public OpRewritePattern<GenericOp> {
Type resultType = genericOp->getResult(yieldVal.index()).getType();
// The input can have a different type than the result, e.g. a dynamic
// input dimension can be turned into a static output dimension.
if (returnedArg.getType() != resultType)
returnedArg = rewriter.create<tensor::CastOp>(genericOp.getLoc(),
resultType, returnedArg);
Type returnType = returnedArg.getType();
if (returnType != resultType) {
// Distinguish between sparse conversion or dense tensor casting.
// TODO: unify the two ops?
if (sparse_tensor::getSparseTensorEncoding(returnType) ||
sparse_tensor::getSparseTensorEncoding(resultType))
returnedArg = rewriter.create<sparse_tensor::ConvertOp>(
genericOp.getLoc(), resultType, returnedArg);
else
returnedArg = rewriter.create<tensor::CastOp>(
genericOp.getLoc(), resultType, returnedArg);
}
returnedArgs.push_back(returnedArg);
}

View File

@@ -50,6 +50,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
MLIRSCFTransforms
MLIRSCFUtils
MLIRPass
MLIRSparseTensor
MLIRStandard
MLIRStandardOpsTransforms
MLIRStandardToLLVM

View File

@@ -17,6 +17,7 @@
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Matchers.h"
@@ -2184,6 +2185,10 @@ struct RemoveOutsDependency : public OpRewritePattern<GenericOp> {
if (!operandType)
continue;
// If outs is sparse, leave it to the sparse compiler.
if (sparse_tensor::getSparseTensorEncoding(operandVal.getType()))
continue;
// If outs is already an `init_tensor` operation, nothing to do.
auto definingOp = operandVal.getDefiningOp<InitTensorOp>();
if (definingOp)
@@ -2213,7 +2218,7 @@ struct RemoveOutsDependency : public OpRewritePattern<GenericOp> {
} // namespace
//===---------------------------------------------------------------------===//
// Methods that add patterns descrined in this file to a pattern list.
// Methods that add patterns described in this file to a pattern list.
//===---------------------------------------------------------------------===//
void mlir::linalg::populateFoldReshapeOpsByLinearizationPatterns(

View File

@@ -29,6 +29,8 @@ using namespace mlir::sparse_tensor;
void mlir::sparse_tensor::buildSparseCompiler(
OpPassManager &pm, const SparseCompilerOptions &options) {
// TODO(wrengr): ensure the original `pm` is for ModuleOp
pm.addNestedPass<FuncOp>(createLinalgGeneralizationPass());
pm.addPass(createLinalgElementwiseOpFusionPass());
pm.addPass(createSparsificationPass(options.sparsificationOptions()));
pm.addPass(createSparseTensorConversionPass());
pm.addNestedPass<FuncOp>(createLinalgBufferizePass());

View File

@@ -1,6 +1,5 @@
// RUN: mlir-opt %s --sparse-compiler | \
// RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \
// RUN: TENSOR1="%mlir_integration_test_dir/data/zero.mtx" \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
@@ -40,15 +39,17 @@
// library.
module {
//
// A kernel that assigns elements from A to an initially zero X.
// A kernel that assigns elements from A to X.
//
func @dense_output(%arga: tensor<?x?xf64, #SparseMatrix>,
%argx: tensor<?x?xf64, #DenseMatrix>
{linalg.inplaceable = true})
-> tensor<?x?xf64, #DenseMatrix> {
func @dense_output(%arga: tensor<?x?xf64, #SparseMatrix>) -> tensor<?x?xf64, #DenseMatrix> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arga, %c0 : tensor<?x?xf64, #SparseMatrix>
%d1 = tensor.dim %arga, %c1 : tensor<?x?xf64, #SparseMatrix>
%init = sparse_tensor.init [%d0, %d1] : tensor<?x?xf64, #DenseMatrix>
%0 = linalg.generic #trait_assign
ins(%arga: tensor<?x?xf64, #SparseMatrix>)
outs(%argx: tensor<?x?xf64, #DenseMatrix>) {
outs(%init: tensor<?x?xf64, #DenseMatrix>) {
^bb(%a: f64, %x: f64):
linalg.yield %a : f64
} -> tensor<?x?xf64, #DenseMatrix>
@@ -70,15 +71,9 @@ module {
%a = sparse_tensor.new %fileName
: !Filename to tensor<?x?xf64, #SparseMatrix>
// Initialize all-dense annotated "sparse" matrix to all zeros.
%fileZero = call @getTensorFilename(%c1) : (index) -> (!Filename)
%x = sparse_tensor.new %fileZero
: !Filename to tensor<?x?xf64, #DenseMatrix>
// Call the kernel.
%0 = call @dense_output(%a, %x)
: (tensor<?x?xf64, #SparseMatrix>,
tensor<?x?xf64, #DenseMatrix>) -> tensor<?x?xf64, #DenseMatrix>
%0 = call @dense_output(%a)
: (tensor<?x?xf64, #SparseMatrix>) -> tensor<?x?xf64, #DenseMatrix>
//
// Print the linearized 5x5 result for verification.
@@ -92,7 +87,7 @@ module {
// Release the resources.
sparse_tensor.release %a : tensor<?x?xf64, #SparseMatrix>
sparse_tensor.release %x : tensor<?x?xf64, #DenseMatrix>
sparse_tensor.release %0 : tensor<?x?xf64, #DenseMatrix>
return
}

View File

@@ -1,18 +1,12 @@
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Do the same run, but now with SIMDization as well. This should not change the outcome.
//
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler="vectorization-strategy=2 vl=2" | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=2" | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

View File

@@ -1,11 +1,7 @@
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
#CSR = #sparse_tensor.encoding<{
dimLevelType = [ "dense", "compressed" ],

View File

@@ -1,18 +1,12 @@
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Do the same run, but now with SIMDization as well. This should not change the outcome.
//
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler="vectorization-strategy=2 vl=2" | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=2" | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

View File

@@ -1,16 +1,12 @@
// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Do the same run, but now with SIMDization as well. This should not change the outcome.
//
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler="vectorization-strategy=2 vl=8" | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=8" | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

View File

@@ -1,18 +1,12 @@
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Do the same run, but now with SIMDization as well. This should not change the outcome.
//
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler="vectorization-strategy=2 vl=8" | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: mlir-opt %s -sparse-compiler="vectorization-strategy=2 vl=8" | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

View File

@@ -113,7 +113,6 @@ class SparseCompiler:
def __init__(self, options: str):
pipeline = (
f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
f'sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}}')
self.pipeline = pipeline

View File

@@ -73,7 +73,6 @@ class SparseCompiler:
def __init__(self):
pipeline = (
f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
f'sparse-compiler{{reassociate-fp-reductions=1 enable-index-optimizations=1}}')
self.pipeline = pipeline

View File

@@ -171,7 +171,6 @@ class SparseCompiler:
def __init__(self, sparsification_options: str, support_lib: str):
self._support_lib = support_lib
self._pipeline = (
f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
f'sparse-compiler{{{sparsification_options} reassociate-fp-reductions=1 enable-index-optimizations=1}}')
# Must be in the scope of a `with ir.Context():`
self._passmanager = PassManager.parse(self._pipeline)

View File

@@ -6997,6 +6997,7 @@ cc_library(
":Parser",
":SCFDialect",
":SideEffectInterfaces",
":SparseTensor",
":StandardOps",
":Support",
":TensorDialect",
@@ -7083,6 +7084,7 @@ cc_library(
":SCFDialect",
":SCFTransforms",
":SCFUtils",
":SparseTensor",
":StandardOps",
":StandardOpsTransforms",
":Support",