From 8fc0525a159b3be3c83f8d80c03585237ea40a73 Mon Sep 17 00:00:00 2001 From: gysit Date: Mon, 13 Dec 2021 19:49:16 +0000 Subject: [PATCH] [mlir][linalg] Stage application of pad tensor op vectorization. Adapt the LinalgStrategyVectorizationPattern pass to apply the vectorization patterns in two stages. The change ensures the generic pad tensor op vectorization pattern does not run too early. Additionally, the revision adds the transfer op canonicalization patterns to the set of applied patterns, since they are needed to enable efficient vectorization for rank-reduced convolutions. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D115627 --- .../Linalg/Transforms/LinalgStrategyPasses.cpp | 17 ++++++++++++++--- mlir/test/Dialect/Linalg/codegen-strategy.mlir | 16 ++++++++++++++++ .../Linalg/TestLinalgCodegenStrategy.cpp | 6 +++++- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp index 0da17434c429..3064b041cfc1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp @@ -292,11 +292,22 @@ struct LinalgStrategyVectorizePass vectorizationPatterns.add( funcOp.getContext(), /*benefit=*/2); - if (vectorizePadding) { - linalg::populatePadTensorOpVectorizationPatterns(vectorizationPatterns); - } + TransferReadOp::getCanonicalizationPatterns(vectorizationPatterns, + funcOp.getContext()); + TransferWriteOp::getCanonicalizationPatterns(vectorizationPatterns, + funcOp.getContext()); (void)applyPatternsAndFoldGreedily(funcOp, std::move(vectorizationPatterns)); + + // Apply the pad tensor op vectorization separately to avoid running the + // GenericPadTensorOpVectorizationPattern too early. + // TODO: Improve once we have better infrastructure to control pattern + // application. 
+ if (vectorizePadding) { + RewritePatternSet patterns(funcOp.getContext()); + linalg::populatePadTensorOpVectorizationPatterns(patterns); + (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + } } LinalgVectorizationOptions options; diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir index ca1e55a0cd0e..7119db8f0ccd 100644 --- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir +++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir @@ -3,6 +3,7 @@ // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 tile-interchange=1,2,0 generalize iterator-interchange=0,2,1" -split-input-file | FileCheck %s --check-prefix=CHECK-INTERCHANGE // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE +// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP // CHECK-INTRINSIC: func @matmul( // CHECK-OUTER: func @matmul( @@ -74,3 +75,18 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7 %1 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%0: tensor<72x72xf32>) -> tensor<72x72xf32> return %1 : tensor<72x72xf32> } + +// ----- + +// CHECK-DECOMP: func @conv( +func @conv(%arg0: tensor<8x18x17x32xf32>, %arg1: tensor<3x3x32x64xf32>, %arg2: tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill(%cst, %arg2) : f32, 
tensor<8x16x15x64xf32> -> tensor<8x16x15x64xf32> + + // Check the conv is padded by a rank-reducing vector transfer op pair. + // CHECK-DECOMP: vector.transfer_read {{.*}}: tensor<1x1x?x8xf32>, vector<1x8x8xf32> + // CHECK-DECOMP: vector.outerproduct + // CHECK-DECOMP: vector.transfer_write {{.*}}: vector<1x8x32xf32>, tensor<1x1x?x32xf32> + %1 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<8x18x17x32xf32>, tensor<3x3x32x64xf32>) outs(%0 : tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32> + return %1 : tensor<8x16x15x64xf32> +} diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp index 15ff86ceecf3..659cc4f227e4 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp @@ -120,6 +120,10 @@ struct TestLinalgCodegenStrategy *this, "vectorize", llvm::cl::desc("Rewrite the linalg op as a vector operation."), llvm::cl::init(false)}; + Option vectorizePadding{ + *this, "vectorize-padding", + llvm::cl::desc("Rewrite pad tensor ops as vector operations."), + llvm::cl::init(false)}; Option splitVectorTransfersTo{ *this, "split-transfers", llvm::cl::desc( @@ -186,7 +190,7 @@ void TestLinalgCodegenStrategy::runStrategy( .decomposeIf(decompose) .generalizeIf(generalize, "") .interchangeIf(!iteratorInterchange.empty(), iteratorInterchange) - .vectorizeIf(vectorize, "") + .vectorizeIf(vectorize, "", nullptr, vectorizePadding) .vectorLowering( LinalgVectorLoweringOptions() .setVectorTransformsOptions(