From 8fc0525a159b3be3c83f8d80c03585237ea40a73 Mon Sep 17 00:00:00 2001 From: gysit Date: Mon, 13 Dec 2021 19:49:16 +0000 Subject: [PATCH] [mlir][linalg] Stage application of pad tensor op vectorization. Adapt the LinalgStrategyVectorizationPattern pass to apply the vectorization patterns in two stages. The change ensures the generic pad tensor op vectorization pattern does not run too early. Additionally, the revision adds the transfer op canonicalization patterns to the set of applied patterns, since they are needed to enable efficient vectorization for rank-reduced convolutions. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D115627 --- .../Linalg/Transforms/LinalgStrategyPasses.cpp | 17 ++++++++++++++--- mlir/test/Dialect/Linalg/codegen-strategy.mlir | 16 ++++++++++++++++ .../Linalg/TestLinalgCodegenStrategy.cpp | 6 +++++- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp index 0da17434c429..3064b041cfc1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp @@ -292,11 +292,22 @@ struct LinalgStrategyVectorizePass vectorizationPatterns.add( funcOp.getContext(), /*benefit=*/2); - if (vectorizePadding) { - linalg::populatePadTensorOpVectorizationPatterns(vectorizationPatterns); - } + TransferReadOp::getCanonicalizationPatterns(vectorizationPatterns, + funcOp.getContext()); + TransferWriteOp::getCanonicalizationPatterns(vectorizationPatterns, + funcOp.getContext()); (void)applyPatternsAndFoldGreedily(funcOp, std::move(vectorizationPatterns)); + + // Apply the pad tensor op vectorization separately to avoid running the + // GenericPadTensorOpVectorizationPattern too early. + // TODO: Improve once we have better infrastructure to control pattern + // application. 
+ if (vectorizePadding) { + RewritePatternSet patterns(funcOp.getContext()); + linalg::populatePadTensorOpVectorizationPatterns(patterns); + (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + } } LinalgVectorizationOptions options; diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir index ca1e55a0cd0e..7119db8f0ccd 100644 --- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir +++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir @@ -3,6 +3,7 @@ // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 tile-interchange=1,2,0 generalize iterator-interchange=0,2,1" -split-input-file | FileCheck %s --check-prefix=CHECK-INTERCHANGE // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE +// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP // CHECK-INTRINSIC: func @matmul( // CHECK-OUTER: func @matmul( @@ -74,3 +75,18 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7 %1 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%0: tensor<72x72xf32>) -> tensor<72x72xf32> return %1 : tensor<72x72xf32> } + +// ----- + +// CHECK-DECOMP: func @conv( +func @conv(%arg0: tensor<8x18x17x32xf32>, %arg1: tensor<3x3x32x64xf32>, %arg2: tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill(%cst, %arg2) : f32, 
tensor<8x16x15x64xf32> -> tensor<8x16x15x64xf32> + + // Check the conv is padded by a rank-reducing vector transfer op pair. + // CHECK-DECOMP: vector.transfer_read {{.*}}: tensor<1x1x?x8xf32>, vector<1x8x8xf32> + // CHECK-DECOMP: vector.outerproduct + // CHECK-DECOMP: vector.transfer_write {{.*}}: vector<1x8x32xf32>, tensor<1x1x?x32xf32> + %1 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<8x18x17x32xf32>, tensor<3x3x32x64xf32>) outs(%0 : tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32> + return %1 : tensor<8x16x15x64xf32> +} diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp index 15ff86ceecf3..659cc4f227e4 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp @@ -120,6 +120,10 @@ struct TestLinalgCodegenStrategy *this, "vectorize", llvm::cl::desc("Rewrite the linalg op as a vector operation."), llvm::cl::init(false)}; + Option vectorizePadding{ + *this, "vectorize-padding", + llvm::cl::desc("Rewrite pad tensor ops as vector operations."), + llvm::cl::init(false)}; Option splitVectorTransfersTo{ *this, "split-transfers", llvm::cl::desc( @@ -186,7 +190,7 @@ void TestLinalgCodegenStrategy::runStrategy( .decomposeIf(decompose) .generalizeIf(generalize, "") .interchangeIf(!iteratorInterchange.empty(), iteratorInterchange) - .vectorizeIf(vectorize, "") + .vectorizeIf(vectorize, "", nullptr, vectorizePadding) .vectorLowering( LinalgVectorLoweringOptions() .setVectorTransformsOptions(