From 3fff5acd8f5f144cc2cd6f52cb3f37d8e01c4d2f Mon Sep 17 00:00:00 2001
From: Diego Caballero <diego.caballero@intel.com>
Date: Wed, 22 Jul 2020 14:10:29 -0700
Subject: [PATCH] [mlir][VectorOps] Expose SuperVectorizer as a utility

This patch refactors a small part of the Super Vectorizer code to
a utility so that it can be used independently from the pass. This
aligns vectorization with other utilities that we already have for loop
transformations, such as fusion, interchange, tiling, etc.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D84289
---
 mlir/include/mlir/Dialect/Affine/Utils.h      | 12 +++++++
 .../Affine/Transforms/SuperVectorize.cpp      | 34 ++++++++++++++-----
 2 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
index 19df93f760f5..327c4df7f774 100644
--- a/mlir/include/mlir/Dialect/Affine/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -13,12 +13,15 @@
 #ifndef MLIR_DIALECT_AFFINE_UTILS_H
 #define MLIR_DIALECT_AFFINE_UTILS_H
 
+#include "mlir/Support/LLVM.h"
+
 namespace mlir {
 
 class AffineForOp;
 class AffineIfOp;
 class AffineParallelOp;
 struct LogicalResult;
+class Operation;
 
 /// Replaces parallel affine.for op with 1-d affine.parallel op.
 /// mlir::isLoopParallel detect the parallel affine.for ops.
@@ -31,6 +34,15 @@ void affineParallelize(AffineForOp forOp);
 /// significant code expansion in some cases.
 LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded = nullptr);
 
+/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
+/// 'vectorSizes'. By default, each vectorization factor is applied
+/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can
+/// be optionally used to provide a different loop vectorization order.
+void vectorizeAffineLoops(
+    Operation *parentOp,
+    llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
+    ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern);
+
 } // namespace mlir
 
 #endif // MLIR_DIALECT_AFFINE_UTILS_H
diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index 1638502508e3..748530f03358 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Dialect/Vector/VectorUtils.h"
@@ -1198,25 +1199,38 @@ void Vectorize::runOnFunction() {
     return signalPassFailure();
   }
 
-  // Thread-safe RAII local context, BumpPtrAllocator freed on exit.
-  NestedPatternContext mlContext;
-
   DenseSet<Operation *> parallelLoops;
   f.walk([&parallelLoops](AffineForOp loop) {
     if (isLoopParallel(loop))
       parallelLoops.insert(loop);
   });
 
+  vectorizeAffineLoops(f, parallelLoops, vectorSizes, fastestVaryingPattern);
+}
+
+namespace mlir {
+
+/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
+/// 'vectorSizes'. By default, each vectorization factor is applied
+/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can
+/// be optionally used to provide a different loop vectorization order.
+void vectorizeAffineLoops(Operation *parentOp, DenseSet<Operation *> &loops,
+                          ArrayRef<int64_t> vectorSizes,
+                          ArrayRef<int64_t> fastestVaryingPattern) {
+  // Thread-safe RAII local context, BumpPtrAllocator freed on exit.
+  NestedPatternContext mlContext;
+
   for (auto &pat :
-       makePatterns(parallelLoops, vectorSizes.size(), fastestVaryingPattern)) {
+       makePatterns(loops, vectorSizes.size(), fastestVaryingPattern)) {
     LLVM_DEBUG(dbgs() << "\n******************************************");
     LLVM_DEBUG(dbgs() << "\n******************************************");
-    LLVM_DEBUG(dbgs() << "\n[early-vect] new pattern on Function\n");
-    LLVM_DEBUG(f.print(dbgs()));
+    LLVM_DEBUG(dbgs() << "\n[early-vect] new pattern on parent op\n");
+    LLVM_DEBUG(parentOp->print(dbgs()));
+
     unsigned patternDepth = pat.getDepth();
 
     SmallVector<NestedMatch, 8> matches;
-    pat.match(f, &matches);
+    pat.match(parentOp, &matches);
     // Iterate over all the top-level matches and vectorize eagerly.
     // This automatically prunes intersecting matches.
     for (auto m : matches) {
@@ -1239,9 +1253,11 @@ void Vectorize::runOnFunction() {
 }
 
 std::unique_ptr<OperationPass<FuncOp>>
-mlir::createSuperVectorizePass(ArrayRef<int64_t> virtualVectorSize) {
+createSuperVectorizePass(ArrayRef<int64_t> virtualVectorSize) {
   return std::make_unique<Vectorize>(virtualVectorSize);
 }
-std::unique_ptr<OperationPass<FuncOp>> mlir::createSuperVectorizePass() {
+std::unique_ptr<OperationPass<FuncOp>> createSuperVectorizePass() {
   return std::make_unique<Vectorize>();
 }
+
+} // namespace mlir