[mlir][linalg] Hoist padding simplifications (NFC).

Remove unused members and store the indexing and packing loops in SmallVector.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D113398
2021-11-10 13:43:50 +00:00 · 2021-11-10 13:43:50 +00:00 · 969243a007
parent e201232ece
commit 969243a007
2 changed files with 20 additions and 29 deletions
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
@ -17,7 +17,7 @@ class Value;
 namespace linalg {
 class PadTensorOp;

-/// Mechanically hoist padding operations on tensors by `nLoops` into a new,
+/// Mechanically hoist padding operations on tensors by `numLoops` into a new,
 /// generally larger tensor. This achieves packing of multiple padding ops into
 /// a larger tensor. On success, `padTensorOp` is replaced by the cloned version
 /// in the packing loop so the caller can continue reasoning about the padding
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@ -54,7 +54,7 @@ using namespace mlir::linalg;
 ///   7. There is no enclosing scf::ForOp that indexes the padded data.
 /// Other cases succeed and will trigger hoisting of the pad op.
 struct HoistingAnalysis {
-  HoistingAnalysis(PadTensorOp padTensorOp, int nLevels);
+  HoistingAnalysis(PadTensorOp padTensorOp, int numLoops);

  bool isValid() { return valid; }

@ -62,12 +62,6 @@ struct HoistingAnalysis {
  /// `backwardSlice`.
  FailureOr<SmallVector<Value>> getPackedTensorSizes(ImplicitLocOpBuilder &b);

-  /// The padTensorOp that needs to be hoisted.
-  PadTensorOp padTensorOp;
-
-  /// The maximum number of immediately enclosing scf::ForOp to hoist over.
-  int nLevels;
-
  /// The outermost loop, determined by `nLevels` above which `padTensorOp` will
  /// be hoisted.
  scf::ForOp outermostEnclosingForOp;
@ -81,9 +75,7 @@ struct HoistingAnalysis {
  ///  2. whose induction variable is used, directly or indirectly, in the
  ///     computation of `padTensorOp`.
  /// The span of these loops determines the footprint of the packed tensor.
-  /// SmallSetVector<scf::ForOp> packingLoops;
-  SetVector<scf::ForOp, SmallVector<scf::ForOp>, DenseSet<Operation *>>
-      packingLoops;
+  SmallVector<scf::ForOp> packingLoops;

 private:
  /// Returns the loops in `backwardSlice` used to index the padded data. The
@ -103,8 +95,8 @@ private:
  ///       %padded_slice = linalg.pad_tensor %slice
  /// ```
  /// getIndexingLoops(%padded_slice, %slice) returns [scf.for %i, scf.for %j]
-  SetVector<Operation *> getIndexingLoops(PadTensorOp padTensorOp,
-                                          tensor::ExtractSliceOp sliceOp);
+  SmallVector<scf::ForOp> getIndexingLoops(PadTensorOp padTensorOp,
+                                           tensor::ExtractSliceOp sliceOp);

  /// Encodes whether the analysis is valid and hoisting can proceed.
  bool valid;
@ -148,10 +140,8 @@ getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
  }
 }

-HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int nLevels)
-    : padTensorOp(padTensorOp), nLevels(nLevels), valid(false) {
-  AsmState state(padTensorOp->getParentOfType<mlir::FuncOp>());
-  (void)state;
+HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
+  valid = false;

  // Bail on any use that isn't an input of a Linalg op.
  // Hoisting of inplace updates happens after vectorization.
@ -160,7 +150,7 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int nLevels)

  // Get at most nLevels of immediately enclosing loops.
  SmallVector<scf::ForOp> reverseEnclosingLoops;
-  getAtMostNEnclosingLoops(padTensorOp, nLevels, reverseEnclosingLoops);
+  getAtMostNEnclosingLoops(padTensorOp, numLoops, reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "No immediately enclosing loop -> skip\n");
    return;
@ -216,19 +206,20 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int nLevels)
  }

  // Search the loops found in `backwardSlice` used to index the padded data.
-  SetVector<Operation *> indexingLoops = getIndexingLoops(padTensorOp, sliceOp);
+  SmallVector<scf::ForOp> indexingLoops =
+      getIndexingLoops(padTensorOp, sliceOp);

  // Add only the loops part of `indexingLoops` to the packing loops. All other
  // loops are not used to index the padded data and consequently access the
  // same data in every loop iteration. Adding them to the packing loops would
  // increase the cache footprint of the packed data by storing the same data
  // multiple times.
-  for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops)) {
-    if (indexingLoops.contains(forOp))
-      packingLoops.insert(forOp);
-  }
-  assert(indexingLoops.size() == packingLoops.size() &&
+  for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
+    if (!indexingLoops.empty() && indexingLoops.back() == forOp)
+      packingLoops.push_back(indexingLoops.pop_back_val());
+  assert(indexingLoops.empty() &&
         "expect the all indexing loops are enclosing loops");
+
  if (packingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "Cannot find a packing loop -> skip\n");
    return;
@ -247,7 +238,7 @@ static void addIndexOperandsToIndexEdges(Operation *operation,
      indexEdges.insert(operand);
 }

-SetVector<Operation *>
+SmallVector<scf::ForOp>
 HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
                                   tensor::ExtractSliceOp sliceOp) {
  // Set of all values used for index computation.
@ -272,7 +263,7 @@ HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
  // After iterating `backwardSlice` we obtain:
  // indexEdges = [%i, %j, %ubi, %ubj]
  // indexingLoops = [scf.for %i, scf.for %j]
-  SetVector<Operation *> indexingLoops;
+  SmallVector<scf::ForOp> indexingLoops;
  for (Operation *op : llvm::reverse(backwardSlice)) {
    // Add the index operands of `padTensorOp` and `sliceOp` to start the
    // exploration of the index computation.
@ -286,7 +277,7 @@ HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
    if (auto forOp = dyn_cast<scf::ForOp>(op)) {
      if (indexEdges.contains(forOp.getInductionVar())) {
        addIndexOperandsToIndexEdges(op, indexEdges);
-        indexingLoops.insert(forOp);
+        indexingLoops.push_back(forOp);
        continue;
      }
    }
@ -442,7 +433,7 @@ HoistingAnalysis::getPackedTensorSizes(ImplicitLocOpBuilder &b) {

  // Iteratively try to fold the upper bounds into the constraints set.
  if (failed(foldUpperBoundsIntoConstraintsSet(
-          constraints, outermostEnclosingForOp, packingLoops.getArrayRef())))
+          constraints, outermostEnclosingForOp, packingLoops)))
    return failure();

  int nPackedLoops = packingLoops.size();
@ -577,7 +568,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
    auto forOp = dyn_cast<scf::ForOp>(op);
    assert(forOp && "Expected scf::ForOp when hoisting pad ops");
    // Unused loop, just skip it.
-    if (!analysis.packingLoops.contains(forOp))
+    if (!llvm::is_contained(analysis.packingLoops, forOp))
      continue;

    auto clonedForOp =