[mlir] Eliminate the remaining usages of cl::opt instead of PassOption.
Summary: Pass options are a better choice for various reasons and avoid the need for static constructors.

Differential Revision: https://reviews.llvm.org/D77707
commit 400ad6f95d
parent 072ec965e1
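For readers unfamiliar with the tablegen'd pass options this commit switches to, here is a minimal sketch of the pattern applied throughout the diff below; the pass name MyLoopOpt, its constructor, and its flag are illustrative only and are not part of this commit:

// Hypothetical Passes.td entry, modeled on the AffineLoopTiling and Inliner
// entries added in this commit.
def MyLoopOpt : FunctionPass<"my-loop-opt"> {
  let summary = "Illustrative pass with a declarative option";
  let constructor = "mlir::createMyLoopOptPass()";
  let options = [
    Option<"tileSize", "tile-size", "unsigned", /*default=*/"4",
           "Tile size to use for all loops">,
  ];
}

The generated MyLoopOptBase class exposes tileSize as a member that the pass reads directly, replacing a file-static llvm::cl::opt global (and its static constructor), and the flag becomes scoped to the pass on the command line, e.g. mlir-opt -my-loop-opt="tile-size=32".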
@@ -1606,8 +1606,8 @@ public:
   reference front() { return Storage.front(); }
   const_reference front() const { return Storage.front(); }

-  operator std::vector<DataType>&() { return Storage; }
-  operator ArrayRef<DataType>() { return Storage; }
+  operator std::vector<DataType> &() { return Storage; }
+  operator ArrayRef<DataType>() const { return Storage; }
   std::vector<DataType> *operator&() { return &Storage; }
   const std::vector<DataType> *operator&() const { return &Storage; }

@@ -59,7 +59,7 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass();
 /// and no callback is provided, anything passed from the command-line (if at
 /// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
 std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
-    int unrollFactor = -1, int unrollFull = -1,
+    int unrollFactor = -1, bool unrollFull = false,
     const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);

 /// Creates a loop unroll jam pass to unroll jam by the specified factor. A

@@ -18,6 +18,28 @@ include "mlir/Pass/PassBase.td"
 def AffineDataCopyGeneration : FunctionPass<"affine-data-copy-generate"> {
   let summary = "Generate explicit copying for affine memory operations";
   let constructor = "mlir::createAffineDataCopyGenerationPass()";
+  let options = [
+    Option<"fastMemoryCapacity", "fast-mem-capacity", "uint64_t",
+           /*default=*/"std::numeric_limits<uint64_t>::max()",
+           "Set fast memory space capacity in KiB (default: unlimited)">,
+    Option<"fastMemorySpace", "fast-mem-space", "unsigned",
+           /*default=*/"1",
+           "Fast memory space identifier for copy generation (default: 1)">,
+    Option<"generateDma", "generate-dma", "bool",
+           /*default=*/"true", "Generate DMA instead of point-wise copy">,
+    Option<"minDmaTransferSize", "min-dma-transfer", "int",
+           /*default=*/"1024",
+           "Minimum DMA transfer size supported by the target in bytes">,
+    Option<"slowMemorySpace", "slow-mem-space", "unsigned",
+           /*default=*/"0",
+           "Slow memory space identifier for copy generation (default: 0)">,
+    Option<"skipNonUnitStrideLoops", "skip-non-unit-stride-loops", "bool",
+           /*default=*/"false", "Testing purposes: avoid non-unit stride loop "
+                                "choice depths for copy placement">,
+    Option<"tagMemorySpace", "tag-mem-space", "unsigned",
+           /*default=*/"0",
+           "Tag memory space identifier for copy generation (default: 0)">,
+  ];
 }

 def AffineLoopInvariantCodeMotion
@@ -29,16 +51,44 @@ def AffineLoopInvariantCodeMotion
 def AffineLoopTiling : FunctionPass<"affine-loop-tile"> {
   let summary = "Tile affine loop nests";
   let constructor = "mlir::createLoopTilingPass()";
+  let options = [
+    Option<"cacheSizeInKiB", "cache-size", "uint64_t", /*default=*/"512",
+           "Set size of cache to tile for in KiB">,
+    Option<"separate", "separate", "bool", /*default=*/"",
+           "Separate full and partial tiles">,
+    Option<"tileSize", "tile-size", "unsigned", /*default=*/"",
+           "Use this tile size for all loops">,
+    ListOption<"tileSizes", "tile-sizes", "unsigned",
+               "List of tile sizes for each perfect nest "
+               "(overridden by -tile-size)",
+               "llvm::cl::ZeroOrMore">,
+  ];
 }

 def AffineLoopUnroll : FunctionPass<"affine-loop-unroll"> {
   let summary = "Unroll affine loops";
   let constructor = "mlir::createLoopUnrollPass()";
+  let options = [
+    Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4",
+           "Use this unroll factor for all loops being unrolled">,
+    Option<"unrollFull", "unroll-full", "bool", /*default=*/"false",
+           "Fully unroll loops">,
+    Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1",
+           "Unroll innermost loops repeatedly this many times">,
+    Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned",
+           /*default=*/"1",
+           "Unroll all loops with trip count less than or equal to this">,
+  ];
 }

 def AffineLoopUnrollAndJam : FunctionPass<"affine-loop-unroll-jam"> {
   let summary = "Unroll and jam affine loops";
   let constructor = "mlir::createLoopUnrollAndJamPass()";
+  let options = [
+    Option<"unrollJamFactor", "unroll-jam-factor", "unsigned",
+           /*default=*/"4",
+           "Use this unroll jam factor for all loops (default 4)">,
+  ];
 }

 def AffineVectorize : FunctionPass<"affine-super-vectorize"> {

@@ -514,6 +514,9 @@ public:
   /// Return if the given ElementsAttr should be elided.
   bool shouldElideElementsAttr(ElementsAttr attr) const;

+  /// Return the size limit for printing large ElementsAttr.
+  Optional<int64_t> getLargeElementsAttrLimit() const;
+
   /// Return if debug information should be printed.
   bool shouldPrintDebugInfo() const;

@@ -42,6 +42,9 @@ private:
   /// Return the argument string of this option.
   StringRef getArgStr() const { return getOption()->ArgStr; }

+  /// Returns true if this option has any value assigned to it.
+  bool hasValue() const { return optHasValue; }
+
 protected:
   /// Return the main option instance.
   virtual const llvm::cl::Option *getOption() const = 0;
@@ -49,6 +52,9 @@ private:
   /// Copy the value from the given option into this one.
   virtual void copyValueFrom(const OptionBase &other) = 0;

+  /// Flag indicating if this option has a value.
+  bool optHasValue = false;
+
   /// Allow access to private methods.
   friend PassOptions;
 };
@@ -113,10 +119,17 @@ public:
     assert(!this->isPositional() && !this->isSink() &&
            "sink and positional options are not supported");
     parent.options.push_back(this);
+
+    // Set a callback to track if this option has a value.
+    this->setCallback([this](const auto &) { this->optHasValue = true; });
   }
-  ~Option() override = default;
   using llvm::cl::opt<DataType, /*ExternalStorage=*/false,
                       OptionParser>::operator=;
+  ~Option() override = default;
+  Option &operator=(const Option &other) {
+    *this = other.getValue();
+    return *this;
+  }

 private:
   /// Return the main option instance.
@@ -132,6 +145,7 @@ public:
   void copyValueFrom(const OptionBase &other) final {
     this->setValue(static_cast<const Option<DataType, OptionParser> &>(other)
                        .getValue());
+    optHasValue = other.optHasValue;
   }
 };

@@ -149,16 +163,26 @@ public:
     assert(!this->isPositional() && !this->isSink() &&
            "sink and positional options are not supported");
     parent.options.push_back(this);
+
+    // Set a callback to track if this option has a value.
+    this->setCallback([this](const auto &) { this->optHasValue = true; });
   }
   ~ListOption() override = default;
-
-  /// Allow assigning from an ArrayRef.
-  ListOption<DataType, OptionParser> &operator=(ArrayRef<DataType> values) {
-    (*this)->assign(values.begin(), values.end());
+  ListOption<DataType, OptionParser> &
+  operator=(const ListOption<DataType, OptionParser> &other) {
+    *this = ArrayRef<DataType>(other);
+    this->optHasValue = other.optHasValue;
     return *this;
   }

-  std::vector<DataType> *operator->() { return &*this; }
+  /// Allow assigning from an ArrayRef.
+  ListOption<DataType, OptionParser> &operator=(ArrayRef<DataType> values) {
+    ((std::vector<DataType> &)*this).assign(values.begin(), values.end());
+    optHasValue = true;
+    return *this;
+  }
+
+  MutableArrayRef<DataType> operator->() const { return &*this; }

 private:
   /// Return the main option instance.
@@ -175,9 +199,7 @@ public:

   /// Copy the value from the given option into this one.
   void copyValueFrom(const OptionBase &other) final {
-    (*this) = ArrayRef<DataType>(
-        (ListOption<DataType, OptionParser> &)(const_cast<OptionBase &>(
-            other)));
+    *this = static_cast<const ListOption<DataType, OptionParser> &>(other);
   }
 };

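A note on the PassOptions.h changes above: the new assignment operators and the optHasValue tracking are what let the rewritten passes later in this diff copy constructor arguments straight into their declared options. A hedged sketch of the usage pattern (the pass and member names here are made up for illustration; the real instances are the GPU mapping, Linalg tiling, and loop fusion passes below):

// Illustrative only; mirrors the constructors rewritten in this commit.
MyPass::MyPass(ArrayRef<int64_t> sizes, unsigned fastSpace) {
  tileSizes = sizes;           // ListOption<int64_t>::operator=(ArrayRef) added above
  fastMemorySpace = fastSpace; // forwards to llvm::cl::opt::operator=
}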
@@ -15,6 +15,24 @@

 include "mlir/Pass/PassBase.td"

+def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
+  let summary = "Fuse affine loop nests";
+  let constructor = "mlir::createLoopFusionPass()";
+  let options = [
+    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
+           /*default=*/"0.30f", "Fractional increase in additional computation "
+                                "tolerated while fusing">,
+    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
+           /*default=*/"0",
+           "Faster memory space number to promote fusion buffers to">,
+    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
+           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
+                            "to fast memory space">,
+    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
+           "Enables maximal loop fusion">,
+  ];
+}
+
 def AffinePipelineDataTransfer
     : FunctionPass<"affine-pipeline-data-transfer"> {
   let summary = "Pipeline non-blocking data transfers between explicitly "
@@ -84,11 +102,6 @@ def AffinePipelineDataTransfer
   let constructor = "mlir::createPipelineDataTransferPass()";
 }

-def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
-  let summary = "Fuse affine loop nests";
-  let constructor = "mlir::createLoopFusionPass()";
-}
-
 def Canonicalizer : Pass<"canonicalize"> {
   let summary = "Canonicalize operations";
   let constructor = "mlir::createCanonicalizerPass()";
@@ -106,6 +119,14 @@ def CSE : Pass<"cse"> {
 def Inliner : Pass<"inline"> {
   let summary = "Inline function calls";
   let constructor = "mlir::createInlinerPass()";
+  let options = [
+    Option<"disableCanonicalization", "disable-simplify", "bool",
+           /*default=*/"false",
+           "Disable running simplifications during inlining">,
+    Option<"maxInliningIterations", "max-iterations", "unsigned",
+           /*default=*/"4",
+           "Maximum number of iterations when inlining within an SCC">,
+  ];
 }

 def LocationSnapshot : Pass<"snapshot-op-locations"> {
@@ -113,7 +134,7 @@ def LocationSnapshot : Pass<"snapshot-op-locations"> {
   let constructor = "mlir::createLocationSnapshotPass()";
   let options = [
     Option<"fileName", "filename", "std::string", /*default=*/"",
-           "The filename to print the generated IR.">,
+           "The filename to print the generated IR">,
     Option<"tag", "tag", "std::string", /*default=*/"",
            "A tag to use when fusing the new locations with the "
            "original. If unset, the locations are replaced.">,

@@ -61,8 +61,8 @@ struct ImperfectlyNestedForLoopMapper
   ImperfectlyNestedForLoopMapper() = default;
   ImperfectlyNestedForLoopMapper(ArrayRef<int64_t> numWorkGroups,
                                  ArrayRef<int64_t> workGroupSize) {
-    this->numWorkGroups->assign(numWorkGroups.begin(), numWorkGroups.end());
-    this->workGroupSize->assign(workGroupSize.begin(), workGroupSize.end());
+    this->numWorkGroups = numWorkGroups;
+    this->workGroupSize = workGroupSize;
   }

   void runOnFunction() override {

@@ -35,32 +35,6 @@

 using namespace mlir;

-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-static llvm::cl::opt<unsigned long long> clFastMemoryCapacity(
-    "affine-data-copy-generate-fast-mem-capacity",
-    llvm::cl::desc(
-        "Set fast memory space capacity in KiB (default: unlimited)"),
-    llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<bool>
-    clDma("affine-data-copy-generate-dma",
-          llvm::cl::desc("Generate DMA instead of point-wise copy"),
-          llvm::cl::cat(clOptionsCategory), llvm::cl::init(true));
-
-static llvm::cl::opt<unsigned> clFastMemorySpace(
-    "affine-data-copy-generate-fast-mem-space", llvm::cl::init(1),
-    llvm::cl::desc(
-        "Fast memory space identifier for copy generation (default: 1)"),
-    llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<bool> clSkipNonUnitStrideLoop(
-    "affine-data-copy-generate-skip-non-unit-stride-loops", llvm::cl::Hidden,
-    llvm::cl::init(false),
-    llvm::cl::desc("Testing purposes: avoid non-unit stride loop choice depths "
-                   "for copy placement"),
-    llvm::cl::cat(clOptionsCategory));
-
 namespace {

 /// Replaces all loads and stores on memref's living in 'slowMemorySpace' by
@@ -76,51 +50,22 @@ namespace {
 // are strided. Check for strided stores.
 struct AffineDataCopyGeneration
     : public AffineDataCopyGenerationBase<AffineDataCopyGeneration> {
-  explicit AffineDataCopyGeneration(
-      unsigned slowMemorySpace = 0,
-      unsigned fastMemorySpace = clFastMemorySpace, unsigned tagMemorySpace = 0,
-      int minDmaTransferSize = 1024,
-      uint64_t fastMemCapacityBytes =
-          (clFastMemoryCapacity.getNumOccurrences() > 0
-               ? clFastMemoryCapacity * 1024 // cl-provided size is in KiB
-               : std::numeric_limits<uint64_t>::max()),
-      bool generateDma = clDma,
-      bool skipNonUnitStrideLoops = clSkipNonUnitStrideLoop)
-      : slowMemorySpace(slowMemorySpace), fastMemorySpace(fastMemorySpace),
-        tagMemorySpace(tagMemorySpace), minDmaTransferSize(minDmaTransferSize),
-        fastMemCapacityBytes(fastMemCapacityBytes), generateDma(generateDma),
-        skipNonUnitStrideLoops(skipNonUnitStrideLoops) {}
-
-  explicit AffineDataCopyGeneration(const AffineDataCopyGeneration &other)
-      : AffineDataCopyGenerationBase<AffineDataCopyGeneration>(other),
-        slowMemorySpace(other.slowMemorySpace),
-        fastMemorySpace(other.fastMemorySpace),
-        tagMemorySpace(other.tagMemorySpace),
-        minDmaTransferSize(other.minDmaTransferSize),
-        fastMemCapacityBytes(other.fastMemCapacityBytes),
-        generateDma(other.generateDma),
-        skipNonUnitStrideLoops(other.skipNonUnitStrideLoops) {}
+  AffineDataCopyGeneration() = default;
+  explicit AffineDataCopyGeneration(unsigned slowMemorySpace,
+                                    unsigned fastMemorySpace,
+                                    unsigned tagMemorySpace,
+                                    int minDmaTransferSize,
+                                    uint64_t fastMemCapacityBytes) {
+    this->slowMemorySpace = slowMemorySpace;
+    this->fastMemorySpace = fastMemorySpace;
+    this->tagMemorySpace = tagMemorySpace;
+    this->minDmaTransferSize = minDmaTransferSize;
+    this->fastMemoryCapacity = fastMemCapacityBytes / 1024;
+  }

   void runOnFunction() override;
   LogicalResult runOnBlock(Block *block, DenseSet<Operation *> &copyNests);

-  // Slow memory space associated with copies.
-  const unsigned slowMemorySpace;
-  // Fast memory space associated with copies.
-  unsigned fastMemorySpace;
-  // Memory space associated with DMA tags.
-  unsigned tagMemorySpace;
-  // Minimum DMA transfer size supported by the target in bytes.
-  const int minDmaTransferSize;
-  // Capacity of the faster memory space.
-  uint64_t fastMemCapacityBytes;
-
-  // If set, generate DMA operations instead of read/write.
-  bool generateDma;
-
-  // If set, ignore loops with steps other than 1.
-  bool skipNonUnitStrideLoops;
-
   // Constant zero index to avoid too many duplicates.
   Value zeroIndex = nullptr;
 };
@@ -153,6 +98,10 @@ AffineDataCopyGeneration::runOnBlock(Block *block,
   if (block->empty())
     return success();

+  uint64_t fastMemCapacityBytes =
+      fastMemoryCapacity != std::numeric_limits<uint64_t>::max()
+          ? fastMemoryCapacity * 1024
+          : fastMemoryCapacity;
   AffineCopyOptions copyOptions = {generateDma, slowMemorySpace,
                                    fastMemorySpace, tagMemorySpace,
                                    fastMemCapacityBytes};

@@ -28,40 +28,15 @@ using namespace mlir;

 #define DEBUG_TYPE "affine-loop-tile"

-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-static llvm::cl::opt<unsigned long long>
-    clCacheSizeKiB("affine-tile-cache-size",
-                   llvm::cl::desc("Set size of cache to tile for in KiB"),
-                   llvm::cl::cat(clOptionsCategory));
-
-// Separate full and partial tiles.
-static llvm::cl::opt<bool>
-    clSeparate("affine-tile-separate",
-               llvm::cl::desc("Separate full and partial tiles"),
-               llvm::cl::cat(clOptionsCategory));
-
-// Tile size to use for all loops (overrides -tile-sizes if provided).
-static llvm::cl::opt<unsigned>
-    clTileSize("affine-tile-size",
-               llvm::cl::desc("Use this tile size for all loops"),
-               llvm::cl::cat(clOptionsCategory));
-
-// List of tile sizes. If any of them aren't provided, they are filled with
-// clTileSize / kDefaultTileSize.
-static llvm::cl::list<unsigned> clTileSizes(
-    "affine-tile-sizes",
-    llvm::cl::desc(
-        "List of tile sizes for each perfect nest (overridden by -tile-size)"),
-    llvm::cl::ZeroOrMore, llvm::cl::cat(clOptionsCategory));
-
 namespace {

 /// A pass to perform loop tiling on all suitable loop nests of a Function.
 struct LoopTiling : public AffineLoopTilingBase<LoopTiling> {
-  explicit LoopTiling(uint64_t cacheSizeBytes = kDefaultCacheMemCapacity,
-                      bool avoidMaxMinBounds = true)
-      : cacheSizeBytes(cacheSizeBytes), avoidMaxMinBounds(avoidMaxMinBounds) {}
+  LoopTiling() = default;
+  explicit LoopTiling(uint64_t cacheSizeBytes, bool avoidMaxMinBounds = true)
+      : avoidMaxMinBounds(avoidMaxMinBounds) {
+    this->cacheSizeInKiB = cacheSizeBytes / 1024;
+  }

   void runOnFunction() override;
   void getTileSizes(ArrayRef<AffineForOp> band,
@@ -69,12 +44,9 @@ struct LoopTiling : public AffineLoopTilingBase<LoopTiling> {

   // Default tile size if nothing is provided.
   constexpr static unsigned kDefaultTileSize = 4;
-  constexpr static uint64_t kDefaultCacheMemCapacity = 512 * 1024UL;

-  // Capacity of the cache to tile for.
-  uint64_t cacheSizeBytes;
   // If true, tile sizes are set to avoid max/min in bounds if possible.
-  bool avoidMaxMinBounds;
+  bool avoidMaxMinBounds = true;
 };

 } // end anonymous namespace
@@ -316,24 +288,20 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
   if (band.empty())
     return;

+  // Use tileSize for all loops if specified.
+  if (tileSize.hasValue()) {
+    tileSizes->assign(band.size(), tileSize);
+    return;
+  }
+
+  // Use tileSizes and fill them with default tile size if it's short.
+  if (!this->tileSizes.empty()) {
+    tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
+    tileSizes->resize(band.size(), kDefaultTileSize);
+    return;
+  }
   tileSizes->resize(band.size());

-  // Use clTileSize for all loops if specified.
-  if (clTileSize.getNumOccurrences() > 0) {
-    std::fill(tileSizes->begin(), tileSizes->end(), clTileSize);
-    return;
-  }
-
-  // Use clTileSizes and fill them with default tile size if it's short.
-  if (!clTileSizes.empty()) {
-    std::fill(tileSizes->begin(), tileSizes->end(),
-              LoopTiling::kDefaultTileSize);
-    std::copy(clTileSizes.begin(),
-              clTileSizes.begin() + std::min(clTileSizes.size(), band.size()),
-              tileSizes->begin());
-    return;
-  }
-
   // The first loop in the band.
   auto rootForOp = band[0];
   (void)rootForOp;
@@ -356,6 +324,7 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
   }

   // Check how many times larger the cache size is when compared to footprint.
+  uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
   uint64_t excessFactor = llvm::divideCeil(fp.getValue(), cacheSizeBytes);
   if (excessFactor <= 1) {
     // No need of any tiling - set tile size to 1.
@@ -388,10 +357,6 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
 }

 void LoopTiling::runOnFunction() {
-  // Override cache size if provided on command line.
-  if (clCacheSizeKiB.getNumOccurrences() > 0)
-    cacheSizeBytes = clCacheSizeKiB * 1024;
-
   // Bands of loops to tile.
   std::vector<SmallVector<AffineForOp, 6>> bands;
   getTileableBands(getFunction(), &bands);
@@ -399,7 +364,7 @@ void LoopTiling::runOnFunction() {
   // Tile each band.
   for (auto &band : bands) {
     // Set up tile sizes; fill missing tile sizes at the end with default tile
-    // size or clTileSize if one was provided.
+    // size or tileSize if one was provided.
     SmallVector<unsigned, 6> tileSizes;
     getTileSizes(band, &tileSizes);
     if (llvm::DebugFlag) {
@@ -413,7 +378,7 @@ void LoopTiling::runOnFunction() {
       return signalPassFailure();

     // Separate full and partial tiles.
-    if (clSeparate) {
+    if (separate) {
       auto intraTileLoops =
           MutableArrayRef<AffineForOp>(tiledNest).drop_front(band.size());
       separateFullTiles(intraTileLoops);
@@ -422,4 +387,3 @@ void LoopTiling::runOnFunction() {
 }

 constexpr unsigned LoopTiling::kDefaultTileSize;
-constexpr uint64_t LoopTiling::kDefaultCacheMemCapacity;

@@ -59,24 +59,27 @@ namespace {
 /// with trip count less than the specified threshold. The latter is for testing
 /// purposes, especially for testing outer loop unrolling.
 struct LoopUnroll : public AffineLoopUnrollBase<LoopUnroll> {
-  const Optional<unsigned> unrollFactor;
-  const Optional<bool> unrollFull;
   // Callback to obtain unroll factors; if this has a callable target, takes
   // precedence over command-line argument or passed argument.
   const std::function<unsigned(AffineForOp)> getUnrollFactor;

+  LoopUnroll() : getUnrollFactor(nullptr) {}
+  LoopUnroll(const LoopUnroll &other)
+      : AffineLoopUnrollBase<LoopUnroll>(other),
+        getUnrollFactor(other.getUnrollFactor) {}
   explicit LoopUnroll(
-      Optional<unsigned> unrollFactor = None, Optional<bool> unrollFull = None,
+      Optional<unsigned> unrollFactor = None, bool unrollFull = false,
       const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr)
-      : unrollFactor(unrollFactor), unrollFull(unrollFull),
-        getUnrollFactor(getUnrollFactor) {}
+      : getUnrollFactor(getUnrollFactor) {
+    if (unrollFactor)
+      this->unrollFactor = *unrollFactor;
+    this->unrollFull = unrollFull;
+  }

   void runOnFunction() override;

   /// Unroll this for op. Returns failure if nothing was done.
   LogicalResult runOnAffineForOp(AffineForOp forOp);
-
-  static const unsigned kDefaultUnrollFactor = 4;
 };
 } // end anonymous namespace

@@ -102,8 +105,7 @@ static void gatherInnermostLoops(FuncOp f,
 }

 void LoopUnroll::runOnFunction() {
-  if (clUnrollFull.getNumOccurrences() > 0 &&
-      clUnrollFullThreshold.getNumOccurrences() > 0) {
+  if (unrollFull && unrollFullThreshold.hasValue()) {
     // Store short loops as we walk.
     SmallVector<AffineForOp, 4> loops;

@@ -112,7 +114,7 @@ void LoopUnroll::runOnFunction() {
   // an outer one may delete gathered inner ones).
   getFunction().walk([&](AffineForOp forOp) {
     Optional<uint64_t> tripCount = getConstantTripCount(forOp);
-    if (tripCount.hasValue() && tripCount.getValue() <= clUnrollFullThreshold)
+    if (tripCount.hasValue() && tripCount.getValue() <= unrollFullThreshold)
       loops.push_back(forOp);
   });
   for (auto forOp : loops)
@@ -120,9 +122,6 @@ void LoopUnroll::runOnFunction() {
     return;
   }

-  unsigned numRepetitions = clUnrollNumRepetitions.getNumOccurrences() > 0
-                                ? clUnrollNumRepetitions
-                                : 1;
   // If the call back is provided, we will recurse until no loops are found.
   FuncOp func = getFunction();
   SmallVector<AffineForOp, 4> loops;
@@ -144,28 +143,19 @@ void LoopUnroll::runOnFunction() {
 /// failure otherwise. The default unroll factor is 4.
 LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
   // Use the function callback if one was provided.
-  if (getUnrollFactor) {
+  if (getUnrollFactor)
     return loopUnrollByFactor(forOp, getUnrollFactor(forOp));
-  }
-  // Unroll by the factor passed, if any.
-  if (unrollFactor.hasValue())
-    return loopUnrollByFactor(forOp, unrollFactor.getValue());
-  // Unroll by the command line factor if one was specified.
-  if (clUnrollFactor.getNumOccurrences() > 0)
-    return loopUnrollByFactor(forOp, clUnrollFactor);
   // Unroll completely if full loop unroll was specified.
-  if (clUnrollFull.getNumOccurrences() > 0 ||
-      (unrollFull.hasValue() && unrollFull.getValue()))
+  if (unrollFull)
     return loopUnrollFull(forOp);

-  // Unroll by four otherwise.
-  return loopUnrollByFactor(forOp, kDefaultUnrollFactor);
+  // Otherwise, unroll by the given unroll factor.
+  return loopUnrollByFactor(forOp, unrollFactor);
 }

 std::unique_ptr<OperationPass<FuncOp>> mlir::createLoopUnrollPass(
-    int unrollFactor, int unrollFull,
+    int unrollFactor, bool unrollFull,
     const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
   return std::make_unique<LoopUnroll>(
-      unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor),
-      unrollFull == -1 ? None : Optional<bool>(unrollFull), getUnrollFactor);
+      unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor), unrollFull,
+      getUnrollFactor);
 }

@@ -49,27 +49,16 @@ using namespace mlir;

 #define DEBUG_TYPE "affine-loop-unroll-jam"

-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-// Loop unroll and jam factor.
-static llvm::cl::opt<unsigned>
-    clUnrollJamFactor("unroll-jam-factor", llvm::cl::Hidden,
-                      llvm::cl::desc("Use this unroll jam factor for all loops"
-                                     " (default 4)"),
-                      llvm::cl::cat(clOptionsCategory));
-
 namespace {
 /// Loop unroll jam pass. Currently, this just unroll jams the first
 /// outer loop in a Function.
 struct LoopUnrollAndJam : public AffineLoopUnrollAndJamBase<LoopUnrollAndJam> {
-  Optional<unsigned> unrollJamFactor;
-  static const unsigned kDefaultUnrollJamFactor = 4;
-
-  explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None)
-      : unrollJamFactor(unrollJamFactor) {}
+  explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None) {
+    if (unrollJamFactor)
+      this->unrollJamFactor = *unrollJamFactor;
+  }

   void runOnFunction() override;
-  LogicalResult runOnAffineForOp(AffineForOp forOp);
 };
 } // end anonymous namespace

@@ -85,19 +74,5 @@ void LoopUnrollAndJam::runOnFunction() {
   // any for operation.
   auto &entryBlock = getFunction().front();
   if (auto forOp = dyn_cast<AffineForOp>(entryBlock.front()))
-    runOnAffineForOp(forOp);
-}
-
-/// Unroll and jam a 'affine.for' op. Default unroll jam factor is
-/// kDefaultUnrollJamFactor. Return failure if nothing was done.
-LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
-  // Unroll and jam by the factor that was passed if any.
-  if (unrollJamFactor.hasValue())
-    return loopUnrollJamByFactor(forOp, unrollJamFactor.getValue());
-  // Otherwise, unroll jam by the command-line factor if one was specified.
-  if (clUnrollJamFactor.getNumOccurrences() > 0)
-    return loopUnrollJamByFactor(forOp, clUnrollJamFactor);
-
-  // Unroll and jam by four otherwise.
-  return loopUnrollJamByFactor(forOp, kDefaultUnrollJamFactor);
+    loopUnrollJamByFactor(forOp, unrollJamFactor);
 }

@@ -582,7 +582,7 @@ struct Vectorize : public AffineVectorizeBase<Vectorize> {
 } // end anonymous namespace

 Vectorize::Vectorize(ArrayRef<int64_t> virtualVectorSize) {
-  vectorSizes->assign(virtualVectorSize.begin(), virtualVectorSize.end());
+  vectorSizes = virtualVectorSize;
 }

 /////// TODO(ntv): Hoist to a VectorizationStrategy.cpp when appropriate.

@@ -508,9 +508,7 @@ static void tileLinalgOps(FuncOp f, ArrayRef<int64_t> tileSizes) {
 namespace {
 struct LinalgTilingPass : public LinalgTilingBase<LinalgTilingPass> {
   LinalgTilingPass() = default;
-  LinalgTilingPass(ArrayRef<int64_t> sizes) {
-    tileSizes->assign(sizes.begin(), sizes.end());
-  }
+  LinalgTilingPass(ArrayRef<int64_t> sizes) { tileSizes = sizes; }

   void runOnFunction() override {
     tileLinalgOps<loop::ForOp>(getFunction(), tileSizes);
@@ -521,7 +519,7 @@ struct LinalgTilingToParallelLoopsPass
     : public LinalgTilingToParallelLoopsBase<LinalgTilingToParallelLoopsPass> {
   LinalgTilingToParallelLoopsPass() = default;
   LinalgTilingToParallelLoopsPass(ArrayRef<int64_t> sizes) {
-    tileSizes->assign(sizes.begin(), sizes.end());
+    tileSizes = sizes;
   }

   void runOnFunction() override {

@@ -146,6 +146,11 @@ bool OpPrintingFlags::shouldElideElementsAttr(ElementsAttr attr) const {
          *elementsAttrElementLimit < int64_t(attr.getNumElements());
 }

+/// Return the size limit for printing large ElementsAttr.
+Optional<int64_t> OpPrintingFlags::getLargeElementsAttrLimit() const {
+  return elementsAttrElementLimit;
+}
+
 /// Return if debug information should be printed.
 bool OpPrintingFlags::shouldPrintDebugInfo() const {
   return printDebugInfoFlag;

@@ -27,16 +27,6 @@

 using namespace mlir;

-static llvm::cl::opt<bool> disableCanonicalization(
-    "mlir-disable-inline-simplify",
-    llvm::cl::desc("Disable running simplifications during inlining"),
-    llvm::cl::ReallyHidden, llvm::cl::init(false));
-
-static llvm::cl::opt<unsigned> maxInliningIterations(
-    "mlir-max-inline-iterations",
-    llvm::cl::desc("Maximum number of iterations when inlining within an SCC"),
-    llvm::cl::ReallyHidden, llvm::cl::init(4));
-
 //===----------------------------------------------------------------------===//
 // Symbol Use Tracking
 //===----------------------------------------------------------------------===//
@@ -563,13 +553,55 @@ static void canonicalizeSCC(CallGraph &cg, CGUseList &useList,
     useList.recomputeUses(node, cg);
 }

-/// Attempt to inline calls within the given scc, and run canonicalizations with
-/// the given patterns, until a fixed point is reached. This allows for the
-/// inlining of newly devirtualized calls.
-static void inlineSCC(Inliner &inliner, CGUseList &useList,
-                      MutableArrayRef<CallGraphNode *> currentSCC,
-                      MLIRContext *context,
-                      const OwningRewritePatternList &canonPatterns) {
+//===----------------------------------------------------------------------===//
+// InlinerPass
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct InlinerPass : public InlinerBase<InlinerPass> {
+  void runOnOperation() override;
+
+  /// Attempt to inline calls within the given scc, and run canonicalizations
+  /// with the given patterns, until a fixed point is reached. This allows for
+  /// the inlining of newly devirtualized calls.
+  void inlineSCC(Inliner &inliner, CGUseList &useList,
+                 MutableArrayRef<CallGraphNode *> currentSCC,
+                 MLIRContext *context,
+                 const OwningRewritePatternList &canonPatterns);
+};
+} // end anonymous namespace
+
+void InlinerPass::runOnOperation() {
+  CallGraph &cg = getAnalysis<CallGraph>();
+  auto *context = &getContext();
+
+  // The inliner should only be run on operations that define a symbol table,
+  // as the callgraph will need to resolve references.
+  Operation *op = getOperation();
+  if (!op->hasTrait<OpTrait::SymbolTable>()) {
+    op->emitOpError() << " was scheduled to run under the inliner, but does "
+                         "not define a symbol table";
+    return signalPassFailure();
+  }
+
+  // Collect a set of canonicalization patterns to use when simplifying
+  // callable regions within an SCC.
+  OwningRewritePatternList canonPatterns;
+  for (auto *op : context->getRegisteredOperations())
+    op->getCanonicalizationPatterns(canonPatterns, context);
+
+  // Run the inline transform in post-order over the SCCs in the callgraph.
+  Inliner inliner(context, cg);
+  CGUseList useList(getOperation(), cg);
+  runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) {
+    inlineSCC(inliner, useList, scc, context, canonPatterns);
+  });
+}
+
+void InlinerPass::inlineSCC(Inliner &inliner, CGUseList &useList,
+                            MutableArrayRef<CallGraphNode *> currentSCC,
+                            MLIRContext *context,
+                            const OwningRewritePatternList &canonPatterns) {
   // If we successfully inlined any calls, run some simplifications on the
   // nodes of the scc. Continue attempting to inline until we reach a fixed
   // point, or a maximum iteration count. We canonicalize here as it may
@@ -584,41 +616,6 @@ static void inlineSCC(Inliner &inliner, CGUseList &useList,
   }
 }

-//===----------------------------------------------------------------------===//
-// InlinerPass
-//===----------------------------------------------------------------------===//
-
-namespace {
-struct InlinerPass : public InlinerBase<InlinerPass> {
-  void runOnOperation() override {
-    CallGraph &cg = getAnalysis<CallGraph>();
-    auto *context = &getContext();
-
-    // The inliner should only be run on operations that define a symbol table,
-    // as the callgraph will need to resolve references.
-    Operation *op = getOperation();
-    if (!op->hasTrait<OpTrait::SymbolTable>()) {
-      op->emitOpError() << " was scheduled to run under the inliner, but does "
-                           "not define a symbol table";
-      return signalPassFailure();
-    }
-
-    // Collect a set of canonicalization patterns to use when simplifying
-    // callable regions within an SCC.
-    OwningRewritePatternList canonPatterns;
-    for (auto *op : context->getRegisteredOperations())
-      op->getCanonicalizationPatterns(canonPatterns, context);
-
-    // Run the inline transform in post-order over the SCCs in the callgraph.
-    Inliner inliner(context, cg);
-    CGUseList useList(getOperation(), cg);
-    runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) {
-      inlineSCC(inliner, useList, scc, context, canonPatterns);
-    });
-  }
-};
-} // end anonymous namespace
-
 std::unique_ptr<Pass> mlir::createInlinerPass() {
   return std::make_unique<InlinerPass>();
 }

@@ -37,36 +37,6 @@ using llvm::SetVector;

 using namespace mlir;

-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-/// Disables fusion profitability check and fuses if valid. Ignore any
-/// additional (redundant) computation tolerance threshold
-/// that would have prevented fusion.
-static llvm::cl::opt<bool>
-    clMaximalLoopFusion("fusion-maximal",
-                        llvm::cl::desc("Enables maximal loop fusion"),
-                        llvm::cl::cat(clOptionsCategory));
-
-/// A threshold in percent of additional computation allowed when fusing.
-static llvm::cl::opt<double> clFusionAddlComputeTolerance(
-    "fusion-compute-tolerance",
-    llvm::cl::desc("Fractional increase in additional "
-                   "computation tolerated while fusing"),
-    llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<unsigned> clFusionFastMemorySpace(
-    "fusion-fast-mem-space",
-    llvm::cl::desc("Faster memory space number to promote fusion buffers to"),
-    llvm::cl::cat(clOptionsCategory));
-
-// A local buffer of size less than or equal to this size is automatically
-// promoted to fast memory after producer-consumer fusion.
-static llvm::cl::opt<unsigned long long> clFusionLocalBufThreshold(
-    "fusion-local-buf-threshold",
-    llvm::cl::desc("Threshold size (KiB) for promoting local buffers to fast "
-                   "memory space"),
-    llvm::cl::cat(clOptionsCategory));
-
 namespace {
 /// Loop fusion pass. This pass currently supports a greedy fusion policy,
 /// which fuses loop nests with single-writer/single-reader memref dependences
@@ -78,24 +48,15 @@ namespace {
 // and add support for more general loop fusion algorithms.

 struct LoopFusion : public AffineLoopFusionBase<LoopFusion> {
-  LoopFusion(unsigned fastMemorySpace = 0, uint64_t localBufSizeThreshold = 0,
-             bool maximalFusion = false)
-      : localBufSizeThreshold(localBufSizeThreshold),
-        fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {}
+  LoopFusion() = default;
+  LoopFusion(unsigned fastMemorySpace, uint64_t localBufSizeThresholdBytes,
+             bool maximalFusion) {
+    this->fastMemorySpace = fastMemorySpace;
+    this->localBufSizeThreshold = localBufSizeThresholdBytes / 1024;
+    this->maximalFusion = maximalFusion;
+  }

   void runOnFunction() override;
-
-  // Any local buffers smaller than this size (in bytes) will be created in
-  // `fastMemorySpace` if provided.
-  uint64_t localBufSizeThreshold;
-  Optional<unsigned> fastMemorySpace = None;
-  // If true, ignore any additional (redundant) computation tolerance threshold
-  // that would have prevented fusion.
-  bool maximalFusion;
-
-  // The amount of additional computation that is tolerated while fusing
-  // pair-wise as a fraction of the total computation.
-  constexpr static double kComputeToleranceThreshold = 0.30f;
 };

 } // end anonymous namespace
@@ -1098,7 +1059,8 @@ static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
                                ArrayRef<Operation *> dstLoadOpInsts,
                                ArrayRef<Operation *> dstStoreOpInsts,
                                ComputationSliceState *sliceState,
-                               unsigned *dstLoopDepth, bool maximalFusion) {
+                               unsigned *dstLoopDepth, bool maximalFusion,
+                               double computeToleranceThreshold) {
   LLVM_DEBUG({
     llvm::dbgs() << "Checking whether fusion is profitable between:\n";
     llvm::dbgs() << " " << *srcOpInst << " and \n";
@@ -1247,11 +1209,6 @@ static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
     llvm::dbgs() << msg.str();
   });

-  double computeToleranceThreshold =
-      clFusionAddlComputeTolerance.getNumOccurrences() > 0
-          ? clFusionAddlComputeTolerance
-          : LoopFusion::kComputeToleranceThreshold;
-
   // TODO(b/123247369): This is a placeholder cost model.
   // Among all choices that add an acceptable amount of redundant computation
   // (as per computeToleranceThreshold), we will simply pick the one that
@@ -1426,13 +1383,18 @@ public:
   // If true, ignore any additional (redundant) computation tolerance threshold
   // that would have prevented fusion.
   bool maximalFusion;
+  // The amount of additional computation that is tolerated while fusing
+  // pair-wise as a fraction of the total computation.
+  double computeToleranceThreshold;

   using Node = MemRefDependenceGraph::Node;

   GreedyFusion(MemRefDependenceGraph *mdg, unsigned localBufSizeThreshold,
-               Optional<unsigned> fastMemorySpace, bool maximalFusion)
+               Optional<unsigned> fastMemorySpace, bool maximalFusion,
+               double computeToleranceThreshold)
       : mdg(mdg), localBufSizeThreshold(localBufSizeThreshold),
-        fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {}
+        fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion),
+        computeToleranceThreshold(computeToleranceThreshold) {}

   // Initializes 'worklist' with nodes from 'mdg'
   void init() {
@@ -1608,7 +1570,8 @@ public:
       // Check if fusion would be profitable.
       if (!isFusionProfitable(srcStoreOp, srcStoreOp, dstLoadOpInsts,
                               dstStoreOpInsts, &sliceState,
-                              &bestDstLoopDepth, maximalFusion))
+                              &bestDstLoopDepth, maximalFusion,
+                              computeToleranceThreshold))
         continue;

       // Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'.
@@ -1769,7 +1732,7 @@ public:
       // Check if fusion would be profitable.
       if (!isFusionProfitable(sibLoadOpInst, sibStoreOpInst, dstLoadOpInsts,
                               dstStoreOpInsts, &sliceState, &bestDstLoopDepth,
-                              maximalFusion))
+                              maximalFusion, computeToleranceThreshold))
         continue;

       // Fuse computation slice of 'sibLoopNest' into 'dstLoopNest'.
@@ -1954,21 +1917,15 @@ public:
 } // end anonymous namespace

 void LoopFusion::runOnFunction() {
-  // Override if a command line argument was provided.
-  if (clFusionFastMemorySpace.getNumOccurrences() > 0) {
-    fastMemorySpace = clFusionFastMemorySpace.getValue();
-  }
-
-  // Override if a command line argument was provided.
-  if (clFusionLocalBufThreshold.getNumOccurrences() > 0) {
-    localBufSizeThreshold = clFusionLocalBufThreshold * 1024;
-  }
-
-  if (clMaximalLoopFusion.getNumOccurrences() > 0)
-    maximalFusion = clMaximalLoopFusion;
-
   MemRefDependenceGraph g;
-  if (g.init(getFunction()))
-    GreedyFusion(&g, localBufSizeThreshold, fastMemorySpace, maximalFusion)
-        .run();
+  if (!g.init(getFunction()))
+    return;
+
+  Optional<unsigned> fastMemorySpaceOpt;
+  if (fastMemorySpace.hasValue())
+    fastMemorySpaceOpt = fastMemorySpace;
+  unsigned localBufSizeThresholdBytes = localBufSizeThreshold * 1024;
+  GreedyFusion fusion(&g, localBufSizeThresholdBytes, fastMemorySpaceOpt,
+                      maximalFusion, computeToleranceThreshold);
+  fusion.run();
 }

@@ -23,13 +23,10 @@ using namespace mlir;

 #define DEBUG_TYPE "pattern-matcher"

-static llvm::cl::opt<unsigned> maxPatternMatchIterations(
-    "mlir-max-pattern-match-iterations",
-    llvm::cl::desc("Max number of iterations scanning for pattern match"),
-    llvm::cl::init(10));
+/// The max number of iterations scanning for pattern match.
+static unsigned maxPatternMatchIterations = 10;

 namespace {

 /// This is a worklist-driven driver for the PatternMatcher, which repeatedly
 /// applies the locally optimal patterns in a roughly "bottom up" way.
 class GreedyPatternRewriteDriver : public PatternRewriter {

@@ -14,13 +14,16 @@
 #include "mlir/Support/STLExtras.h"
 #include "llvm/Support/CommandLine.h"

-static llvm::cl::opt<int> elideIfLarger(
-    "print-op-graph-elide-if-larger",
-    llvm::cl::desc("Upper limit to emit elements attribute rather than elide"),
-    llvm::cl::init(16));
-
 using namespace mlir;

+/// Return the size limits for eliding large attributes.
+static int64_t getLargeAttributeSizeLimit() {
+  // Use the default from the printer flags if possible.
+  if (Optional<int64_t> limit = OpPrintingFlags().getLargeElementsAttrLimit())
+    return *limit;
+  return 16;
+}
+
 namespace llvm {

 // Specialize GraphTraits to treat Block as a graph of Operations as nodes and
@@ -65,6 +68,8 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {
   interleaveComma(op->getResultTypes(), os);
   os << "\n";

+  // A value used to elide large container attribute.
+  int64_t largeAttrLimit = getLargeAttributeSizeLimit();
   for (auto attr : op->getAttrs()) {
     os << '\n' << attr.first << ": ";
     // Always emit splat attributes.
@@ -75,7 +80,7 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {

     // Elide "big" elements attributes.
     auto elements = attr.second.dyn_cast<ElementsAttr>();
-    if (elements && elements.getNumElements() > elideIfLarger) {
+    if (elements && elements.getNumElements() > largeAttrLimit) {
       os << std::string(elements.getType().getRank(), '[') << "..."
          << std::string(elements.getType().getRank(), ']') << " : "
          << elements.getType();
@@ -83,7 +88,7 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {
     }

     auto array = attr.second.dyn_cast<ArrayAttr>();
-    if (array && static_cast<int64_t>(array.size()) > elideIfLarger) {
+    if (array && static_cast<int64_t>(array.size()) > largeAttrLimit) {
       os << "[...]";
       continue;
     }

@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-skip-non-unit-stride-loops | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate="generate-dma=false fast-mem-space=0 skip-non-unit-stride-loops" | FileCheck %s
 // Small buffer size to trigger fine copies.
-// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-fast-mem-capacity=1 | FileCheck --check-prefix=CHECK-SMALL %s
+// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate="generate-dma=false fast-mem-space=0 fast-mem-capacity=1" | FileCheck --check-prefix=CHECK-SMALL %s

 // Test affine data copy with a memref filter. We use a test pass that invokes
 // affine data copy utility on the input loop nest.

@@ -1,5 +1,5 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma -affine-data-copy-generate-fast-mem-space=2 -affine-data-copy-generate-skip-non-unit-stride-loops -verify-diagnostics | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma -affine-data-copy-generate-fast-mem-capacity=16 -affine-data-copy-generate-fast-mem-space=2 | FileCheck %s --check-prefix FAST-MEM-16KB
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-space=2 skip-non-unit-stride-loops" -verify-diagnostics | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-capacity=16 fast-mem-space=2" | FileCheck %s --check-prefix FAST-MEM-16KB

 // We run most test cases with -copy-skip-non-unit-stride-loops to allow testing
 // DMA generation at inner levels easily - since the DMA generation would

@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -inline -mlir-disable-inline-simplify | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -inline="disable-simplify" | FileCheck %s

 // Basic test that functions within affine operations are inlined.
 func @func_with_affine_ops(%N: index) {

@@ -1,6 +1,6 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-size=32 | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-cache-size=512 | FileCheck %s --check-prefix=MODEL
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-size=32 -affine-tile-separate | FileCheck %s --check-prefix=SEPARATE
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="tile-size=32" | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="cache-size=512" | FileCheck %s --check-prefix=MODEL
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="tile-size=32 separate" | FileCheck %s --check-prefix=SEPARATE

 // -----

@@ -1,5 +1,5 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam -unroll-jam-factor=2 | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam -unroll-jam-factor=4 | FileCheck --check-prefix=UJAM-FOUR %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=2" | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=4" | FileCheck --check-prefix=UJAM-FOUR %s

 // CHECK-DAG: [[MAP_PLUS_1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
 // CHECK-DAG: [[MAP_DIV_OFFSET:#map[0-9]+]] = affine_map<()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)>

@@ -1,7 +1,7 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-full | FileCheck %s --check-prefix UNROLL-FULL
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-full -unroll-full-threshold=2 | FileCheck %s --check-prefix SHORT
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-factor=4 | FileCheck %s --check-prefix UNROLL-BY-4
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-factor=1 | FileCheck %s --check-prefix UNROLL-BY-1
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full" | FileCheck %s --check-prefix UNROLL-FULL
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1

 // UNROLL-FULL-DAG: [[MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
 // UNROLL-FULL-DAG: [[MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>

@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline='spv.module(inline)' -mlir-disable-inline-simplify | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline='spv.module(inline{disable-simplify})' | FileCheck %s

 spv.module Logical GLSL450 {
   spv.func @callee() "None" {

@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify | FileCheck %s
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify -mlir-print-debuginfo | FileCheck %s --check-prefix INLINE-LOC
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify=false | FileCheck %s --check-prefix INLINE_SIMPLIFY
+// RUN: mlir-opt %s -inline="disable-simplify" | FileCheck %s
+// RUN: mlir-opt %s -inline="disable-simplify" -mlir-print-debuginfo | FileCheck %s --check-prefix INLINE-LOC
+// RUN: mlir-opt %s -inline | FileCheck %s --check-prefix INLINE_SIMPLIFY

 // Inline a function that takes an argument.
 func @func_with_arg(%c : i32) -> i32 {

@@ -1,5 +1,5 @@
 // RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -split-input-file | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -fusion-maximal -split-input-file | FileCheck %s --check-prefix=MAXIMAL
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion="fusion-maximal" -split-input-file | FileCheck %s --check-prefix=MAXIMAL

 // TODO(andydavis) Add more tests:
 // *) Add nested fusion test cases when non-constant loop bound support is

@@ -35,10 +35,9 @@ public:
   TestOptionsPass() = default;
   TestOptionsPass(const TestOptionsPass &) {}
   TestOptionsPass(const Options &options) {
-    listOption->assign(options.listOption.begin(), options.listOption.end());
-    stringOption.setValue(options.stringOption);
-    stringListOption->assign(options.stringListOption.begin(),
-                             options.stringListOption.end());
+    listOption = options.listOption;
+    stringOption = options.stringOption;
+    stringListOption = options.stringListOption;
   }

   void runOnFunction() final {}