[mlir][SCF] Generalize AffineMinSCFCanonicalization to min/max ops

* Add support for affine.max ops to SCF loop peeling pattern. * Add support for affine.max ops to `AffineMinSCFCanonicalizationPattern`. * Rename `AffineMinSCFCanonicalizationPattern` to `AffineOpSCFCanonicalizationPattern`. * Rename `AffineMinSCFCanonicalization` pass to `SCFAffineOpCanonicalization`. Differential Revision: https://reviews.llvm.org/D108009
2021-08-25 10:28:01 +09:00 · 2021-08-25 10:28:01 +09:00 · a9cff97f94
parent 90e0c657b7
commit a9cff97f94
6 changed files with 219 additions and 128 deletions
--- a/mlir/include/mlir/Dialect/SCF/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Passes.h
@ -28,9 +28,9 @@ std::unique_ptr<Pass> createForLoopSpecializationPass();
 /// better vectorization.
 std::unique_ptr<Pass> createForLoopPeelingPass();

-/// Creates a pass that canonicalizes affine.min ops in scf.for loops with
-/// known lower and upper bounds.
-std::unique_ptr<Pass> createAffineMinSCFCanonicalizationPass();
+/// Creates a pass that canonicalizes affine.min and affine.max operations
+/// inside of scf.for loops with known lower and upper bounds.
+std::unique_ptr<Pass> createSCFAffineOpCanonicalizationPass();

 /// Creates a loop fusion pass which fuses parallel loops.
 std::unique_ptr<Pass> createParallelLoopFusionPass();
--- a/mlir/include/mlir/Dialect/SCF/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Passes.td
@ -19,11 +19,11 @@ def SCFBufferize : FunctionPass<"scf-bufferize"> {

 // Note: Making this a canonicalization pattern would require a dependency
 // of the SCF dialect on the Affine dialect or vice versa.
-def AffineMinSCFCanonicalization
-    : FunctionPass<"canonicalize-scf-affine-min"> {
-  let summary = "Canonicalize affine.min ops in the context of SCF loops with "
-                "known bounds";
-  let constructor = "mlir::createAffineMinSCFCanonicalizationPass()";
+def SCFAffineOpCanonicalization
+    : FunctionPass<"canonicalize-scf-affine-op"> {
+  let summary = "Canonicalize affine.min and affine.max ops in the context of "
+                "SCF loops with known bounds";
+  let constructor = "mlir::createSCFAffineOpCanonicalizationPass()";
  let dependentDialects = ["AffineDialect"];
 }

--- a/mlir/include/mlir/Dialect/SCF/Transforms.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms.h
@ -18,7 +18,7 @@

 namespace mlir {

-class AffineMinOp;
+class AffineMap;
 class ConversionTarget;
 struct LogicalResult;
 class MLIRContext;
@ -29,6 +29,7 @@ class RewritePatternSet;
 using OwningRewritePatternList = RewritePatternSet;
 class Operation;
 class Value;
+class ValueRange;

 namespace scf {

@ -37,20 +38,28 @@ class ForOp;
 class ParallelOp;
 class ForOp;

-/// Try to canonicalize an affine.min operation in the context of `for` loops
-/// with a known range.
+/// Match "for loop"-like operations: If the first parameter is an iteration
+/// variable, return lower/upper bounds via the second/third parameter and the
+/// step size via the last parameter. The function should return `success` in
+/// that case. If the first parameter is not an iteration variable, return
+/// `failure`.
+using LoopMatcherFn =
+    function_ref<LogicalResult(Value, Value &, Value &, Value &)>;
+
+/// Try to canonicalize an min/max operations in the context of for `loops` with
+/// a known range.
 ///
-/// `loopMatcher` is used to retrieve loop bounds and step size for a given
-/// iteration variable: If the first parameter is an iteration variable, return
-/// lower/upper bounds via the second/third parameter and the step size via the
-/// last parameter. The function should return `success` in that case. If the
-/// first parameter is not an iteration variable, return `failure`.
+/// `map` is the body of the min/max operation and `operands` are the SSA values
+/// that the dimensions and symbols are bound to; dimensions are listed first.
+/// If `isMin`, the operation is a min operation; otherwise, a max operation.
+/// `loopMatcher` is used to retrieve loop bounds and the step size for a given
+/// iteration variable.
 ///
 /// Note: `loopMatcher` allows this function to be used with any "for loop"-like
 /// operation (scf.for, scf.parallel and even ops defined in other dialects).
-LogicalResult canonicalizeAffineMinOpInLoop(
-    AffineMinOp minOp, RewriterBase &rewriter,
-    function_ref<LogicalResult(Value, Value &, Value &, Value &)> loopMatcher);
+LogicalResult canonicalizeMinMaxOpInLoop(RewriterBase &rewriter, Operation *op,
+                                         AffineMap map, ValueRange operands,
+                                         bool isMin, LoopMatcherFn loopMatcher);

 /// Fuses all adjacent scf.parallel operations with identical bounds and step
 /// into one scf.parallel operations. Uses a naive aliasing and dependency
@ -85,10 +94,10 @@ void naivelyFuseParallelOps(Region &region);
 /// ```
 ///
 /// After loop peeling, this function tries to simplify/canonicalize affine.min
-/// operations in the body of the loop and the scf.if, taking advantage of the
-/// fact that every iteration of the peeled loop is a "full" iteration. This
-/// canonicalization is expected to enable further canonicalization
-/// opportunities through other patterns.
+/// and affine.max ops in the body of the loop and the scf.if, taking advantage
+/// of the fact that the peeled loop has only "full" iterations and the scf.if
+/// is always a partial iteration (if any). This canonicalization is expected to
+/// enable further canonicalization opportunities through other patterns.
 ///
 /// The return value indicates whether the loop was rewritten or not. Loops are
 /// not rewritten if:
@ -168,7 +177,7 @@ void populateSCFLoopPipeliningPatterns(RewritePatternSet &patterns,
                                       const PipeliningOption &options);

 /// Populate patterns for canonicalizing operations inside SCF loop bodies.
-/// At the moment, only affine.min computations with iteration variables,
+/// At the moment, only affine.min/max computations with iteration variables,
 /// loop bounds and loop steps are canonicalized.
 void populateSCFLoopBodyCanonicalizationPatterns(RewritePatternSet &patterns);

--- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@ -190,76 +190,80 @@ static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
  return constraints.addBound(type, pos, alignedMap);
 }

-/// This function tries to canonicalize affine.min operations by proving that
-/// its value is bounded by the same lower and upper bound. In that case, the
+/// This function tries to canonicalize min/max operations by proving that their
+/// value is bounded by the same lower and upper bound. In that case, the
 /// operation can be folded away.
 ///
 /// Bounds are computed by FlatAffineValueConstraints. Invariants required for
 /// finding/proving bounds should be supplied via `constraints`.
 ///
-/// 1. Add dimensions for `minOp` and `minOpUb` (upper bound of `minOp`).
-/// 2. Compute an upper bound of `minOp` and bind it to `minOpUb`. SSA values
-///    that are used in `minOp` but are not part of `dims`, are added as extra
-///    symbols to the constraint set.
-/// 3. For each result of `minOp`: Add result as a dimension `r_i`. Prove that
-///    r_i >= minOpUb. If this is the case, ub(minOp) == lb(minOp) and `minOp`
-///    can be replaced with that bound.
+/// 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`).
+/// 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in
+///    case of `!isMin`) and bind it to `opBound`. SSA values that are used in
+///    `op` but are not part of `constraints`, are added as extra symbols.
+/// 3. For each result of `op`: Add result as a dimension `r_i`. Prove that:
+///    * If `isMin`: r_i >= opBound
+///    * If `isMax`: r_i <= opBound
+///    If this is the case, ub(op) == lb(op).
+/// 4. Replace `op` with `opBound`.
 ///
 /// In summary, the following constraints are added throughout this function.
 /// Note: `invar` are dimensions added by the caller to express the invariants.
+/// (Showing only the case where `isMin`.)
 ///
-///  invar | minOp | minOpUb | r_i | extra syms... | const |           eq/ineq
+///  invar |    op | opBound | r_i | extra syms... | const |           eq/ineq
 ///  ------+-------+---------+-----+---------------+-------+-------------------
 ///   (various eq./ineq. constraining `invar`, added by the caller)
 ///    ... |     0 |       0 |   0 |             0 |   ... |               ...
 ///  ------+-------+---------+-----+---------------+-------+-------------------
-///   (various ineq. constraining `minOp` in terms of `minOp` operands (`invar`
-///    and extra `minOp` operands "extra syms" that are not in `invar`)).
+///   (various ineq. constraining `op` in terms of `op` operands (`invar` and
+///    extra `op` operands "extra syms" that are not in `invar`)).
 ///    ... |    -1 |       0 |   0 |           ... |   ... |              >= 0
 ///  ------+-------+---------+-----+---------------+-------+-------------------
-///   (set `minOpUb` to `minOp` upper bound in terms of `invar` and extra syms)
+///   (set `opBound` to `op` upper bound in terms of `invar` and "extra syms")
 ///    ... |     0 |      -1 |   0 |           ... |   ... |               = 0
 ///  ------+-------+---------+-----+---------------+-------+-------------------
-///   (for each `minOp` map result r_i: copy previous constraints, set r_i to
-///    corresponding map result, prove r_i >= minOpUb via contradiction)
+///   (for each `op` map result r_i: set r_i to corresponding map result,
+///    prove that r_i >= minOpUb via contradiction)
 ///    ... |     0 |       0 |  -1 |           ... |   ... |               = 0
 ///      0 |     0 |       1 |  -1 |             0 |    -1 |              >= 0
 ///
 static LogicalResult
-canonicalizeAffineMinOp(RewriterBase &rewriter, AffineMinOp minOp,
-                        FlatAffineValueConstraints constraints) {
+canonicalizeMinMaxOp(RewriterBase &rewriter, Operation *op, AffineMap map,
+                     ValueRange operands, bool isMin,
+                     FlatAffineValueConstraints constraints) {
  RewriterBase::InsertionGuard guard(rewriter);
-  AffineMap minOpMap = minOp.getAffineMap();
-  unsigned numResults = minOpMap.getNumResults();
+  unsigned numResults = map.getNumResults();

  // Add a few extra dimensions.
-  unsigned dimMinOp = constraints.addDimId();   // `minOp`
-  unsigned dimMinOpUb = constraints.addDimId(); // `minOp` upper bound
+  unsigned dimOp = constraints.addDimId();      // `op`
+  unsigned dimOpBound = constraints.addDimId(); // `op` lower/upper bound
  unsigned resultDimStart = constraints.getNumDimIds();
  for (unsigned i = 0; i < numResults; ++i)
    constraints.addDimId();

-  // Add an inequality for each result expr_i of minOpMap: minOp <= expr_i
-  if (failed(alignAndAddBound(constraints, FlatAffineConstraints::UB, dimMinOp,
-                              minOpMap, minOp.operands())))
+  // Add an inequality for each result expr_i of map:
+  // isMin: op <= expr_i, !isMin: op >= expr_i
+  auto boundType =
+      isMin ? FlatAffineConstraints::UB : FlatAffineConstraints::LB;
+  if (failed(alignAndAddBound(constraints, boundType, dimOp, map, operands)))
    return failure();

-  // Try to compute an upper bound for minOp, expressed in terms of the other
+  // Try to compute a lower/upper bound for op, expressed in terms of the other
  // `dims` and extra symbols.
-  SmallVector<AffineMap> minOpValLb(1), minOpValUb(1);
-  constraints.getSliceBounds(dimMinOp, 1, minOp.getContext(), &minOpValLb,
-                             &minOpValUb);
+  SmallVector<AffineMap> opLb(1), opUb(1);
+  constraints.getSliceBounds(dimOp, 1, rewriter.getContext(), &opLb, &opUb);
+  AffineMap boundMap = isMin ? opUb[0] : opLb[0];
  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
  // a TODO of `getSliceBounds` and not handled here.
-  if (!minOpValUb[0] || minOpValUb[0].getNumResults() != 1)
-    return failure(); // No or multiple upper bounds found.
+  if (!boundMap || boundMap.getNumResults() != 1)
+    return failure(); // No or multiple bounds found.

-  // Add an equality: dimMinOpUb = minOpValUb[0]
-  // Add back dimension for minOp. (Was removed by `getSliceBounds`.)
-  AffineMap alignedUbMap = minOpValUb[0].shiftDims(/*shift=*/1,
-                                                   /*offset=*/dimMinOp);
-  if (failed(constraints.addBound(FlatAffineConstraints::EQ, dimMinOpUb,
-                                  alignedUbMap)))
+  // Add an equality: Set dimOpBound to computed bound.
+  // Add back dimension for op. (Was removed by `getSliceBounds`.)
+  AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp);
+  if (failed(constraints.addBound(FlatAffineConstraints::EQ, dimOpBound,
+                                  alignedBoundMap)))
    return failure();

  // If the constraint system is empty, there is an inconsistency. (E.g., this
@ -267,12 +271,13 @@ canonicalizeAffineMinOp(RewriterBase &rewriter, AffineMinOp minOp,
  if (constraints.isEmpty())
    return failure();

-  // Prove that each result of minOpMap has a lower bound that is equal to (or
-  // greater than) the upper bound of minOp (`kDimMinOpUb`). In that case,
-  // minOp can be replaced with the bound. I.e., prove that for each result
+  // In the case of `isMin` (`!isMin` is inversed):
+  // Prove that each result of `map` has a lower bound that is equal to (or
+  // greater than) the upper bound of `op` (`dimOpBound`). In that case, `op`
+  // can be replaced with the bound. I.e., prove that for each result
  // expr_i (represented by dimension r_i):
  //
-  // r_i >= minOpUb
+  // r_i >= opBound
  //
  // To prove this inequality, add its negation to the constraint set and prove
  // that the constraint set is empty.
@ -284,33 +289,35 @@ canonicalizeAffineMinOp(RewriterBase &rewriter, AffineMinOp minOp,
    // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
    // computes minOpUb in terms of r_i dims, which is not desired.
    if (failed(alignAndAddBound(newConstr, FlatAffineConstraints::EQ, i,
-                                minOpMap.getSubMap({i - resultDimStart}),
-                                minOp.operands())))
+                                map.getSubMap({i - resultDimStart}), operands)))
      return failure();

-    // Add inequality: r_i < minOpUb (equiv.: minOpUb - r_i - 1 >= 0)
+    // If `isMin`:  Add inequality: r_i < opBound
+    //              equiv.: opBound - r_i - 1 >= 0
+    // If `!isMin`: Add inequality: r_i > opBound
+    //              equiv.: -opBound + r_i - 1 >= 0
    SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
-    ineq[dimMinOpUb] = 1;
-    ineq[i] = -1;
+    ineq[dimOpBound] = isMin ? 1 : -1;
+    ineq[i] = isMin ? -1 : 1;
    ineq[newConstr.getNumCols() - 1] = -1;
    newConstr.addInequality(ineq);
    if (!newConstr.isEmpty())
      return failure();
  }

-  // Lower and upper bound of `minOp` are equal. Replace `minOp` with its bound.
-  AffineMap newMap = alignedUbMap;
+  // Lower and upper bound of `op` are equal. Replace `minOp` with its bound.
+  AffineMap newMap = alignedBoundMap;
  SmallVector<Value> newOperands;
  unpackOptionalValues(constraints.getMaybeDimAndSymbolValues(), newOperands);
  mlir::canonicalizeMapAndOperands(&newMap, &newOperands);
-  rewriter.setInsertionPoint(minOp);
-  rewriter.replaceOpWithNewOp<AffineApplyOp>(minOp, newMap, newOperands);
+  rewriter.setInsertionPoint(op);
+  rewriter.replaceOpWithNewOp<AffineApplyOp>(op, newMap, newOperands);
  return success();
 }

-/// Try to simplify an affine.min operation `minOp` after loop peeling. This
-/// function detects affine.min operations such as (ub is the previous upper
-/// bound of the unpeeled loop):
+/// Try to simplify a min/max operation `op` after loop peeling. This function
+/// can simplify min/max operations such as (ub is the previous upper bound of
+/// the unpeeled loop):
 /// ```
 /// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
 /// %r = affine.min #affine.min #map(%iv)[%step, %ub]
@ -319,19 +326,24 @@ canonicalizeAffineMinOp(RewriterBase &rewriter, AffineMinOp minOp,
 /// ```
 /// %r = %step
 /// ```
-/// affine.min operations inside the generated scf.if operation are rewritten in
+/// min/max operations inside the generated scf.if operation are rewritten in
 /// a similar way.
 ///
 /// This function builds up a set of constraints, capable of proving that:
 /// * Inside the peeled loop: min(step, ub - iv) == step
 /// * Inside the scf.if operation: min(step, ub - iv) == ub - iv
 ///
+/// Returns `success` if the given operation was replaced by a new operation;
+/// `failure` otherwise.
+///
 /// Note: `ub` is the previous upper bound of the loop (before peeling).
-/// `insideLoop` must be true for affine.min ops inside the loop and false for
-/// affine.min ops inside the scf.for op.
-static LogicalResult rewritePeeledAffineOp(RewriterBase &rewriter,
-                                           AffineMinOp minOp, Value iv,
-                                           Value ub, Value step,
+/// `insideLoop` must be true for min/max ops inside the loop and false for
+/// affine.min ops inside the scf.for op. For an explanation of the other
+/// parameters, see comment of `canonicalizeMinMaxOpInLoop`.
+static LogicalResult rewritePeeledMinMaxOp(RewriterBase &rewriter,
+                                           Operation *op, AffineMap map,
+                                           ValueRange operands, bool isMin,
+                                           Value iv, Value ub, Value step,
                                           bool insideLoop) {
  FlatAffineValueConstraints constraints;
  constraints.addDimId(0, iv);
@ -358,7 +370,23 @@ static LogicalResult rewritePeeledAffineOp(RewriterBase &rewriter,
    constraints.addInequality({1, -1, 1, -1});
  }

-  return canonicalizeAffineMinOp(rewriter, minOp, constraints);
+  return canonicalizeMinMaxOp(rewriter, op, map, operands, isMin, constraints);
+}
+
+template <typename OpTy, bool IsMin>
+static void
+rewriteAffineOpAfterPeeling(RewriterBase &rewriter, ForOp forOp, scf::IfOp ifOp,
+                            Value iv, Value splitBound, Value ub, Value step) {
+  forOp.walk([&](OpTy affineOp) {
+    (void)rewritePeeledMinMaxOp(rewriter, affineOp, affineOp.getAffineMap(),
+                                affineOp.operands(), IsMin, iv, ub, step,
+                                /*insideLoop=*/true);
+  });
+  ifOp.walk([&](OpTy affineOp) {
+    (void)rewritePeeledMinMaxOp(rewriter, affineOp, affineOp.getAffineMap(),
+                                affineOp.operands(), IsMin, splitBound, ub,
+                                step, /*insideLoop=*/false);
+  });
 }

 LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
@ -369,21 +397,18 @@ LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
  if (failed(peelForLoop(rewriter, forOp, ifOp, splitBound)))
    return failure();

-  // Rewrite affine.min ops.
-  forOp.walk([&](AffineMinOp minOp) {
-    (void)rewritePeeledAffineOp(rewriter, minOp, forOp.getInductionVar(), ub,
-                                forOp.step(), /*insideLoop=*/true);
-  });
-  ifOp.walk([&](AffineMinOp minOp) {
-    (void)rewritePeeledAffineOp(rewriter, minOp, splitBound, ub, forOp.step(),
-                                /*insideLoop=*/false);
-  });
+  // Rewrite affine.min and affine.max ops.
+  Value iv = forOp.getInductionVar(), step = forOp.step();
+  rewriteAffineOpAfterPeeling<AffineMinOp, /*IsMin=*/true>(
+      rewriter, forOp, ifOp, iv, splitBound, ub, step);
+  rewriteAffineOpAfterPeeling<AffineMaxOp, /*IsMin=*/false>(
+      rewriter, forOp, ifOp, iv, splitBound, ub, step);

  return success();
 }

-/// Canonicalize AffineMinOp operations in the context of for loops with a known
-/// range. Call `canonicalizeAffineMinOp` and add the following constraints to
+/// Canonicalize min/max operations in the context of for loops with a known
+/// range. Call `canonicalizeMinMaxOp` and add the following constraints to
 /// the constraint system (along with the missing dimensions):
 ///
 /// * iv >= lb
@ -391,14 +416,15 @@ LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
 ///
 /// Note: Due to limitations of FlatAffineConstraints, only constant step sizes
 /// are currently supported.
-LogicalResult mlir::scf::canonicalizeAffineMinOpInLoop(
-    AffineMinOp minOp, RewriterBase &rewriter,
-    function_ref<LogicalResult(Value, Value &, Value &, Value &)> loopMatcher) {
+LogicalResult
+mlir::scf::canonicalizeMinMaxOpInLoop(RewriterBase &rewriter, Operation *op,
+                                      AffineMap map, ValueRange operands,
+                                      bool isMin, LoopMatcherFn loopMatcher) {
  FlatAffineValueConstraints constraints;
  DenseSet<Value> allIvs;

  // Find all iteration variables among `minOp`'s operands add constrain them.
-  for (Value operand : minOp.operands()) {
+  for (Value operand : operands) {
    // Skip duplicate ivs.
    if (llvm::find(allIvs, operand) != allIvs.end())
      continue;
@ -450,7 +476,7 @@ LogicalResult mlir::scf::canonicalizeAffineMinOpInLoop(
      return failure();
  }

-  return canonicalizeAffineMinOp(rewriter, minOp, constraints);
+  return canonicalizeMinMaxOp(rewriter, op, map, operands, isMin, constraints);
 }

 static constexpr char kPeeledLoopLabel[] = "__peeled_loop__";
@ -495,13 +521,13 @@ struct ForLoopPeelingPattern : public OpRewritePattern<ForOp> {
  bool skipPartial;
 };

-/// Canonicalize AffineMinOp operations in the context of scf.for and
-/// scf.parallel loops with a known range.
-struct AffineMinSCFCanonicalizationPattern
-    : public OpRewritePattern<AffineMinOp> {
-  using OpRewritePattern<AffineMinOp>::OpRewritePattern;
+/// Canonicalize AffineMinOp/AffineMaxOp operations in the context of scf.for
+/// and scf.parallel loops with a known range.
+template <typename OpTy, bool IsMin>
+struct AffineOpSCFCanonicalizationPattern : public OpRewritePattern<OpTy> {
+  using OpRewritePattern<OpTy>::OpRewritePattern;

-  LogicalResult matchAndRewrite(AffineMinOp minOp,
+  LogicalResult matchAndRewrite(OpTy op,
                                PatternRewriter &rewriter) const override {
    auto loopMatcher = [](Value iv, Value &lb, Value &ub, Value &step) {
      if (scf::ForOp forOp = scf::getForInductionVarOwner(iv)) {
@ -524,7 +550,8 @@ struct AffineMinSCFCanonicalizationPattern
      return failure();
    };

-    return scf::canonicalizeAffineMinOpInLoop(minOp, rewriter, loopMatcher);
+    return scf::canonicalizeMinMaxOpInLoop(rewriter, op, op.getAffineMap(),
+                                           op.operands(), IsMin, loopMatcher);
  }
 };
 } // namespace
@ -561,21 +588,21 @@ struct ForLoopPeeling : public SCFForLoopPeelingBase<ForLoopPeeling> {
  }
 };

-struct AffineMinSCFCanonicalization
-    : public AffineMinSCFCanonicalizationBase<AffineMinSCFCanonicalization> {
+struct SCFAffineOpCanonicalization
+    : public SCFAffineOpCanonicalizationBase<SCFAffineOpCanonicalization> {
  void runOnFunction() override {
    FuncOp funcOp = getFunction();
    MLIRContext *ctx = funcOp.getContext();
    RewritePatternSet patterns(ctx);
-    patterns.add<AffineMinSCFCanonicalizationPattern>(ctx);
+    scf::populateSCFLoopBodyCanonicalizationPatterns(patterns);
    if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns))))
      signalPassFailure();
  }
 };
 } // namespace

-std::unique_ptr<Pass> mlir::createAffineMinSCFCanonicalizationPass() {
-  return std::make_unique<AffineMinSCFCanonicalization>();
+std::unique_ptr<Pass> mlir::createSCFAffineOpCanonicalizationPass() {
+  return std::make_unique<SCFAffineOpCanonicalization>();
 }

 std::unique_ptr<Pass> mlir::createParallelLoopSpecializationPass() {
@ -592,5 +619,9 @@ std::unique_ptr<Pass> mlir::createForLoopPeelingPass() {

 void mlir::scf::populateSCFLoopBodyCanonicalizationPatterns(
    RewritePatternSet &patterns) {
-  patterns.insert<AffineMinSCFCanonicalizationPattern>(patterns.getContext());
+  MLIRContext *ctx = patterns.getContext();
+  patterns
+      .insert<AffineOpSCFCanonicalizationPattern<AffineMinOp, /*IsMin=*/true>,
+              AffineOpSCFCanonicalizationPattern<AffineMaxOp, /*IsMin=*/false>>(
+          ctx);
 }
--- a/mlir/test/Dialect/SCF/canonicalize-scf-affine-min.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize-scf-affine-min.mlir
@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -canonicalize-scf-affine-min -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -canonicalize-scf-affine-op -split-input-file | FileCheck %s

 // CHECK-LABEL: func @scf_for_canonicalize_min
 //       CHECK:   %[[C2:.*]] = constant 2 : i64
@ -19,6 +19,45 @@ func @scf_for_canonicalize_min(%A : memref<i64>) {

 // -----

+// CHECK-LABEL: func @scf_for_canonicalize_max
+//       CHECK:   %[[Cneg2:.*]] = constant -2 : i64
+//       CHECK:   scf.for
+//       CHECK:     memref.store %[[Cneg2]], %{{.*}}[] : memref<i64>
+func @scf_for_canonicalize_max(%A : memref<i64>) {
+  %c0 = constant 0 : index
+  %c2 = constant 2 : index
+  %c4 = constant 4 : index
+
+  scf.for %i = %c0 to %c4 step %c2 {
+    %1 = affine.max affine_map<(d0, d1)[] -> (-2, -(d1 - d0))> (%i, %c4)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_for_max_not_canonicalizable
+//       CHECK:   scf.for
+//       CHECK:     affine.max
+//       CHECK:     index_cast
+func @scf_for_max_not_canonicalizable(%A : memref<i64>) {
+  %c0 = constant 0 : index
+  %c2 = constant 2 : index
+  %c3 = constant 3 : index
+  %c4 = constant 4 : index
+
+  scf.for %i = %c0 to %c4 step %c2 {
+    %1 = affine.max affine_map<(d0, d1)[] -> (-2, -(d1 - d0))> (%i, %c3)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+  return
+}
+
+// -----
+
 // CHECK-LABEL: func @scf_for_loop_nest_canonicalize_min
 //       CHECK:   %[[C5:.*]] = constant 5 : i64
 //       CHECK:   scf.for
--- a/mlir/test/Dialect/SCF/for-loop-peeling.mlir
+++ b/mlir/test/Dialect/SCF/for-loop-peeling.mlir
@ -149,18 +149,20 @@ func @no_loop_results(%ub : index, %d : memref<i32>) {

 // -----

-// Test rewriting of affine.min ops. Make sure that more general cases than
+// Test rewriting of affine.min/max ops. Make sure that more general cases than
 // the ones above are successfully rewritten. Also make sure that the pattern
-// does not rewrite affine.min ops that should not be rewritten.
+// does not rewrite ops that should not be rewritten.

 //  CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
 //  CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1 - 1)>
 //  CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (s0, -d0 + s1, s2)>
-//  CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
-//  CHECK-DAG: #[[MAP5:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 + 1)>
-//  CHECK-DAG: #[[MAP6:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 - 1)>
-//  CHECK-DAG: #[[MAP7:.*]] = affine_map<()[s0, s1, s2, s3] -> (s0, s2 - (s2 - (s2 - s1) mod s0), s3)>
-//      CHECK: func @test_affine_min_rewrite(
+//  CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0] -> (-s0)>
+//  CHECK-DAG: #[[MAP5:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
+//  CHECK-DAG: #[[MAP6:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 + 1)>
+//  CHECK-DAG: #[[MAP7:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 - 1)>
+//  CHECK-DAG: #[[MAP8:.*]] = affine_map<()[s0, s1, s2, s3] -> (s0, s2 - (s2 - (s2 - s1) mod s0), s3)>
+//  CHECK-DAG: #[[MAP9:.*]] = affine_map<()[s0, s1, s2] -> (s0 - (s0 - s1) mod s2 - s0)>
+//      CHECK: func @test_affine_op_rewrite(
 // CHECK-SAME:     %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index,
 // CHECK-SAME:     %[[MEMREF:.*]]: memref<?xindex>, %[[SOME_VAL:.*]]: index
 //      CHECK:   scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] {
@ -174,31 +176,37 @@ func @no_loop_results(%ub : index, %d : memref<i32>) {
 //      CHECK:     memref.store %[[RES3]]
 //      CHECK:     %[[RES4:.*]] = affine.min #[[MAP3]](%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
 //      CHECK:     memref.store %[[RES4]]
+//      CHECK:     %[[RES5:.*]] = affine.apply #[[MAP4]]()[%[[STEP]]]
+//      CHECK:     memref.store %[[RES5]]
 //      CHECK:   }
 //      CHECK:   scf.if {{.*}} {
-//      CHECK:     %[[RES_IF_0:.*]] = affine.apply #[[MAP4]]()[%[[UB]], %[[LB]], %[[STEP]]]
+//      CHECK:     %[[RES_IF_0:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
 //      CHECK:     memref.store %[[RES_IF_0]]
-//      CHECK:     %[[RES_IF_1:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
+//      CHECK:     %[[RES_IF_1:.*]] = affine.apply #[[MAP6]]()[%[[UB]], %[[LB]], %[[STEP]]]
 //      CHECK:     memref.store %[[RES_IF_1]]
-//      CHECK:     %[[RES_IF_2:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
+//      CHECK:     %[[RES_IF_2:.*]] = affine.apply #[[MAP6]]()[%[[UB]], %[[LB]], %[[STEP]]]
 //      CHECK:     memref.store %[[RES_IF_2]]
-//      CHECK:     %[[RES_IF_3:.*]] = affine.apply #[[MAP6]]()[%[[UB]], %[[LB]], %[[STEP]]]
+//      CHECK:     %[[RES_IF_3:.*]] = affine.apply #[[MAP7]]()[%[[UB]], %[[LB]], %[[STEP]]]
 //      CHECK:     memref.store %[[RES_IF_3]]
-//      CHECK:     %[[RES_IF_4:.*]] = affine.min #[[MAP7]]()[%[[STEP]], %[[LB]], %[[UB]], %[[SOME_VAL]]]
+//      CHECK:     %[[RES_IF_4:.*]] = affine.min #[[MAP8]]()[%[[STEP]], %[[LB]], %[[UB]], %[[SOME_VAL]]]
 //      CHECK:     memref.store %[[RES_IF_4]]
+//      CHECK:     %[[RES_IF_5:.*]] = affine.apply #[[MAP9]]()[%[[UB]], %[[LB]], %[[STEP]]]
+//      CHECK:     memref.store %[[RES_IF_5]]
 #map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
 #map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)>
 #map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)>
 #map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)>
 #map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)>
-func @test_affine_min_rewrite(%lb : index, %ub: index,
-                              %step: index, %d : memref<?xindex>,
-                              %some_val: index) {
+#map5 = affine_map<(d0, d1)[s0] -> (-s0, -d0 + d1)>
+func @test_affine_op_rewrite(%lb : index, %ub: index,
+                             %step: index, %d : memref<?xindex>,
+                             %some_val: index) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c2 = constant 2 : index
  %c3 = constant 3 : index
  %c4 = constant 4 : index
+  %c5 = constant 5 : index
  scf.for %iv = %lb to %ub step %step {
    // Most common case: Rewrite min(%ub - %iv, %step) to %step.
    %m0 = affine.min #map0(%ub, %iv)[%step]
@ -221,6 +229,10 @@ func @test_affine_min_rewrite(%lb : index, %ub: index,
    // of %some_val is unknown.
    %m4 = affine.min #map4(%ub, %iv, %some_val)[%step]
    memref.store %m4, %d[%c4] : memref<?xindex>
+
+    // Rewrite max(-%ub + %iv, -%step) to -%ub + %iv (and -%step in the scf.if).
+    %m5 = affine.max #map5(%ub, %iv)[%step]
+    memref.store %m5, %d[%c5] : memref<?xindex>
  }
  return
 }