[mlir][scf] Simplify affine.min ops after loop peeling

Simplify affine.min ops, enabling various other canonicalizations inside the peeled loop body.

affine.min ops such as:
```
map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
%r = affine.min #map(%iv)[%step, %ub]
```
are rewritten into (in the case of the peeled loop):
```
%r = %step
```

To determine how an affine.min op should be rewritten, and to prove the rewrite's correctness, FlatAffineConstraints is used.
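As a rough sketch of how that works (illustrative only; the helper name and
harness are not part of this commit, and the API calls mirror
`rewritePeeledAffineOp` in the diff below):
```
#include "mlir/Analysis/AffineStructures.h"
using namespace mlir;

// Encode the peeled-loop invariant over the loop's SSA values `iv`, `ub`,
// and `step`. Columns of the system: iv, ub, step, constant term.
static FlatAffineValueConstraints
buildPeeledLoopConstraints(Value iv, Value ub, Value step) {
  FlatAffineValueConstraints constraints;
  constraints.addDimId(0, iv);
  constraints.addDimId(1, ub);
  constraints.addDimId(2, step);
  // Inside the peeled loop, every iteration is "full":
  // ub - iv >= step  (as a row: -iv + ub - step + 0 >= 0).
  constraints.addInequality({-1, 1, -1, 0});
  return constraints;
}
```
Proving `min(step, ub - iv) == step` then amounts to adding the negation of
the claim and checking that the system becomes infeasible (`isEmpty()`).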

Differential Revision: https://reviews.llvm.org/D107222
Matthias Springer 2021-08-19 17:08:21 +09:00
parent 3330b2532f
commit 8e8b70aa84
10 changed files with 439 additions and 59 deletions

View File

@@ -250,6 +250,12 @@ public:
void addSymbolId(unsigned pos);
void addLocalId(unsigned pos);
virtual unsigned addId(IdKind kind, unsigned pos);
/// Add identifiers of the specified kind at the end of the table. Return the
/// position of the column. The coefficient column corresponding to the
/// added identifier is initialized to zero.
unsigned addDimId();
unsigned addSymbolId();
unsigned addLocalId();
/// Composes an affine map whose dimensions and symbols match one to one with
/// the dimensions and symbols of this FlatAffineConstraints. The results of
@@ -659,6 +665,12 @@ public:
using FlatAffineConstraints::addSymbolId;
unsigned addId(IdKind kind, unsigned pos) override;
unsigned addId(IdKind kind, unsigned pos, Value val);
/// Add identifiers of the specified kind at the end of the table. Return the
/// position of the column. The coefficient column corresponding to the
/// added identifier is initialized to zero. `val` is the Value corresponding
/// to the identifier that can optionally be provided.
unsigned addDimId(Value val);
unsigned addSymbolId(Value val);
/// Add the specified value as a dim or symbol id depending on its nature, if
/// it doesn't already exist in the system. `val` has to be either a terminal
@@ -766,6 +778,18 @@ public:
return {values.data(), values.size()};
}
inline ArrayRef<Optional<Value>> getMaybeDimValues() const {
return {values.data(), getNumDimIds()};
}
inline ArrayRef<Optional<Value>> getMaybeSymbolValues() const {
return {values.data() + getNumDimIds(), getNumSymbolIds()};
}
inline ArrayRef<Optional<Value>> getMaybeDimAndSymbolValues() const {
return {values.data(), getNumDimIds() + getNumSymbolIds()};
}
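These accessors slice the same underlying `values` vector; a small sketch of
typical use, mirroring the `unpackOptionalValues` helper later in this commit
(the harness function is assumed):
```
#include "mlir/Analysis/AffineStructures.h"

// Iterate the dim Values tracked by the system. Identifiers without an
// attached Value are returned as None.
static void printDimValues(const mlir::FlatAffineValueConstraints &cst) {
  for (mlir::Optional<mlir::Value> v : cst.getMaybeDimValues()) {
    if (v.hasValue())
      v->dump(); // Prints to stderr; for illustration only.
  }
}
```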
/// Sets the Value associated with the pos^th identifier.
inline void setValue(unsigned pos, Value val) {
assert(pos < numIds && "invalid id position");

View File

@@ -44,9 +44,10 @@ void naivelyFuseParallelOps(Region &region);
/// by an scf.if for the last (partial) iteration (if any). This transformation
/// is called "loop peeling".
///
/// Other patterns can simplify/canonicalize operations in the body of the loop
/// and the scf.if. This is beneficial for a wide range of transformations such
/// as vectorization or loop tiling.
/// This transformation is beneficial for a wide range of transformations such
/// as vectorization or loop tiling: It enables additional canonicalizations
/// inside the peeled loop body such as rewriting masked loads into unmasked
/// loads.
///
/// E.g., assuming a lower bound of 0 (for illustration purposes):
/// ```
@@ -65,11 +66,22 @@ void naivelyFuseParallelOps(Region &region);
/// }
/// ```
///
/// This function rewrites the given scf.for loop in-place and creates a new
/// scf.if operation (returned via `ifOp`) for the last iteration.
/// After loop peeling, this function tries to simplify/canonicalize affine.min
/// operations in the body of the loop and the scf.if, taking advantage of the
/// fact that every iteration of the peeled loop is a "full" iteration. This
/// canonicalization is expected to enable further canonicalization
/// opportunities through other patterns.
///
/// TODO: Simplify affine.min ops inside the new loop/if statement.
LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp);
/// The return value indicates whether the loop was rewritten or not. Loops are
/// not rewritten if:
/// * Loop step size is 1, or
/// * Loop bounds and step size are static, and step already divides the
/// iteration space evenly.
///
/// Note: This function rewrites the given scf.for loop in-place and creates a
/// new scf.if operation for the last iteration. It replaces all uses of the
/// unpeeled loop with the results of the newly generated scf.if.
LogicalResult peelAndCanonicalizeForLoop(RewriterBase &rewriter, ForOp forOp);
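A minimal sketch of driving this entry point from a rewrite pattern, mirroring
the `ForLoopPeelingPattern` change further down in this commit (the pattern
name and the "peeled" marker attribute are illustrative):
```
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/PatternMatch.h"
// Plus the SCF transforms header that declares peelAndCanonicalizeForLoop.

namespace {
struct PeelLoopsPattern : public mlir::OpRewritePattern<mlir::scf::ForOp> {
  using OpRewritePattern<mlir::scf::ForOp>::OpRewritePattern;

  mlir::LogicalResult
  matchAndRewrite(mlir::scf::ForOp forOp,
                  mlir::PatternRewriter &rewriter) const override {
    // Skip loops that were already peeled.
    if (forOp->hasAttr("peeled"))
      return mlir::failure();
    if (mlir::failed(mlir::scf::peelAndCanonicalizeForLoop(rewriter, forOp)))
      return mlir::failure();
    // Label the loop so it is not peeled a second time.
    rewriter.updateRootInPlace(forOp, [&]() {
      forOp->setAttr("peeled", rewriter.getUnitAttr());
    });
    return mlir::success();
  }
};
} // namespace
```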
/// Tile a parallel loop of the form
/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)

View File

@@ -143,12 +143,15 @@ public:
/// `*this` and apply replace with `map` on its subexpressions.
AffineExpr replace(const DenseMap<AffineExpr, AffineExpr> &map) const;
/// Replace dims[0 .. numDims - 1] by dims[shift .. shift + numDims - 1].
AffineExpr shiftDims(unsigned numDims, unsigned shift) const;
/// Replace dims[offset ... numDims)
/// by dims[offset + shift ... shift + numDims).
AffineExpr shiftDims(unsigned numDims, unsigned shift,
unsigned offset = 0) const;
/// Replace symbols[0 .. numSymbols - 1] by
/// symbols[shift .. shift + numSymbols - 1].
AffineExpr shiftSymbols(unsigned numSymbols, unsigned shift) const;
/// Replace symbols[offset ... numSymbols)
/// by symbols[offset + shift ... shift + numSymbols).
AffineExpr shiftSymbols(unsigned numSymbols, unsigned shift,
unsigned offset = 0) const;
AffineExpr operator+(int64_t v) const;
AffineExpr operator+(AffineExpr other) const;
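For illustration, a small sketch of the new `offset` parameter (behavior per
the `AffineExpr::shiftDims` implementation later in this diff; the harness
function is assumed):
```
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/MLIRContext.h"

// With numDims = 2: dims below `offset` stay in place, the rest are shifted.
// Here d0 stays d0 and d1 becomes d4, so (d0 + d1) -> (d0 + d4).
static mlir::AffineExpr shiftExample(mlir::MLIRContext *ctx) {
  mlir::AffineExpr d0 = mlir::getAffineDimExpr(0, ctx);
  mlir::AffineExpr d1 = mlir::getAffineDimExpr(1, ctx);
  return (d0 + d1).shiftDims(/*numDims=*/2, /*shift=*/3, /*offset=*/1);
}
```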

View File

@@ -207,24 +207,28 @@ public:
AffineMap replace(const DenseMap<AffineExpr, AffineExpr> &map,
unsigned numResultDims, unsigned numResultSyms) const;
/// Replace dims[0 .. numDims - 1] by dims[shift .. shift + numDims - 1].
AffineMap shiftDims(unsigned shift) const {
return AffineMap::get(
getNumDims() + shift, getNumSymbols(),
llvm::to_vector<4>(llvm::map_range(
getResults(),
[&](AffineExpr e) { return e.shiftDims(getNumDims(), shift); })),
getContext());
/// Replace dims[offset ... numDims)
/// by dims[offset + shift ... shift + numDims).
AffineMap shiftDims(unsigned shift, unsigned offset = 0) const {
assert(offset <= getNumDims());
return AffineMap::get(getNumDims() + shift, getNumSymbols(),
llvm::to_vector<4>(llvm::map_range(
getResults(),
[&](AffineExpr e) {
return e.shiftDims(getNumDims(), shift, offset);
})),
getContext());
}
/// Replace symbols[0 .. numSymbols - 1] by
/// symbols[shift .. shift + numSymbols - 1].
AffineMap shiftSymbols(unsigned shift) const {
/// Replace symbols[offset ... numSymbols)
/// by symbols[offset + shift ... shift + numSymbols).
AffineMap shiftSymbols(unsigned shift, unsigned offset = 0) const {
return AffineMap::get(getNumDims(), getNumSymbols() + shift,
llvm::to_vector<4>(llvm::map_range(
getResults(),
[&](AffineExpr e) {
return e.shiftSymbols(getNumSymbols(), shift);
return e.shiftSymbols(getNumSymbols(), shift,
offset);
})),
getContext());
}
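Likewise for the map-level helper, a sketch (assumed harness) of how
`shiftSymbols` grows the symbol count while renaming the existing symbols:
```
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/MLIRContext.h"

// (d0)[s0] -> (d0 + s0) with shiftSymbols(/*shift=*/2) becomes
// (d0)[s0, s1, s2] -> (d0 + s2): two fresh symbols, old s0 renamed to s2.
static mlir::AffineMap shiftMapExample(mlir::MLIRContext *ctx) {
  mlir::AffineExpr d0 = mlir::getAffineDimExpr(0, ctx);
  mlir::AffineExpr s0 = mlir::getAffineSymbolExpr(0, ctx);
  mlir::AffineMap map = mlir::AffineMap::get(/*dimCount=*/1, /*symbolCount=*/1,
                                             {d0 + s0}, ctx);
  return map.shiftSymbols(/*shift=*/2); // offset defaults to 0
}
```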

View File

@@ -263,22 +263,52 @@ void FlatAffineConstraints::addLocalId(unsigned pos) {
addId(IdKind::Local, pos);
}
unsigned FlatAffineConstraints::addLocalId() {
unsigned pos = getNumLocalIds();
addId(IdKind::Local, pos);
return pos;
}
void FlatAffineConstraints::addDimId(unsigned pos) {
addId(IdKind::Dimension, pos);
}
unsigned FlatAffineConstraints::addDimId() {
unsigned pos = getNumDimIds();
addId(IdKind::Dimension, pos);
return pos;
}
void FlatAffineValueConstraints::addDimId(unsigned pos, Value val) {
addId(IdKind::Dimension, pos, val);
}
unsigned FlatAffineValueConstraints::addDimId(Value val) {
unsigned pos = getNumDimIds();
addId(IdKind::Dimension, pos, val);
return pos;
}
void FlatAffineConstraints::addSymbolId(unsigned pos) {
addId(IdKind::Symbol, pos);
}
unsigned FlatAffineConstraints::addSymbolId() {
unsigned pos = getNumSymbolIds();
addId(IdKind::Symbol, pos);
return pos;
}
void FlatAffineValueConstraints::addSymbolId(unsigned pos, Value val) {
addId(IdKind::Symbol, pos, val);
}
unsigned FlatAffineValueConstraints::addSymbolId(Value val) {
unsigned pos = getNumSymbolIds();
addId(IdKind::Symbol, pos, val);
return pos;
}
unsigned FlatAffineConstraints::addId(IdKind kind, unsigned pos) {
if (kind == IdKind::Dimension)
assert(pos <= getNumDimIds());
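A short sketch of the intended use of the new overloads (the Values and names
are illustrative; the returned position is relative to the identifier kind,
which for dimensions is also the absolute column index, since dimension
columns come first):
```
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/Value.h"

// Append identifiers without computing insertion positions by hand.
static void buildSystem(mlir::Value iv, mlir::Value ub, mlir::Value step) {
  mlir::FlatAffineValueConstraints cst;
  unsigned dimIv = cst.addDimId(iv);        // dimension 0
  unsigned dimUb = cst.addDimId(ub);        // dimension 1
  unsigned symStep = cst.addSymbolId(step); // symbol 0
  (void)dimIv; (void)dimUb; (void)symStep;
}
```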

View File

@@ -357,6 +357,9 @@ static bool isDimOpValidSymbol(OpTy dimOp, Region *region) {
// *) It is a result of the dim op on a memref whose corresponding size is a
// valid symbol.
bool mlir::isValidSymbol(Value value) {
if (!value)
return false;
// The value must be an index type.
if (!value.getType().isIndex())
return false;

View File

@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/SCF/Passes.h"
#include "mlir/Dialect/SCF/SCF.h"
@@ -97,8 +98,16 @@ static void specializeForLoopForUnrolling(ForOp op) {
/// Rewrite a for loop with bounds/step that potentially do not divide evenly
/// into a for loop where the step divides the iteration space evenly, followed
/// by an scf.if for the last (partial) iteration (if any).
LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp,
scf::IfOp &ifOp) {
///
/// This function rewrites the given scf.for loop in-place and creates a new
/// scf.if operation for the last iteration. It replaces all uses of the
/// unpeeled loop with the results of the newly generated scf.if.
///
/// The newly generated scf.if operation is returned via `ifOp`. The boundary
/// at which the loop is split (new upper bound) is returned via `splitBound`.
/// The return value indicates whether the loop was rewritten or not.
static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp,
Value &splitBound) {
RewriterBase::InsertionGuard guard(b);
auto lbInt = getConstantIntValue(forOp.lowerBound());
auto ubInt = getConstantIntValue(forOp.upperBound());
@@ -117,7 +126,7 @@ LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp,
// New upper bound: %ub - (%ub - %lb) mod %step
auto modMap = AffineMap::get(3, 0, {dim1 - ((dim1 - dim0) % dim2)});
b.setInsertionPoint(forOp);
Value splitBound = b.createOrFold<AffineApplyOp>(
splitBound = b.createOrFold<AffineApplyOp>(
loc, modMap,
ValueRange{forOp.lowerBound(), forOp.upperBound(), forOp.step()});
@@ -153,6 +162,226 @@ LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp,
return success();
}
static void unpackOptionalValues(ArrayRef<Optional<Value>> source,
SmallVector<Value> &target) {
target = llvm::to_vector<4>(llvm::map_range(source, [](Optional<Value> val) {
return val.hasValue() ? *val : Value();
}));
}
/// Bound an identifier `pos` in a given FlatAffineValueConstraints with
/// constraints drawn from an affine map. Before adding the constraint, the
/// dimensions/symbols of the affine map are aligned with `constraints`.
/// `operands` are the SSA Value operands used with the affine map.
/// Note: This function adds a new symbol column to the `constraints` for each
/// dimension/symbol that exists in the affine map but not in `constraints`.
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
FlatAffineConstraints::BoundType type,
unsigned pos, AffineMap map,
ValueRange operands) {
SmallVector<Value> dims, syms, newSyms;
unpackOptionalValues(constraints.getMaybeDimValues(), dims);
unpackOptionalValues(constraints.getMaybeSymbolValues(), syms);
AffineMap alignedMap =
alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
for (unsigned i = syms.size(); i < newSyms.size(); ++i)
constraints.addSymbolId(constraints.getNumSymbolIds(), newSyms[i]);
return constraints.addBound(type, pos, alignedMap);
}
/// This function tries to canonicalize an affine.min operation by proving that
/// its value is bounded by the same lower and upper bound. In that case, the
/// operation can be folded away.
///
/// Bounds are computed by FlatAffineValueConstraints. Invariants required for
/// finding/proving bounds should be supplied via `constraints`.
///
/// 1. Add dimensions for `minOp` and `minOpUb` (upper bound of `minOp`).
/// 2. Compute an upper bound of `minOp` and bind it to `minOpUb`. SSA values
/// that are used in `minOp` but are not part of `dims` are added as extra
/// symbols to the constraint set.
/// 3. For each result of `minOp`: Add result as a dimension `r_i`. Prove that
/// r_i >= minOpUb. If this is the case, ub(minOp) == lb(minOp) and `minOp`
/// can be replaced with that bound.
///
/// In summary, the following constraints are added throughout this function.
/// Note: `invar` are dimensions added by the caller to express the invariants.
///
/// invar | minOp | minOpUb | r_i | extra syms... | const | eq/ineq
/// ------+-------+---------+-----+---------------+-------+-------------------
/// (various eq./ineq. constraining `invar`, added by the caller)
/// ... | 0 | 0 | 0 | 0 | ... | ...
/// ------+-------+---------+-----+---------------+-------+-------------------
/// (various ineq. constraining `minOp` in terms of `minOp` operands (`invar`
/// and extra `minOp` operands "extra syms" that are not in `invar`)).
/// ... | -1 | 0 | 0 | ... | ... | >= 0
/// ------+-------+---------+-----+---------------+-------+-------------------
/// (set `minOpUb` to `minOp` upper bound in terms of `invar` and extra syms)
/// ... | 0 | -1 | 0 | ... | ... | = 0
/// ------+-------+---------+-----+---------------+-------+-------------------
/// (for each `minOp` map result r_i: copy previous constraints, set r_i to
/// corresponding map result, prove r_i >= minOpUb via contradiction)
/// ... | 0 | 0 | -1 | ... | ... | = 0
/// 0 | 0 | 1 | -1 | 0 | -1 | >= 0
///
static LogicalResult
canonicalizeAffineMinOp(RewriterBase &rewriter, AffineMinOp minOp,
FlatAffineValueConstraints constraints) {
RewriterBase::InsertionGuard guard(rewriter);
AffineMap minOpMap = minOp.getAffineMap();
unsigned numResults = minOpMap.getNumResults();
// Add a few extra dimensions.
unsigned dimMinOp = constraints.addDimId(); // `minOp`
unsigned dimMinOpUb = constraints.addDimId(); // `minOp` upper bound
unsigned resultDimStart = constraints.getNumDimIds();
for (unsigned i = 0; i < numResults; ++i)
constraints.addDimId();
// Add an inequality for each result expr_i of minOpMap: minOp <= expr_i
if (failed(alignAndAddBound(constraints, FlatAffineConstraints::UB, dimMinOp,
minOpMap, minOp.operands())))
return failure();
// Try to compute an upper bound for minOp, expressed in terms of the other
// `dims` and extra symbols.
SmallVector<AffineMap> minOpValLb(1), minOpValUb(1);
constraints.getSliceBounds(dimMinOp, 1, minOp.getContext(), &minOpValLb,
&minOpValUb);
// TODO: `getSliceBounds` may return multiple bounds at the moment. This is
// a TODO of `getSliceBounds` and not handled here.
if (!minOpValUb[0] || minOpValUb[0].getNumResults() != 1)
return failure(); // No or multiple upper bounds found.
// Add an equality: dimMinOpUb = minOpValUb[0]
// Add back dimension for minOp. (Was removed by `getSliceBounds`.)
AffineMap alignedUbMap = minOpValUb[0].shiftDims(/*shift=*/1,
/*offset=*/dimMinOp);
if (failed(constraints.addBound(FlatAffineConstraints::EQ, dimMinOpUb,
alignedUbMap)))
return failure();
// If the constraint system is empty, there is an inconsistency. (E.g., this
// can happen if loop lb > ub.)
if (constraints.isEmpty())
return failure();
// Prove that each result of minOpMap has a lower bound that is equal to (or
// greater than) the upper bound of minOp (`dimMinOpUb`). In that case,
// minOp can be replaced with the bound. I.e., prove that for each result
// expr_i (represented by dimension r_i):
//
// r_i >= minOpUb
//
// To prove this inequality, add its negation to the constraint set and prove
// that the constraint set is empty.
for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
FlatAffineValueConstraints newConstr(constraints);
// Add an equality: r_i = expr_i
// Note: These equalities could have been added earlier and used to express
// minOp <= expr_i. However, then we run the risk that `getSliceBounds`
// computes minOpUb in terms of r_i dims, which is not desired.
if (failed(alignAndAddBound(newConstr, FlatAffineConstraints::EQ, i,
minOpMap.getSubMap({i - resultDimStart}),
minOp.operands())))
return failure();
// Add inequality: r_i < minOpUb (equiv.: minOpUb - r_i - 1 >= 0)
SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
ineq[dimMinOpUb] = 1;
ineq[i] = -1;
ineq[newConstr.getNumCols() - 1] = -1;
newConstr.addInequality(ineq);
if (!newConstr.isEmpty())
return failure();
}
// Lower and upper bound of `minOp` are equal. Replace `minOp` with its bound.
AffineMap newMap = alignedUbMap;
SmallVector<Value> newOperands;
unpackOptionalValues(constraints.getMaybeDimAndSymbolValues(), newOperands);
mlir::canonicalizeMapAndOperands(&newMap, &newOperands);
rewriter.setInsertionPoint(minOp);
rewriter.replaceOpWithNewOp<AffineApplyOp>(minOp, newMap, newOperands);
return success();
}
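As a worked instance of the steps above (illustrative, not part of the
commit): consider min(%step, %ub - %iv) inside the peeled loop, where the
caller supplied the invariant ub - iv >= step.
```
Result exprs of minOp:  r_0 = step,  r_1 = ub - iv
Step 2 (upper bound):   minOp <= step and minOp <= ub - iv; under the
                        invariant, step is the tighter bound: minOpUb = step.
Step 3 for r_0: add the negation  step < step     -> system is empty.
Step 3 for r_1: add the negation  ub - iv < step  -> contradicts the
                invariant ub - iv >= step         -> system is empty.
Hence lb(minOp) == ub(minOp) == step, and the affine.min is replaced by an
affine.apply that yields `step`.
```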
/// Try to simplify an affine.min operation `minOp` after loop peeling. This
/// function detects affine.min operations such as (ub is the previous upper
/// bound of the unpeeled loop):
/// ```
/// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
/// %r = affine.min #map(%iv)[%step, %ub]
/// ```
/// and rewrites them into (in the case of the peeled loop):
/// ```
/// %r = %step
/// ```
/// affine.min operations inside the generated scf.if operation are rewritten in
/// a similar way.
///
/// This function builds up a set of constraints, capable of proving that:
/// * Inside the peeled loop: min(step, ub - iv) == step
/// * Inside the scf.if operation: min(step, ub - iv) == ub - iv
///
/// Note: `ub` is the previous upper bound of the loop (before peeling).
/// `insideLoop` must be true for affine.min ops inside the loop and false for
/// affine.min ops inside the scf.if op.
static LogicalResult rewritePeeledAffineOp(RewriterBase &rewriter,
AffineMinOp minOp, Value iv,
Value ub, Value step,
bool insideLoop) {
FlatAffineValueConstraints constraints;
constraints.addDimId(0, iv);
constraints.addDimId(1, ub);
constraints.addDimId(2, step);
if (auto constUb = getConstantIntValue(ub))
constraints.addBound(FlatAffineConstraints::EQ, 1, *constUb);
if (auto constStep = getConstantIntValue(step))
constraints.addBound(FlatAffineConstraints::EQ, 2, *constStep);
// Add loop peeling invariant. This is the main piece of knowledge that
// enables AffineMinOp simplification.
if (insideLoop) {
// ub - iv >= step (equiv.: -iv + ub - step + 0 >= 0)
// Intuitively: Inside the peeled loop, every iteration is a "full"
// iteration, i.e., step divides the iteration space `ub - lb` evenly.
constraints.addInequality({-1, 1, -1, 0});
} else {
// ub - iv < step (equiv.: iv + -ub + step - 1 >= 0)
// Intuitively: `iv` is the split bound here, i.e., the iteration variable
// value of the very last iteration (in the unpeeled loop). At that point,
// there are less than `step` elements remaining. (Otherwise, the peeled
// loop would run for at least one more iteration.)
constraints.addInequality({1, -1, 1, -1});
}
return canonicalizeAffineMinOp(rewriter, minOp, constraints);
}
LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
ForOp forOp) {
Value ub = forOp.upperBound();
scf::IfOp ifOp;
Value splitBound;
if (failed(peelForLoop(rewriter, forOp, ifOp, splitBound)))
return failure();
// Rewrite affine.min ops.
forOp.walk([&](AffineMinOp minOp) {
(void)rewritePeeledAffineOp(rewriter, minOp, forOp.getInductionVar(), ub,
forOp.step(), /*insideLoop=*/true);
});
ifOp.walk([&](AffineMinOp minOp) {
(void)rewritePeeledAffineOp(rewriter, minOp, splitBound, ub, forOp.step(),
/*insideLoop=*/false);
});
return success();
}
static constexpr char kPeeledLoopLabel[] = "__peeled_loop__";
namespace {
@@ -163,15 +392,12 @@ struct ForLoopPeelingPattern : public OpRewritePattern<ForOp> {
PatternRewriter &rewriter) const override {
if (forOp->hasAttr(kPeeledLoopLabel))
return failure();
scf::IfOp ifOp;
if (failed(peelForLoop(rewriter, forOp, ifOp)))
if (failed(peelAndCanonicalizeForLoop(rewriter, forOp)))
return failure();
// Apply label, so that the same loop is not rewritten a second time.
rewriter.updateRootInPlace(forOp, [&]() {
forOp->setAttr(kPeeledLoopLabel, rewriter.getUnitAttr());
});
return success();
}
};

View File

@@ -101,19 +101,26 @@ AffineExpr::replaceSymbols(ArrayRef<AffineExpr> symReplacements) const {
return replaceDimsAndSymbols({}, symReplacements);
}
/// Replace dims[0 .. numDims - 1] by dims[shift .. shift + numDims - 1].
AffineExpr AffineExpr::shiftDims(unsigned numDims, unsigned shift) const {
/// Replace dims[offset ... numDims)
/// by dims[offset + shift ... shift + numDims).
AffineExpr AffineExpr::shiftDims(unsigned numDims, unsigned shift,
unsigned offset) const {
SmallVector<AffineExpr, 4> dims;
for (unsigned idx = 0; idx < numDims; ++idx)
for (unsigned idx = 0; idx < offset; ++idx)
dims.push_back(getAffineDimExpr(idx, getContext()));
for (unsigned idx = offset; idx < numDims; ++idx)
dims.push_back(getAffineDimExpr(idx + shift, getContext()));
return replaceDimsAndSymbols(dims, {});
}
/// Replace symbols[0 .. numSymbols - 1] by
/// symbols[shift .. shift + numSymbols - 1].
AffineExpr AffineExpr::shiftSymbols(unsigned numSymbols, unsigned shift) const {
/// Replace symbols[offset ... numSymbols)
/// by symbols[offset + shift ... shift + numSymbols).
AffineExpr AffineExpr::shiftSymbols(unsigned numSymbols, unsigned shift,
unsigned offset) const {
SmallVector<AffineExpr, 4> symbols;
for (unsigned idx = 0; idx < numSymbols; ++idx)
for (unsigned idx = 0; idx < offset; ++idx)
symbols.push_back(getAffineSymbolExpr(idx, getContext()));
for (unsigned idx = offset; idx < numSymbols; ++idx)
symbols.push_back(getAffineSymbolExpr(idx + shift, getContext()));
return replaceDimsAndSymbols({}, symbols);
}

View File

@@ -1,22 +1,20 @@
// RUN: mlir-opt %s -for-loop-peeling -canonicalize -split-input-file | FileCheck %s
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0, s1, s2] -> (s0, s2 - (s2 - (s2 - s1) mod s0))>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
// CHECK: func @fully_dynamic_bounds(
// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index
// CHECK: %[[C0_I32:.*]] = constant 0 : i32
// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]]
// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]]
// CHECK-SAME: step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[STEP]], %[[UB]]]
// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
// CHECK: %[[CAST:.*]] = index_cast %[[STEP]] : index to i32
// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
// CHECK: scf.yield %[[ADD]]
// CHECK: }
// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
// CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[STEP]], %[[LB]], %[[UB]]]
// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]], %[[LB]], %[[STEP]]]
// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
// CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
// CHECK: scf.yield %[[ADD2]]
@@ -38,18 +36,16 @@ func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 {
// -----
// CHECK-DAG: #[[MAP:.*]] = affine_map<(d0) -> (4, -d0 + 17)>
// CHECK: func @fully_static_bounds(
// CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32
// CHECK-DAG: %[[C1_I32:.*]] = constant 1 : i32
// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
// CHECK-DAG: %[[C16:.*]] = constant 16 : index
// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]]
// CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
// CHECK: %[[MINOP:.*]] = affine.min #[[MAP]](%[[IV]])
// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32
// CHECK: scf.yield %[[ADD]]
// CHECK: }
// CHECK: %[[RESULT:.*]] = addi %[[LOOP]], %[[C1_I32]] : i32
@@ -73,24 +69,22 @@ func @fully_static_bounds() -> i32 {
// -----
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)>
// CHECK: func @dynamic_upper_bound(
// CHECK-SAME: %[[UB:.*]]: index
// CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32
// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]]
// CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]]
// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32
// CHECK: scf.yield %[[ADD]]
// CHECK: }
// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
// CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]]
// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]]
// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
// CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
// CHECK: scf.yield %[[ADD2]]
@@ -116,23 +110,21 @@ func @dynamic_upper_bound(%ub : index) -> i32 {
// -----
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)>
// CHECK: func @no_loop_results(
// CHECK-SAME: %[[UB:.*]]: index, %[[MEMREF:.*]]: memref<i32>
// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
// CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] {
// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]]
// CHECK: %[[LOAD:.*]] = memref.load %[[MEMREF]][]
// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[CAST]] : i32
// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[C4_I32]] : i32
// CHECK: memref.store %[[ADD]], %[[MEMREF]]
// CHECK: }
// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
// CHECK: scf.if %[[HAS_MORE]] {
// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]]
// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]]
// CHECK: %[[LOAD2:.*]] = memref.load %[[MEMREF]][]
// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
// CHECK: %[[ADD2:.*]] = addi %[[LOAD2]], %[[CAST2]]
@@ -153,3 +145,81 @@ func @no_loop_results(%ub : index, %d : memref<i32>) {
}
return
}
// -----
// Test rewriting of affine.min ops. Make sure that more general cases than
// the ones above are successfully rewritten. Also make sure that the pattern
// does not rewrite affine.min ops that should not be rewritten.
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1 - 1)>
// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (s0, -d0 + s1, s2)>
// CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
// CHECK-DAG: #[[MAP5:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 + 1)>
// CHECK-DAG: #[[MAP6:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 - 1)>
// CHECK-DAG: #[[MAP7:.*]] = affine_map<()[s0, s1, s2, s3] -> (s0, s2 - (s2 - (s2 - s1) mod s0), s3)>
// CHECK: func @test_affine_min_rewrite(
// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index,
// CHECK-SAME: %[[MEMREF:.*]]: memref<?xindex>, %[[SOME_VAL:.*]]: index
// CHECK: scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] {
// (affine.min folded away)
// CHECK: memref.store %[[STEP]]
// (affine.min folded away)
// CHECK: memref.store %[[STEP]]
// CHECK: %[[RES2:.*]] = affine.apply #[[MAP1]]()[%[[STEP]]]
// CHECK: memref.store %[[RES2]]
// CHECK: %[[RES3:.*]] = affine.min #[[MAP2]](%[[IV]])[%[[STEP]], %[[UB]]]
// CHECK: memref.store %[[RES3]]
// CHECK: %[[RES4:.*]] = affine.min #[[MAP3]](%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
// CHECK: memref.store %[[RES4]]
// CHECK: }
// CHECK: scf.if {{.*}} {
// CHECK: %[[RES_IF_0:.*]] = affine.apply #[[MAP4]]()[%[[UB]], %[[LB]], %[[STEP]]]
// CHECK: memref.store %[[RES_IF_0]]
// CHECK: %[[RES_IF_1:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
// CHECK: memref.store %[[RES_IF_1]]
// CHECK: %[[RES_IF_2:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
// CHECK: memref.store %[[RES_IF_2]]
// CHECK: %[[RES_IF_3:.*]] = affine.apply #[[MAP6]]()[%[[UB]], %[[LB]], %[[STEP]]]
// CHECK: memref.store %[[RES_IF_3]]
// CHECK: %[[RES_IF_4:.*]] = affine.min #[[MAP7]]()[%[[STEP]], %[[LB]], %[[UB]], %[[SOME_VAL]]]
// CHECK: memref.store %[[RES_IF_4]]
#map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
#map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)>
#map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)>
#map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)>
#map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)>
func @test_affine_min_rewrite(%lb : index, %ub: index,
%step: index, %d : memref<?xindex>,
%some_val: index) {
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
%c3 = constant 3 : index
%c4 = constant 4 : index
scf.for %iv = %lb to %ub step %step {
// Most common case: Rewrite min(%ub - %iv, %step) to %step.
%m0 = affine.min #map0(%ub, %iv)[%step]
memref.store %m0, %d[%c0] : memref<?xindex>
// Increase %ub - %iv a little bit, pattern should still apply.
%m1 = affine.min #map1(%ub, %iv)[%step]
memref.store %m1, %d[%c1] : memref<?xindex>
// Rewrite min(%ub - %iv + 1, %step + 1) to %step + 1.
%m2 = affine.min #map2(%ub, %iv)[%step]
memref.store %m2, %d[%c2] : memref<?xindex>
// min(%ub - %iv - 1, %step) cannot be simplified because %ub - %iv - 1
// can be smaller than %step. (Can be simplified in if-statement.)
%m3 = affine.min #map3(%ub, %iv)[%step]
memref.store %m3, %d[%c3] : memref<?xindex>
// min(%ub - %iv, %step, %some_val) cannot be simplified because the range
// of %some_val is unknown.
%m4 = affine.min #map4(%ub, %iv, %some_val)[%step]
memref.store %m4, %d[%c4] : memref<?xindex>
}
return
}

View File

@@ -1479,6 +1479,7 @@ cc_library(
includes = ["include"],
deps = [
":Affine",
":Analysis",
":DialectUtils",
":IR",
":MemRefDialect",