[mlir][scf] Simplify affine.min ops after loop peeling
Simplify affine.min ops, enabling various other canonicalizations inside the peeled loop body. affine.min ops such as:

```
#map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
%r = affine.min #map(%iv)[%step, %ub]
```

are rewritten into (in the case of the peeled loop):

```
%r = %step
```

To determine how an affine.min op should be rewritten and to prove its correctness, FlatAffineConstraints is utilized.

Differential Revision: https://reviews.llvm.org/D107222
commit 8e8b70aa84 (parent 3330b2532f)
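For illustration, a minimal before/after sketch of the overall effect, modeled on the `fully_dynamic_bounds` test case further below. The function names, the `#new_ub` alias, and the elided bodies are illustrative, not part of the commit:

```
// Before: the tile size must be clamped in every iteration.
#map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
func @before(%lb: index, %ub: index, %step: index) {
  scf.for %iv = %lb to %ub step %step {
    %tile = affine.min #map(%iv)[%step, %ub]
    // ... use %tile ...
  }
  return
}

// After -for-loop-peeling: every iteration of the main loop is "full",
// so the affine.min folds to %step; the remainder goes into an scf.if.
#new_ub = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
func @after(%lb: index, %ub: index, %step: index) {
  %split = affine.apply #new_ub()[%lb, %ub, %step]
  scf.for %iv = %lb to %split step %step {
    // ... use %step directly as the tile size ...
  }
  %has_more = cmpi slt, %split, %ub : index
  scf.if %has_more {
    // ... last (partial) iteration; tile size is %ub - %split ...
  }
  return
}
```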
@@ -250,6 +250,12 @@ public:
   void addSymbolId(unsigned pos);
   void addLocalId(unsigned pos);
   virtual unsigned addId(IdKind kind, unsigned pos);
+  /// Add identifiers of the specified kind at the end of the table. Return the
+  /// position of the column. The coefficient column corresponding to the
+  /// added identifier is initialized to zero.
+  unsigned addDimId();
+  unsigned addSymbolId();
+  unsigned addLocalId();
 
   /// Composes an affine map whose dimensions and symbols match one to one with
   /// the dimensions and symbols of this FlatAffineConstraints. The results of
@@ -659,6 +665,12 @@ public:
   using FlatAffineConstraints::addSymbolId;
   unsigned addId(IdKind kind, unsigned pos) override;
   unsigned addId(IdKind kind, unsigned pos, Value val);
+  /// Add identifiers of the specified kind at the end of the table. Return the
+  /// position of the column. The coefficient column corresponding to the
+  /// added identifier is initialized to zero. `val` is the Value corresponding
+  /// to the identifier that can optionally be provided.
+  unsigned addDimId(Value val);
+  unsigned addSymbolId(Value val);
 
   /// Add the specified values as a dim or symbol id depending on its nature, if
   /// it already doesn't exist in the system. `val` has to be either a terminal
@@ -766,6 +778,18 @@ public:
     return {values.data(), values.size()};
   }
 
+  inline ArrayRef<Optional<Value>> getMaybeDimValues() const {
+    return {values.data(), getNumDimIds()};
+  }
+
+  inline ArrayRef<Optional<Value>> getMaybeSymbolValues() const {
+    return {values.data() + getNumDimIds(), getNumSymbolIds()};
+  }
+
+  inline ArrayRef<Optional<Value>> getMaybeDimAndSymbolValues() const {
+    return {values.data(), getNumDimIds() + getNumSymbolIds()};
+  }
+
   /// Sets the Value associated with the pos^th identifier.
   inline void setValue(unsigned pos, Value val) {
     assert(pos < numIds && "invalid id position");
@@ -44,9 +44,10 @@ void naivelyFuseParallelOps(Region &region);
 /// by an scf.if for the last (partial) iteration (if any). This transformation
 /// is called "loop peeling".
 ///
-/// Other patterns can simplify/canonicalize operations in the body of the loop
-/// and the scf.if. This is beneficial for a wide range of transformations such
-/// as vectorization or loop tiling.
+/// This transformation is beneficial for a wide range of transformations such
+/// as vectorization or loop tiling: It enables additional canonicalizations
+/// inside the peeled loop body such as rewriting masked loads into unmasked
+/// loads.
 ///
 /// E.g., assuming a lower bound of 0 (for illustration purposes):
 /// ```
@@ -65,11 +66,22 @@ void naivelyFuseParallelOps(Region &region);
 /// }
 /// ```
 ///
-/// This function rewrites the given scf.for loop in-place and creates a new
-/// scf.if operation (returned via `ifOp`) for the last iteration.
+/// After loop peeling, this function tries to simplify/canonicalize affine.min
+/// operations in the body of the loop and the scf.if, taking advantage of the
+/// fact that every iteration of the peeled loop is a "full" iteration. This
+/// canonicalization is expected to enable further canonicalization
+/// opportunities through other patterns.
 ///
-/// TODO: Simplify affine.min ops inside the new loop/if statement.
-LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp);
+/// The return value indicates whether the loop was rewritten or not. Loops are
+/// not rewritten if:
+/// * Loop step size is 1 or
+/// * Loop bounds and step size are static, and step already divides the
+///   iteration space evenly.
+///
+/// Note: This function rewrites the given scf.for loop in-place and creates a
+/// new scf.if operation for the last iteration. It replaces all uses of the
+/// unpeeled loop with the results of the newly generated scf.if.
+LogicalResult peelAndCanonicalizeForLoop(RewriterBase &rewriter, ForOp forOp);
 
 /// Tile a parallel loop of the form
 ///   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
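The concrete example elided by the diff above presumably resembles the following sketch (lower bound 0, step 4, modeled on the `@dynamic_upper_bound` test case further below; all names illustrative):

```
// Original loop; 4 does not necessarily divide %ub evenly:
scf.for %iv = %c0 to %ub step %c4 { (body) }

// After peeling: the main loop runs up to the largest multiple of 4 that is
// <= %ub; the scf.if handles the remaining partial iteration, if any:
%split = affine.apply affine_map<()[s0] -> ((s0 floordiv 4) * 4)>()[%ub]
scf.for %iv = %c0 to %split step %c4 { (body) }
%has_more = cmpi slt, %split, %ub : index
scf.if %has_more { (body of last, partial iteration) }
```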
@@ -143,12 +143,15 @@ public:
   /// `*this` and apply replace with `map` on its subexpressions.
   AffineExpr replace(const DenseMap<AffineExpr, AffineExpr> &map) const;
 
-  /// Replace dims[0 .. numDims - 1] by dims[shift .. shift + numDims - 1].
-  AffineExpr shiftDims(unsigned numDims, unsigned shift) const;
+  /// Replace dims[offset ... numDims)
+  /// by dims[offset + shift ... shift + numDims).
+  AffineExpr shiftDims(unsigned numDims, unsigned shift,
+                       unsigned offset = 0) const;
 
-  /// Replace symbols[0 .. numSymbols - 1] by
-  /// symbols[shift .. shift + numSymbols - 1].
-  AffineExpr shiftSymbols(unsigned numSymbols, unsigned shift) const;
+  /// Replace symbols[offset ... numSymbols)
+  /// by symbols[offset + shift ... shift + numSymbols).
+  AffineExpr shiftSymbols(unsigned numSymbols, unsigned shift,
+                          unsigned offset = 0) const;
 
   AffineExpr operator+(int64_t v) const;
   AffineExpr operator+(AffineExpr other) const;
@@ -207,24 +207,28 @@ public:
   AffineMap replace(const DenseMap<AffineExpr, AffineExpr> &map,
                     unsigned numResultDims, unsigned numResultSyms) const;
 
-  /// Replace dims[0 .. numDims - 1] by dims[shift .. shift + numDims - 1].
-  AffineMap shiftDims(unsigned shift) const {
-    return AffineMap::get(
-        getNumDims() + shift, getNumSymbols(),
-        llvm::to_vector<4>(llvm::map_range(
-            getResults(),
-            [&](AffineExpr e) { return e.shiftDims(getNumDims(), shift); })),
-        getContext());
+  /// Replace dims[offset ... numDims)
+  /// by dims[offset + shift ... shift + numDims).
+  AffineMap shiftDims(unsigned shift, unsigned offset = 0) const {
+    assert(offset <= getNumDims());
+    return AffineMap::get(getNumDims() + shift, getNumSymbols(),
+                          llvm::to_vector<4>(llvm::map_range(
+                              getResults(),
+                              [&](AffineExpr e) {
+                                return e.shiftDims(getNumDims(), shift, offset);
+                              })),
+                          getContext());
   }
 
-  /// Replace symbols[0 .. numSymbols - 1] by
-  /// symbols[shift .. shift + numSymbols - 1].
-  AffineMap shiftSymbols(unsigned shift) const {
+  /// Replace symbols[offset ... numSymbols)
+  /// by symbols[offset + shift ... shift + numSymbols).
+  AffineMap shiftSymbols(unsigned shift, unsigned offset = 0) const {
     return AffineMap::get(getNumDims(), getNumSymbols() + shift,
                           llvm::to_vector<4>(llvm::map_range(
                               getResults(),
                               [&](AffineExpr e) {
-                                return e.shiftSymbols(getNumSymbols(), shift);
+                                return e.shiftSymbols(getNumSymbols(), shift,
+                                                      offset);
                               })),
                           getContext());
   }
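A small sketch of the new `offset` semantics (maps and names chosen for illustration, not taken from the commit): dims below `offset` stay in place, the remaining dims are shifted by `shift`, and the result is read against `getNumDims() + shift` dims. So `shiftDims(/*shift=*/2, /*offset=*/1)` turns the first map below into the second:

```
#before = affine_map<(d0, d1) -> (d0 + d1)>
// 4 dims after shifting; d0 is below the offset and stays, d1 becomes d3:
#after = affine_map<(d0, d1, d2, d3) -> (d0 + d3)>

func @shift_dims_example(%a: index, %b: index, %c: index, %d: index) -> (index, index) {
  %0 = affine.apply #before(%a, %b)
  %1 = affine.apply #after(%a, %b, %c, %d)
  return %0, %1 : index, index
}
```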
@@ -263,22 +263,52 @@ void FlatAffineConstraints::addLocalId(unsigned pos) {
   addId(IdKind::Local, pos);
 }
 
+unsigned FlatAffineConstraints::addLocalId() {
+  unsigned pos = getNumLocalIds();
+  addId(IdKind::Local, pos);
+  return pos;
+}
+
 void FlatAffineConstraints::addDimId(unsigned pos) {
   addId(IdKind::Dimension, pos);
 }
 
+unsigned FlatAffineConstraints::addDimId() {
+  unsigned pos = getNumDimIds();
+  addId(IdKind::Dimension, pos);
+  return pos;
+}
+
 void FlatAffineValueConstraints::addDimId(unsigned pos, Value val) {
   addId(IdKind::Dimension, pos, val);
 }
 
+unsigned FlatAffineValueConstraints::addDimId(Value val) {
+  unsigned pos = getNumDimIds();
+  addId(IdKind::Dimension, pos, val);
+  return pos;
+}
+
 void FlatAffineConstraints::addSymbolId(unsigned pos) {
   addId(IdKind::Symbol, pos);
 }
 
+unsigned FlatAffineConstraints::addSymbolId() {
+  unsigned pos = getNumSymbolIds();
+  addId(IdKind::Symbol, pos);
+  return pos;
+}
+
 void FlatAffineValueConstraints::addSymbolId(unsigned pos, Value val) {
   addId(IdKind::Symbol, pos, val);
 }
 
+unsigned FlatAffineValueConstraints::addSymbolId(Value val) {
+  unsigned pos = getNumSymbolIds();
+  addId(IdKind::Symbol, pos, val);
+  return pos;
+}
+
 unsigned FlatAffineConstraints::addId(IdKind kind, unsigned pos) {
   if (kind == IdKind::Dimension)
     assert(pos <= getNumDimIds());
@@ -357,6 +357,9 @@ static bool isDimOpValidSymbol(OpTy dimOp, Region *region) {
 // *) It is a result of the dim op on a memref whose corresponding size is a
 //    valid symbol.
 bool mlir::isValidSymbol(Value value) {
+  if (!value)
+    return false;
+
   // The value must be an index type.
   if (!value.getType().isIndex())
     return false;
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/SCF/Passes.h"
 #include "mlir/Dialect/SCF/SCF.h"
@@ -97,8 +98,16 @@ static void specializeForLoopForUnrolling(ForOp op) {
 /// Rewrite a for loop with bounds/step that potentially do not divide evenly
 /// into a for loop where the step divides the iteration space evenly, followed
 /// by an scf.if for the last (partial) iteration (if any).
-LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp,
-                                     scf::IfOp &ifOp) {
+///
+/// This function rewrites the given scf.for loop in-place and creates a new
+/// scf.if operation for the last iteration. It replaces all uses of the
+/// unpeeled loop with the results of the newly generated scf.if.
+///
+/// The newly generated scf.if operation is returned via `ifOp`. The boundary
+/// at which the loop is split (new upper bound) is returned via `splitBound`.
+/// The return value indicates whether the loop was rewritten or not.
+static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp,
+                                 Value &splitBound) {
   RewriterBase::InsertionGuard guard(b);
   auto lbInt = getConstantIntValue(forOp.lowerBound());
   auto ubInt = getConstantIntValue(forOp.upperBound());
@@ -117,7 +126,7 @@ LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp,
   // New upper bound: %ub - (%ub - %lb) mod %step
   auto modMap = AffineMap::get(3, 0, {dim1 - ((dim1 - dim0) % dim2)});
   b.setInsertionPoint(forOp);
-  Value splitBound = b.createOrFold<AffineApplyOp>(
+  splitBound = b.createOrFold<AffineApplyOp>(
       loc, modMap,
       ValueRange{forOp.lowerBound(), forOp.upperBound(), forOp.step()});
 
@@ -153,6 +162,226 @@ LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp,
   return success();
 }
 
+static void unpackOptionalValues(ArrayRef<Optional<Value>> source,
+                                 SmallVector<Value> &target) {
+  target = llvm::to_vector<4>(llvm::map_range(source, [](Optional<Value> val) {
+    return val.hasValue() ? *val : Value();
+  }));
+}
+
+/// Bound an identifier `pos` in a given FlatAffineValueConstraints with
+/// constraints drawn from an affine map. Before adding the constraint, the
+/// dimensions/symbols of the affine map are aligned with `constraints`.
+/// `operands` are the SSA Value operands used with the affine map.
+/// Note: This function adds a new symbol column to the `constraints` for each
+/// dimension/symbol that exists in the affine map but not in `constraints`.
+static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
+                                      FlatAffineConstraints::BoundType type,
+                                      unsigned pos, AffineMap map,
+                                      ValueRange operands) {
+  SmallVector<Value> dims, syms, newSyms;
+  unpackOptionalValues(constraints.getMaybeDimValues(), dims);
+  unpackOptionalValues(constraints.getMaybeSymbolValues(), syms);
+
+  AffineMap alignedMap =
+      alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
+  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
+    constraints.addSymbolId(constraints.getNumSymbolIds(), newSyms[i]);
+  return constraints.addBound(type, pos, alignedMap);
+}
+
+/// This function tries to canonicalize affine.min operations by proving that
+/// its value is bounded by the same lower and upper bound. In that case, the
+/// operation can be folded away.
+///
+/// Bounds are computed by FlatAffineValueConstraints. Invariants required for
+/// finding/proving bounds should be supplied via `constraints`.
+///
+/// 1. Add dimensions for `minOp` and `minOpUb` (upper bound of `minOp`).
+/// 2. Compute an upper bound of `minOp` and bind it to `minOpUb`. SSA values
+///    that are used in `minOp` but are not part of `dims`, are added as extra
+///    symbols to the constraint set.
+/// 3. For each result of `minOp`: Add result as a dimension `r_i`. Prove that
+///    r_i >= minOpUb. If this is the case, ub(minOp) == lb(minOp) and `minOp`
+///    can be replaced with that bound.
+///
+/// In summary, the following constraints are added throughout this function.
+/// Note: `invar` are dimensions added by the caller to express the invariants.
+///
+/// invar | minOp | minOpUb | r_i | extra syms... | const | eq/ineq
+/// ------+-------+---------+-----+---------------+-------+-------------------
+///  (various eq./ineq. constraining `invar`, added by the caller)
+///   ... |   0   |    0    |  0  |       0       |  ...  |  ...
+/// ------+-------+---------+-----+---------------+-------+-------------------
+///  (various ineq. constraining `minOp` in terms of `minOp` operands (`invar`
+///   and extra `minOp` operands "extra syms" that are not in `invar`)).
+///   ... |  -1   |    0    |  0  |      ...      |  ...  |  >= 0
+/// ------+-------+---------+-----+---------------+-------+-------------------
+///  (set `minOpUb` to `minOp` upper bound in terms of `invar` and extra syms)
+///   ... |   0   |   -1    |  0  |      ...      |  ...  |  = 0
+/// ------+-------+---------+-----+---------------+-------+-------------------
+///  (for each `minOp` map result r_i: copy previous constraints, set r_i to
+///   corresponding map result, prove r_i >= minOpUb via contradiction)
+///   ... |   0   |    0    | -1  |      ...      |  ...  |  = 0
+///    0  |   0   |    1    | -1  |       0       |  -1   |  >= 0
+///
+static LogicalResult
+canonicalizeAffineMinOp(RewriterBase &rewriter, AffineMinOp minOp,
+                        FlatAffineValueConstraints constraints) {
+  RewriterBase::InsertionGuard guard(rewriter);
+  AffineMap minOpMap = minOp.getAffineMap();
+  unsigned numResults = minOpMap.getNumResults();
+
+  // Add a few extra dimensions.
+  unsigned dimMinOp = constraints.addDimId();   // `minOp`
+  unsigned dimMinOpUb = constraints.addDimId(); // `minOp` upper bound
+  unsigned resultDimStart = constraints.getNumDimIds();
+  for (unsigned i = 0; i < numResults; ++i)
+    constraints.addDimId();
+
+  // Add an inequality for each result expr_i of minOpMap: minOp <= expr_i
+  if (failed(alignAndAddBound(constraints, FlatAffineConstraints::UB, dimMinOp,
+                              minOpMap, minOp.operands())))
+    return failure();
+
+  // Try to compute an upper bound for minOp, expressed in terms of the other
+  // `dims` and extra symbols.
+  SmallVector<AffineMap> minOpValLb(1), minOpValUb(1);
+  constraints.getSliceBounds(dimMinOp, 1, minOp.getContext(), &minOpValLb,
+                             &minOpValUb);
+  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
+  // a TODO of `getSliceBounds` and not handled here.
+  if (!minOpValUb[0] || minOpValUb[0].getNumResults() != 1)
+    return failure(); // No or multiple upper bounds found.
+
+  // Add an equality: dimMinOpUb = minOpValUb[0]
+  // Add back dimension for minOp. (Was removed by `getSliceBounds`.)
+  AffineMap alignedUbMap = minOpValUb[0].shiftDims(/*shift=*/1,
+                                                   /*offset=*/dimMinOp);
+  if (failed(constraints.addBound(FlatAffineConstraints::EQ, dimMinOpUb,
+                                  alignedUbMap)))
+    return failure();
+
+  // If the constraint system is empty, there is an inconsistency. (E.g., this
+  // can happen if loop lb > ub.)
+  if (constraints.isEmpty())
+    return failure();
+
+  // Prove that each result of minOpMap has a lower bound that is equal to (or
+  // greater than) the upper bound of minOp (`dimMinOpUb`). In that case,
+  // minOp can be replaced with the bound. I.e., prove that for each result
+  // expr_i (represented by dimension r_i):
+  //
+  // r_i >= minOpUb
+  //
+  // To prove this inequality, add its negation to the constraint set and prove
+  // that the constraint set is empty.
+  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
+    FlatAffineValueConstraints newConstr(constraints);
+
+    // Add an equality: r_i = expr_i
+    // Note: These equalities could have been added earlier and used to express
+    // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
+    // computes minOpUb in terms of r_i dims, which is not desired.
+    if (failed(alignAndAddBound(newConstr, FlatAffineConstraints::EQ, i,
+                                minOpMap.getSubMap({i - resultDimStart}),
+                                minOp.operands())))
+      return failure();
+
+    // Add inequality: r_i < minOpUb (equiv.: minOpUb - r_i - 1 >= 0)
+    SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
+    ineq[dimMinOpUb] = 1;
+    ineq[i] = -1;
+    ineq[newConstr.getNumCols() - 1] = -1;
+    newConstr.addInequality(ineq);
+    if (!newConstr.isEmpty())
+      return failure();
+  }
+
+  // Lower and upper bound of `minOp` are equal. Replace `minOp` with its bound.
+  AffineMap newMap = alignedUbMap;
+  SmallVector<Value> newOperands;
+  unpackOptionalValues(constraints.getMaybeDimAndSymbolValues(), newOperands);
+  mlir::canonicalizeMapAndOperands(&newMap, &newOperands);
+  rewriter.setInsertionPoint(minOp);
+  rewriter.replaceOpWithNewOp<AffineApplyOp>(minOp, newMap, newOperands);
+  return success();
+}
+
+/// Try to simplify an affine.min operation `minOp` after loop peeling. This
+/// function detects affine.min operations such as (ub is the previous upper
+/// bound of the unpeeled loop):
+/// ```
+/// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
+/// %r = affine.min #map(%iv)[%step, %ub]
+/// ```
+/// and rewrites them into (in the case of the peeled loop):
+/// ```
+/// %r = %step
+/// ```
+/// affine.min operations inside the generated scf.if operation are rewritten in
+/// a similar way.
+///
+/// This function builds up a set of constraints, capable of proving that:
+/// * Inside the peeled loop: min(step, ub - iv) == step
+/// * Inside the scf.if operation: min(step, ub - iv) == ub - iv
+///
+/// Note: `ub` is the previous upper bound of the loop (before peeling).
+/// `insideLoop` must be true for affine.min ops inside the loop and false for
+/// affine.min ops inside the scf.if op.
+static LogicalResult rewritePeeledAffineOp(RewriterBase &rewriter,
+                                           AffineMinOp minOp, Value iv,
+                                           Value ub, Value step,
+                                           bool insideLoop) {
+  FlatAffineValueConstraints constraints;
+  constraints.addDimId(0, iv);
+  constraints.addDimId(1, ub);
+  constraints.addDimId(2, step);
+  if (auto constUb = getConstantIntValue(ub))
+    constraints.addBound(FlatAffineConstraints::EQ, 1, *constUb);
+  if (auto constStep = getConstantIntValue(step))
+    constraints.addBound(FlatAffineConstraints::EQ, 2, *constStep);
+
+  // Add loop peeling invariant. This is the main piece of knowledge that
+  // enables AffineMinOp simplification.
+  if (insideLoop) {
+    // ub - iv >= step (equiv.: -iv + ub - step + 0 >= 0)
+    // Intuitively: Inside the peeled loop, every iteration is a "full"
+    // iteration, i.e., step divides the iteration space `ub - lb` evenly.
+    constraints.addInequality({-1, 1, -1, 0});
+  } else {
+    // ub - iv < step (equiv.: iv + -ub + step - 1 >= 0)
+    // Intuitively: `iv` is the split bound here, i.e., the iteration variable
+    // value of the very last iteration (in the unpeeled loop). At that point,
+    // there are less than `step` elements remaining. (Otherwise, the peeled
+    // loop would run for at least one more iteration.)
+    constraints.addInequality({1, -1, 1, -1});
+  }
+
+  return canonicalizeAffineMinOp(rewriter, minOp, constraints);
+}
+
+LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
+                                                    ForOp forOp) {
+  Value ub = forOp.upperBound();
+  scf::IfOp ifOp;
+  Value splitBound;
+  if (failed(peelForLoop(rewriter, forOp, ifOp, splitBound)))
+    return failure();
+
+  // Rewrite affine.min ops.
+  forOp.walk([&](AffineMinOp minOp) {
+    (void)rewritePeeledAffineOp(rewriter, minOp, forOp.getInductionVar(), ub,
+                                forOp.step(), /*insideLoop=*/true);
+  });
+  ifOp.walk([&](AffineMinOp minOp) {
+    (void)rewritePeeledAffineOp(rewriter, minOp, splitBound, ub, forOp.step(),
+                                /*insideLoop=*/false);
+  });
+
+  return success();
+}
+
 static constexpr char kPeeledLoopLabel[] = "__peeled_loop__";
 
 namespace {
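For the scf.if branch, a self-contained sketch of the intended rewrite, modeled on the CHECK lines of the updated test file below (`@if_branch_rewrite`, `%split`, and the `#rem` alias are hypothetical names, not from the commit): since `%split` is the induction value the scf.if sees, `ub - split < step` holds there and the affine.min resolves to its `-d0 + s1` result:

```
#min = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
#rem = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
func @if_branch_rewrite(%lb: index, %ub: index, %step: index, %split: index) -> (index, index) {
  // Before: the clamped value, as it appears inside the scf.if.
  %before = affine.min #min(%split)[%step, %ub]
  // After: an affine.apply computing %ub - %split, i.e. (ub - lb) mod step.
  %after = affine.apply #rem()[%ub, %lb, %step]
  return %before, %after : index, index
}
```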
@@ -163,15 +392,12 @@ struct ForLoopPeelingPattern : public OpRewritePattern<ForOp> {
                                 PatternRewriter &rewriter) const override {
     if (forOp->hasAttr(kPeeledLoopLabel))
       return failure();
-
-    scf::IfOp ifOp;
-    if (failed(peelForLoop(rewriter, forOp, ifOp)))
+    if (failed(peelAndCanonicalizeForLoop(rewriter, forOp)))
       return failure();
     // Apply label, so that the same loop is not rewritten a second time.
     rewriter.updateRootInPlace(forOp, [&]() {
       forOp->setAttr(kPeeledLoopLabel, rewriter.getUnitAttr());
     });
-
     return success();
   }
 };
@@ -101,19 +101,26 @@ AffineExpr::replaceSymbols(ArrayRef<AffineExpr> symReplacements) const {
   return replaceDimsAndSymbols({}, symReplacements);
 }
 
-/// Replace symbols[0 .. numDims - 1] by symbols[shift .. shift + numDims - 1].
-AffineExpr AffineExpr::shiftDims(unsigned numDims, unsigned shift) const {
+/// Replace dims[offset ... numDims)
+/// by dims[offset + shift ... shift + numDims).
+AffineExpr AffineExpr::shiftDims(unsigned numDims, unsigned shift,
+                                 unsigned offset) const {
   SmallVector<AffineExpr, 4> dims;
-  for (unsigned idx = 0; idx < numDims; ++idx)
+  for (unsigned idx = 0; idx < offset; ++idx)
+    dims.push_back(getAffineDimExpr(idx, getContext()));
+  for (unsigned idx = offset; idx < numDims; ++idx)
     dims.push_back(getAffineDimExpr(idx + shift, getContext()));
   return replaceDimsAndSymbols(dims, {});
 }
 
-/// Replace symbols[0 .. numSymbols - 1] by
-/// symbols[shift .. shift + numSymbols - 1].
-AffineExpr AffineExpr::shiftSymbols(unsigned numSymbols, unsigned shift) const {
+/// Replace symbols[offset ... numSymbols)
+/// by symbols[offset + shift ... shift + numSymbols).
+AffineExpr AffineExpr::shiftSymbols(unsigned numSymbols, unsigned shift,
+                                    unsigned offset) const {
   SmallVector<AffineExpr, 4> symbols;
-  for (unsigned idx = 0; idx < numSymbols; ++idx)
+  for (unsigned idx = 0; idx < offset; ++idx)
+    symbols.push_back(getAffineSymbolExpr(idx, getContext()));
+  for (unsigned idx = offset; idx < numSymbols; ++idx)
     symbols.push_back(getAffineSymbolExpr(idx + shift, getContext()));
   return replaceDimsAndSymbols({}, symbols);
 }
@@ -1,22 +1,20 @@
 // RUN: mlir-opt %s -for-loop-peeling -canonicalize -split-input-file | FileCheck %s
 
 // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0, s1, s2] -> (s0, s2 - (s2 - (s2 - s1) mod s0))>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
 // CHECK: func @fully_dynamic_bounds(
 // CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index
 // CHECK: %[[C0_I32:.*]] = constant 0 : i32
 // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]]
 // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]]
 // CHECK-SAME: step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
-// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[STEP]], %[[UB]]]
-// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
+// CHECK: %[[CAST:.*]] = index_cast %[[STEP]] : index to i32
 // CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
 // CHECK: scf.yield %[[ADD]]
 // CHECK: }
 // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
 // CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
-// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[STEP]], %[[LB]], %[[UB]]]
+// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]], %[[LB]], %[[STEP]]]
 // CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
 // CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
 // CHECK: scf.yield %[[ADD2]]
@@ -38,18 +36,16 @@ func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 {
 
 // -----
 
-// CHECK-DAG: #[[MAP:.*]] = affine_map<(d0) -> (4, -d0 + 17)>
 // CHECK: func @fully_static_bounds(
 // CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32
 // CHECK-DAG: %[[C1_I32:.*]] = constant 1 : i32
+// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32
 // CHECK-DAG: %[[C0:.*]] = constant 0 : index
 // CHECK-DAG: %[[C4:.*]] = constant 4 : index
 // CHECK-DAG: %[[C16:.*]] = constant 16 : index
 // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]]
 // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
-// CHECK: %[[MINOP:.*]] = affine.min #[[MAP]](%[[IV]])
-// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
-// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
+// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32
 // CHECK: scf.yield %[[ADD]]
 // CHECK: }
 // CHECK: %[[RESULT:.*]] = addi %[[LOOP]], %[[C1_I32]] : i32
@@ -73,24 +69,22 @@ func @fully_static_bounds() -> i32 {
 // -----
 
 // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)>
 // CHECK: func @dynamic_upper_bound(
 // CHECK-SAME: %[[UB:.*]]: index
 // CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32
+// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32
 // CHECK-DAG: %[[C0:.*]] = constant 0 : index
 // CHECK-DAG: %[[C4:.*]] = constant 4 : index
 // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
 // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]]
 // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
-// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]]
-// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
-// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
+// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32
 // CHECK: scf.yield %[[ADD]]
 // CHECK: }
 // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
 // CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
-// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]]
+// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]]
 // CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
 // CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
 // CHECK: scf.yield %[[ADD2]]
@@ -116,23 +110,21 @@ func @dynamic_upper_bound(%ub : index) -> i32 {
 // -----
 
 // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)>
 // CHECK: func @no_loop_results(
 // CHECK-SAME: %[[UB:.*]]: index, %[[MEMREF:.*]]: memref<i32>
+// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32
 // CHECK-DAG: %[[C0:.*]] = constant 0 : index
 // CHECK-DAG: %[[C4:.*]] = constant 4 : index
 // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
 // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] {
-// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]]
 // CHECK: %[[LOAD:.*]] = memref.load %[[MEMREF]][]
-// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
-// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[CAST]] : i32
+// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[C4_I32]] : i32
 // CHECK: memref.store %[[ADD]], %[[MEMREF]]
 // CHECK: }
 // CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
 // CHECK: scf.if %[[HAS_MORE]] {
-// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]]
+// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]]
 // CHECK: %[[LOAD2:.*]] = memref.load %[[MEMREF]][]
 // CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
 // CHECK: %[[ADD2:.*]] = addi %[[LOAD2]], %[[CAST2]]
@@ -153,3 +145,81 @@ func @no_loop_results(%ub : index, %d : memref<i32>) {
   }
   return
 }
+
+// -----
+
+// Test rewriting of affine.min ops. Make sure that more general cases than
+// the ones above are successfully rewritten. Also make sure that the pattern
+// does not rewrite affine.min ops that should not be rewritten.
+
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
+// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1 - 1)>
+// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (s0, -d0 + s1, s2)>
+// CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
+// CHECK-DAG: #[[MAP5:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 + 1)>
+// CHECK-DAG: #[[MAP6:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 - 1)>
+// CHECK-DAG: #[[MAP7:.*]] = affine_map<()[s0, s1, s2, s3] -> (s0, s2 - (s2 - (s2 - s1) mod s0), s3)>
+// CHECK: func @test_affine_min_rewrite(
+// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index,
+// CHECK-SAME: %[[MEMREF:.*]]: memref<?xindex>, %[[SOME_VAL:.*]]: index
+// CHECK: scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] {
+// (affine.min folded away)
+// CHECK: memref.store %[[STEP]]
+// (affine.min folded away)
+// CHECK: memref.store %[[STEP]]
+// CHECK: %[[RES2:.*]] = affine.apply #[[MAP1]]()[%[[STEP]]]
+// CHECK: memref.store %[[RES2]]
+// CHECK: %[[RES3:.*]] = affine.min #[[MAP2]](%[[IV]])[%[[STEP]], %[[UB]]]
+// CHECK: memref.store %[[RES3]]
+// CHECK: %[[RES4:.*]] = affine.min #[[MAP3]](%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
+// CHECK: memref.store %[[RES4]]
+// CHECK: }
+// CHECK: scf.if {{.*}} {
+// CHECK: %[[RES_IF_0:.*]] = affine.apply #[[MAP4]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: memref.store %[[RES_IF_0]]
+// CHECK: %[[RES_IF_1:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: memref.store %[[RES_IF_1]]
+// CHECK: %[[RES_IF_2:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: memref.store %[[RES_IF_2]]
+// CHECK: %[[RES_IF_3:.*]] = affine.apply #[[MAP6]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: memref.store %[[RES_IF_3]]
+// CHECK: %[[RES_IF_4:.*]] = affine.min #[[MAP7]]()[%[[STEP]], %[[LB]], %[[UB]], %[[SOME_VAL]]]
+// CHECK: memref.store %[[RES_IF_4]]
+#map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
+#map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)>
+#map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)>
+#map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)>
+#map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)>
+func @test_affine_min_rewrite(%lb : index, %ub: index,
+                              %step: index, %d : memref<?xindex>,
+                              %some_val: index) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %c3 = constant 3 : index
+  %c4 = constant 4 : index
+  scf.for %iv = %lb to %ub step %step {
+    // Most common case: Rewrite min(%ub - %iv, %step) to %step.
+    %m0 = affine.min #map0(%ub, %iv)[%step]
+    memref.store %m0, %d[%c0] : memref<?xindex>
+
+    // Increase %ub - %iv a little bit, pattern should still apply.
+    %m1 = affine.min #map1(%ub, %iv)[%step]
+    memref.store %m1, %d[%c1] : memref<?xindex>
+
+    // Rewrite min(%ub - %iv + 1, %step + 1) to %step + 1.
+    %m2 = affine.min #map2(%ub, %iv)[%step]
+    memref.store %m2, %d[%c2] : memref<?xindex>
+
+    // min(%ub - %iv - 1, %step) cannot be simplified because %ub - %iv - 1
+    // can be smaller than %step. (Can be simplified in if-statement.)
+    %m3 = affine.min #map3(%ub, %iv)[%step]
+    memref.store %m3, %d[%c3] : memref<?xindex>
+
+    // min(%ub - %iv, %step, %some_val) cannot be simplified because the range
+    // of %some_val is unknown.
+    %m4 = affine.min #map4(%ub, %iv, %some_val)[%step]
+    memref.store %m4, %d[%c4] : memref<?xindex>
+  }
+  return
+}
@@ -1479,6 +1479,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":Affine",
+        ":Analysis",
         ":DialectUtils",
         ":IR",
         ":MemRefDialect",