forked from OSchip/llvm-project
Extend loop unrolling and unroll-jamming to non-matching bound operands and
multi-result upper bounds, complete TODOs, fix/improve test cases. - complete TODOs for loop unroll/unroll-and-jam. Something as simple as "for %i = 0 to %N" wasn't being unrolled earlier (unless it had been written as "for %i = ()[s0] -> (0)()[%N] to %N"); addressed now. - update/replace getTripCountExpr with buildTripCountMapAndOperands; makes it more powerful as it composes inputs into it. - getCleanupLoopLowerBound and getUnrolledLoopUpperBound actually needed the same code; refactor and remove one. - reorganize test cases, write previous ones better; most of these changes are "label replacements". - fix wrongly labeled test cases in unroll-jam.mlir PiperOrigin-RevId: 238014653
This commit is contained in:
parent
9abea4a466
commit
075090f891
|
@ -36,10 +36,17 @@ class Instruction;
|
|||
class MemRefType;
|
||||
class Value;
|
||||
|
||||
/// Returns the trip count of the loop as an affine expression if the latter is
|
||||
/// expressible as an affine expression, and nullptr otherwise. The trip count
|
||||
/// expression is simplified before returning.
|
||||
AffineExpr getTripCountExpr(ConstOpPointer<AffineForOp> forOp);
|
||||
/// Returns the trip count of the loop as an affine map with its corresponding
|
||||
/// operands if the latter is expressible as an affine expression, and nullptr
|
||||
/// otherwise. This method always succeeds as long as the lower bound is not a
|
||||
/// multi-result map. The trip count expression is simplified before returning.
|
||||
/// This method only utilizes map composition to construct lower and upper
|
||||
/// bounds before computing the trip count expressions.
|
||||
// TODO(mlir-team): this should be moved into 'Transforms/' and be replaced by a
|
||||
// pure analysis method relying on FlatAffineConstraints
|
||||
void buildTripCountMapAndOperands(ConstOpPointer<AffineForOp> forOp,
|
||||
AffineMap *map,
|
||||
SmallVectorImpl<Value *> *operands);
|
||||
|
||||
/// Returns the trip count of the loop if it's a constant, None otherwise. This
|
||||
/// uses affine expression analysis and is able to determine constant trip count
|
||||
|
|
|
@ -34,6 +34,7 @@ template <typename T> class ConstOpPointer;
|
|||
class Function;
|
||||
class FuncBuilder;
|
||||
template <typename T> class OpPointer;
|
||||
class Value;
|
||||
|
||||
/// Unrolls this for instruction completely if the trip count is known to be
|
||||
/// constant. Returns failure otherwise.
|
||||
|
@ -66,16 +67,15 @@ LogicalResult promoteIfSingleIteration(OpPointer<AffineForOp> forOp);
|
|||
/// their body into the containing Block.
|
||||
void promoteSingleIterationLoops(Function *f);
|
||||
|
||||
/// Returns the lower bound of the cleanup loop when unrolling a loop
|
||||
/// with the specified unroll factor.
|
||||
AffineMap getCleanupLoopLowerBound(ConstOpPointer<AffineForOp> forOp,
|
||||
unsigned unrollFactor, FuncBuilder *builder);
|
||||
|
||||
/// Returns the upper bound of an unrolled loop when unrolling with
|
||||
/// the specified trip count, stride, and unroll factor.
|
||||
AffineMap getUnrolledLoopUpperBound(ConstOpPointer<AffineForOp> forOp,
|
||||
unsigned unrollFactor,
|
||||
FuncBuilder *builder);
|
||||
/// Computes the cleanup loop lower bound of the loop being unrolled with
|
||||
/// the specified unroll factor; this bound will also be upper bound of the main
|
||||
/// part of the unrolled loop. Computes the bound as an AffineMap with its
|
||||
/// operands or a null map when the trip count can't be expressed as an affine
|
||||
/// expression.
|
||||
void getCleanupLoopLowerBound(ConstOpPointer<AffineForOp> forOp,
|
||||
unsigned unrollFactor, AffineMap *map,
|
||||
SmallVectorImpl<Value *> *operands,
|
||||
FuncBuilder *builder);
|
||||
|
||||
/// Skew the instructions in the body of a 'for' instruction with the specified
|
||||
/// instruction-wise shifts. The shifts are with respect to the original
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "mlir/Analysis/AffineStructures.h"
|
||||
#include "mlir/Analysis/NestedMatcher.h"
|
||||
#include "mlir/Analysis/VectorAnalysis.h"
|
||||
#include "mlir/IR/AffineMap.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/Instruction.h"
|
||||
#include "mlir/StandardOps/Ops.h"
|
||||
|
@ -41,88 +42,141 @@ using namespace mlir;
|
|||
|
||||
/// Returns the trip count of the loop as an affine expression if the latter is
|
||||
/// expressible as an affine expression, and nullptr otherwise. The trip count
|
||||
/// expression is simplified before returning.
|
||||
AffineExpr mlir::getTripCountExpr(ConstOpPointer<AffineForOp> forOp) {
|
||||
// upper_bound - lower_bound
|
||||
/// expression is simplified before returning. This method only utilizes map
|
||||
/// composition to construct lower and upper bounds before computing the trip
|
||||
/// count expressions.
|
||||
// TODO(mlir-team): this should be moved into 'Transforms/' and be replaced by a
|
||||
// pure analysis method relying on FlatAffineConstraints; the latter will also
|
||||
// be more powerful (since both inequalities and equalities will be considered).
|
||||
void mlir::buildTripCountMapAndOperands(
|
||||
ConstOpPointer<AffineForOp> forOp, AffineMap *map,
|
||||
SmallVectorImpl<Value *> *tripCountOperands) {
|
||||
int64_t loopSpan;
|
||||
|
||||
int64_t step = forOp->getStep();
|
||||
auto *context = forOp->getInstruction()->getContext();
|
||||
|
||||
// We need to get operands; we aren't changing them here.
|
||||
auto ncForOp = *reinterpret_cast<OpPointer<AffineForOp> *>(&forOp);
|
||||
|
||||
FuncBuilder b(ncForOp->getInstruction());
|
||||
|
||||
if (forOp->hasConstantBounds()) {
|
||||
int64_t lb = forOp->getConstantLowerBound();
|
||||
int64_t ub = forOp->getConstantUpperBound();
|
||||
loopSpan = ub - lb;
|
||||
} else {
|
||||
auto lbMap = forOp->getLowerBoundMap();
|
||||
auto ubMap = forOp->getUpperBoundMap();
|
||||
// TODO(bondhugula): handle max/min of multiple expressions.
|
||||
if (lbMap.getNumResults() != 1 || ubMap.getNumResults() != 1)
|
||||
return nullptr;
|
||||
|
||||
// TODO(bondhugula): handle bounds with different operands.
|
||||
// Bounds have different operands, unhandled for now.
|
||||
if (!forOp->matchingBoundOperandList())
|
||||
return nullptr;
|
||||
|
||||
// ub_expr - lb_expr
|
||||
AffineExpr lbExpr(lbMap.getResult(0));
|
||||
AffineExpr ubExpr(ubMap.getResult(0));
|
||||
auto loopSpanExpr = simplifyAffineExpr(
|
||||
ubExpr - lbExpr, std::max(lbMap.getNumDims(), ubMap.getNumDims()),
|
||||
std::max(lbMap.getNumSymbols(), ubMap.getNumSymbols()));
|
||||
auto cExpr = loopSpanExpr.dyn_cast<AffineConstantExpr>();
|
||||
if (!cExpr)
|
||||
return loopSpanExpr.ceilDiv(step);
|
||||
loopSpan = cExpr.getValue();
|
||||
if (loopSpan < 0)
|
||||
loopSpan = 0;
|
||||
*map = b.getConstantAffineMap(ceilDiv(loopSpan, step));
|
||||
tripCountOperands->clear();
|
||||
return;
|
||||
}
|
||||
auto lbMap = forOp->getLowerBoundMap();
|
||||
auto ubMap = forOp->getUpperBoundMap();
|
||||
if (lbMap.getNumResults() != 1) {
|
||||
*map = AffineMap();
|
||||
return;
|
||||
}
|
||||
SmallVector<Value *, 4> lbOperands(ncForOp->getLowerBoundOperands());
|
||||
SmallVector<Value *, 4> ubOperands(ncForOp->getUpperBoundOperands());
|
||||
auto lb = b.create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands);
|
||||
SmallVector<Value *, 4> ubs;
|
||||
ubs.reserve(ubMap.getNumResults());
|
||||
for (auto ubExpr : ubMap.getResults())
|
||||
ubs.push_back(b.create<AffineApplyOp>(
|
||||
forOp->getLoc(),
|
||||
b.getAffineMap(ubMap.getNumDims(), ubMap.getNumSymbols(), {ubExpr}, {}),
|
||||
ubOperands));
|
||||
|
||||
// 0 iteration loops.
|
||||
if (loopSpan < 0)
|
||||
return 0;
|
||||
tripCountOperands->clear();
|
||||
tripCountOperands->reserve(1 + ubs.size());
|
||||
tripCountOperands->push_back(lb);
|
||||
tripCountOperands->append(ubs.begin(), ubs.end());
|
||||
|
||||
return getAffineConstantExpr(static_cast<uint64_t>(ceilDiv(loopSpan, step)),
|
||||
context);
|
||||
SmallVector<AffineExpr, 4> tripCountExprs(ubs.size());
|
||||
for (unsigned i = 0, e = ubs.size(); i < e; i++)
|
||||
tripCountExprs[i] =
|
||||
(b.getAffineDimExpr(1 + i) - b.getAffineDimExpr(0)).ceilDiv(step);
|
||||
*map = b.getAffineMap(1 + ubs.size(), 0, tripCountExprs, {});
|
||||
forOp->getInstruction()->getFunction()->dump();
|
||||
fullyComposeAffineMapAndOperands(map, tripCountOperands);
|
||||
*map = simplifyAffineMap(*map);
|
||||
canonicalizeMapAndOperands(map, tripCountOperands);
|
||||
// Remove any affine.apply's that became dead as a result of composition,
|
||||
// simplification, and canonicalization above.
|
||||
for (auto *v : ubs)
|
||||
if (v->use_empty())
|
||||
v->getDefiningInst()->erase();
|
||||
if (lb->use_empty())
|
||||
lb->erase();
|
||||
}
|
||||
|
||||
/// Returns the trip count of the loop if it's a constant, None otherwise. This
|
||||
/// method uses affine expression analysis (in turn using buildTripCountMapAndOperands) and is
|
||||
/// able to determine constant trip count in non-trivial cases.
|
||||
// FIXME(mlir-team): this is really relying on buildTripCountMapAndOperands;
|
||||
// being an analysis utility, it shouldn't. Replace with a version that just
|
||||
// works with analysis structures (FlatAffineConstraints) and thus doesn't
|
||||
// update the IR.
|
||||
llvm::Optional<uint64_t>
|
||||
mlir::getConstantTripCount(ConstOpPointer<AffineForOp> forOp) {
|
||||
auto tripCountExpr = getTripCountExpr(forOp);
|
||||
SmallVector<Value *, 4> operands;
|
||||
AffineMap map;
|
||||
buildTripCountMapAndOperands(forOp, &map, &operands);
|
||||
|
||||
if (!tripCountExpr)
|
||||
if (!map)
|
||||
return None;
|
||||
|
||||
if (auto constExpr = tripCountExpr.dyn_cast<AffineConstantExpr>())
|
||||
return constExpr.getValue();
|
||||
|
||||
return None;
|
||||
// Take the min if all trip counts are constant.
|
||||
Optional<uint64_t> tripCount;
|
||||
for (auto resultExpr : map.getResults()) {
|
||||
if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
|
||||
if (tripCount.hasValue())
|
||||
tripCount = std::min(tripCount.getValue(),
|
||||
static_cast<uint64_t>(constExpr.getValue()));
|
||||
else
|
||||
tripCount = constExpr.getValue();
|
||||
} else
|
||||
return None;
|
||||
}
|
||||
return tripCount;
|
||||
}
|
||||
|
||||
/// Returns the greatest known integral divisor of the trip count. Affine
|
||||
/// expression analysis is used (indirectly through buildTripCountMapAndOperands), and
|
||||
/// this method is thus able to determine non-trivial divisors.
|
||||
uint64_t mlir::getLargestDivisorOfTripCount(ConstOpPointer<AffineForOp> forOp) {
|
||||
auto tripCountExpr = getTripCountExpr(forOp);
|
||||
SmallVector<Value *, 4> operands;
|
||||
AffineMap map;
|
||||
buildTripCountMapAndOperands(forOp, &map, &operands);
|
||||
|
||||
if (!tripCountExpr)
|
||||
if (!map)
|
||||
return 1;
|
||||
|
||||
if (auto constExpr = tripCountExpr.dyn_cast<AffineConstantExpr>()) {
|
||||
uint64_t tripCount = constExpr.getValue();
|
||||
|
||||
// 0 iteration loops (greatest divisor is 2^64 - 1).
|
||||
if (tripCount == 0)
|
||||
return ULONG_MAX;
|
||||
|
||||
// The greatest divisor is the trip count.
|
||||
return tripCount;
|
||||
// The largest divisor of the trip count is the GCD of the individual largest
|
||||
// divisors.
|
||||
assert(map.getNumResults() >= 1 && "expected one or more results");
|
||||
Optional<uint64_t> gcd;
|
||||
for (auto resultExpr : map.getResults()) {
|
||||
uint64_t thisGcd;
|
||||
if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
|
||||
uint64_t tripCount = constExpr.getValue();
|
||||
// 0 iteration loops (greatest divisor is 2^64 - 1).
|
||||
if (tripCount == 0)
|
||||
thisGcd = std::numeric_limits<uint64_t>::max();
|
||||
else
|
||||
// The greatest divisor is the trip count.
|
||||
thisGcd = tripCount;
|
||||
} else {
|
||||
// Trip count is not a known constant; return its largest known divisor.
|
||||
thisGcd = resultExpr.getLargestKnownDivisor();
|
||||
}
|
||||
if (gcd.hasValue())
|
||||
gcd = llvm::GreatestCommonDivisor64(gcd.getValue(), thisGcd);
|
||||
else
|
||||
gcd = thisGcd;
|
||||
}
|
||||
|
||||
// Trip count is not a known constant; return its largest known divisor.
|
||||
return tripCountExpr.getLargestKnownDivisor();
|
||||
assert(gcd.hasValue() && "value expected per above logic");
|
||||
return gcd.getValue();
|
||||
}
|
||||
|
||||
bool mlir::isAccessInvariant(const Value &iv, const Value &index) {
|
||||
|
|
|
@ -152,32 +152,23 @@ LogicalResult mlir::loopUnrollJamByFactor(OpPointer<AffineForOp> forOp,
|
|||
|
||||
assert(unrollJamFactor >= 1 && "unroll jam factor should be >= 1");
|
||||
|
||||
if (unrollJamFactor == 1 || forOp->getBody()->empty())
|
||||
if (unrollJamFactor == 1)
|
||||
return promoteIfSingleIteration(forOp);
|
||||
|
||||
if (forOp->getBody()->empty())
|
||||
return failure();
|
||||
|
||||
// Loops where both lower and upper bounds are multi-result maps won't be
|
||||
// unrolled (since the trip count can't be expressed as an affine function in
|
||||
// general).
|
||||
// TODO(mlir-team): this may not be common, but we could support the case
|
||||
// where the lower bound is a multi-result map and the ub is a single result
|
||||
// one.
|
||||
if (forOp->getLowerBoundMap().getNumResults() != 1)
|
||||
return failure();
|
||||
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
|
||||
if (!mayBeConstantTripCount.hasValue() &&
|
||||
getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0)
|
||||
return failure();
|
||||
|
||||
auto lbMap = forOp->getLowerBoundMap();
|
||||
auto ubMap = forOp->getUpperBoundMap();
|
||||
|
||||
// Loops with max/min expressions won't be unrolled here (the output can't be
|
||||
// expressed as a Function in the general case). However, the right way to
|
||||
// do such unrolling for a Function would be to specialize the loop for the
|
||||
// 'hotspot' case and unroll that hotspot.
|
||||
if (lbMap.getNumResults() != 1 || ubMap.getNumResults() != 1)
|
||||
return failure();
|
||||
|
||||
// Same operand list for lower and upper bound for now.
|
||||
// TODO(bondhugula): handle bounds with different sets of operands.
|
||||
if (!forOp->matchingBoundOperandList())
|
||||
return failure();
|
||||
|
||||
// If the trip count is lower than the unroll jam factor, no unroll jam.
|
||||
// TODO(bondhugula): option to specify cleanup loop unrolling.
|
||||
if (mayBeConstantTripCount.hasValue() &&
|
||||
mayBeConstantTripCount.getValue() < unrollJamFactor)
|
||||
return failure();
|
||||
|
@ -191,21 +182,25 @@ LogicalResult mlir::loopUnrollJamByFactor(OpPointer<AffineForOp> forOp,
|
|||
|
||||
// Generate the cleanup loop if trip count isn't a multiple of
|
||||
// unrollJamFactor.
|
||||
if (mayBeConstantTripCount.hasValue() &&
|
||||
mayBeConstantTripCount.getValue() % unrollJamFactor != 0) {
|
||||
if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
|
||||
// Insert the cleanup loop right after 'forOp'.
|
||||
FuncBuilder builder(forInst->getBlock(),
|
||||
std::next(Block::iterator(forInst)));
|
||||
auto cleanupAffineForOp = builder.clone(*forInst)->cast<AffineForOp>();
|
||||
cleanupAffineForOp->setLowerBoundMap(
|
||||
getCleanupLoopLowerBound(forOp, unrollJamFactor, &builder));
|
||||
// Adjust the lower bound of the cleanup loop; its upper bound is the same
|
||||
// as the original loop's upper bound.
|
||||
AffineMap cleanupMap;
|
||||
SmallVector<Value *, 4> cleanupOperands;
|
||||
getCleanupLoopLowerBound(forOp, unrollJamFactor, &cleanupMap,
|
||||
&cleanupOperands, &builder);
|
||||
cleanupAffineForOp->setLowerBound(cleanupOperands, cleanupMap);
|
||||
|
||||
// The upper bound needs to be adjusted.
|
||||
forOp->setUpperBoundMap(
|
||||
getUnrolledLoopUpperBound(forOp, unrollJamFactor, &builder));
|
||||
|
||||
// Promote the loop body up if this has turned into a single iteration loop.
|
||||
// Promote the cleanup loop if it has turned into a single iteration loop.
|
||||
promoteIfSingleIteration(cleanupAffineForOp);
|
||||
|
||||
// Adjust the upper bound of the original loop - it will be the same as the
|
||||
// cleanup loop's lower bound. Its lower bound remains unchanged.
|
||||
forOp->setUpperBound(cleanupOperands, cleanupMap);
|
||||
}
|
||||
|
||||
// Scale the step of loop being unroll-jammed by the unroll-jam factor.
|
||||
|
|
|
@ -38,54 +38,78 @@
|
|||
|
||||
using namespace mlir;
|
||||
|
||||
/// Returns the upper bound of an unrolled loop with lower bound 'lb' and with
|
||||
/// the specified trip count, stride, and unroll factor. Returns nullptr when
|
||||
/// the trip count can't be expressed as an affine expression.
|
||||
AffineMap mlir::getUnrolledLoopUpperBound(ConstOpPointer<AffineForOp> forOp,
|
||||
unsigned unrollFactor,
|
||||
FuncBuilder *builder) {
|
||||
/// Computes the cleanup loop lower bound of the loop being unrolled with
|
||||
/// the specified unroll factor; this bound will also be upper bound of the main
|
||||
/// part of the unrolled loop. Computes the bound as an AffineMap with its
|
||||
/// operands or a null map when the trip count can't be expressed as an affine
|
||||
/// expression.
|
||||
void mlir::getCleanupLoopLowerBound(ConstOpPointer<AffineForOp> forOp,
|
||||
unsigned unrollFactor, AffineMap *map,
|
||||
SmallVectorImpl<Value *> *operands,
|
||||
FuncBuilder *b) {
|
||||
auto lbMap = forOp->getLowerBoundMap();
|
||||
|
||||
// Single result lower bound map only.
|
||||
if (lbMap.getNumResults() != 1)
|
||||
return AffineMap();
|
||||
if (lbMap.getNumResults() != 1) {
|
||||
*map = AffineMap();
|
||||
return;
|
||||
}
|
||||
|
||||
// Sometimes, the trip count cannot be expressed as an affine expression.
|
||||
auto tripCount = getTripCountExpr(forOp);
|
||||
if (!tripCount)
|
||||
return AffineMap();
|
||||
|
||||
AffineExpr lb(lbMap.getResult(0));
|
||||
unsigned step = forOp->getStep();
|
||||
auto newUb = lb + (tripCount - tripCount % unrollFactor - 1) * step;
|
||||
|
||||
return builder->getAffineMap(lbMap.getNumDims(), lbMap.getNumSymbols(),
|
||||
{newUb}, {});
|
||||
}
|
||||
|
||||
/// Returns the lower bound of the cleanup loop when unrolling a loop with lower
|
||||
/// bound 'lb' and with the specified trip count, stride, and unroll factor.
|
||||
/// Returns an AffineMap with nullptr storage (that evaluates to false)
|
||||
/// when the trip count can't be expressed as an affine expression.
|
||||
AffineMap mlir::getCleanupLoopLowerBound(ConstOpPointer<AffineForOp> forOp,
|
||||
unsigned unrollFactor,
|
||||
FuncBuilder *builder) {
|
||||
auto lbMap = forOp->getLowerBoundMap();
|
||||
|
||||
// Single result lower bound map only.
|
||||
if (lbMap.getNumResults() != 1)
|
||||
return AffineMap();
|
||||
AffineMap tripCountMap;
|
||||
SmallVector<Value *, 4> tripCountOperands;
|
||||
buildTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);
|
||||
|
||||
// Sometimes the trip count cannot be expressed as an affine expression.
|
||||
AffineExpr tripCount(getTripCountExpr(forOp));
|
||||
if (!tripCount)
|
||||
return AffineMap();
|
||||
if (!tripCountMap) {
|
||||
*map = AffineMap();
|
||||
return;
|
||||
}
|
||||
|
||||
AffineExpr lb(lbMap.getResult(0));
|
||||
unsigned step = forOp->getStep();
|
||||
auto newLb = lb + (tripCount - tripCount % unrollFactor) * step;
|
||||
return builder->getAffineMap(lbMap.getNumDims(), lbMap.getNumSymbols(),
|
||||
{newLb}, {});
|
||||
|
||||
// We need to get non-const operands; we aren't changing them here.
|
||||
auto ncForOp = *reinterpret_cast<OpPointer<AffineForOp> *>(&forOp);
|
||||
|
||||
SmallVector<Value *, 4> lbOperands(ncForOp->getLowerBoundOperands());
|
||||
auto lb = b->create<AffineApplyOp>(ncForOp->getLoc(), lbMap, lbOperands);
|
||||
|
||||
// For each upper bound expr, get the range.
|
||||
// Eg: for %i = lb to min (ub1, ub2),
|
||||
// where tripCountExprs yield (tr1, tr2), we create affine.apply's:
|
||||
// lb + tr1 - tr1 % ufactor, lb + tr2 - tr2 % ufactor; the results of all
|
||||
// these affine.apply's make up the cleanup loop lower bound.
|
||||
SmallVector<AffineExpr, 4> bumpExprs(tripCountMap.getNumResults());
|
||||
SmallVector<Value *, 4> bumpValues(tripCountMap.getNumResults());
|
||||
for (unsigned i = 0, e = tripCountMap.getNumResults(); i < e; i++) {
|
||||
auto tripCountExpr = tripCountMap.getResult(i);
|
||||
bumpExprs[i] = (tripCountExpr - tripCountExpr % unrollFactor) * step;
|
||||
auto bumpMap =
|
||||
b->getAffineMap(tripCountMap.getNumDims(), tripCountMap.getNumSymbols(),
|
||||
bumpExprs[i], {});
|
||||
bumpValues[i] =
|
||||
b->create<AffineApplyOp>(forOp->getLoc(), bumpMap, tripCountOperands);
|
||||
}
|
||||
|
||||
SmallVector<AffineExpr, 4> newUbExprs(tripCountMap.getNumResults());
|
||||
for (unsigned i = 0, e = bumpExprs.size(); i < e; i++)
|
||||
newUbExprs[i] = b->getAffineDimExpr(0) + b->getAffineDimExpr(i + 1);
|
||||
|
||||
operands->clear();
|
||||
operands->push_back(lb);
|
||||
operands->append(bumpValues.begin(), bumpValues.end());
|
||||
*map = b->getAffineMap(1 + tripCountMap.getNumResults(), 0, newUbExprs, {});
|
||||
// Simplify the map + operands.
|
||||
fullyComposeAffineMapAndOperands(map, operands);
|
||||
*map = simplifyAffineMap(*map);
|
||||
canonicalizeMapAndOperands(map, operands);
|
||||
// Remove any affine.apply's that became dead from the simplification above.
|
||||
for (auto *v : bumpValues) {
|
||||
if (v->use_empty()) {
|
||||
v->getDefiningInst()->erase();
|
||||
}
|
||||
}
|
||||
if (lb->use_empty())
|
||||
lb->erase();
|
||||
}
|
||||
|
||||
/// Promotes the loop body of a forOp to its containing block if the forOp
|
||||
|
@ -369,25 +393,17 @@ LogicalResult mlir::loopUnrollByFactor(OpPointer<AffineForOp> forOp,
|
|||
if (forOp->getBody()->empty())
|
||||
return failure();
|
||||
|
||||
auto lbMap = forOp->getLowerBoundMap();
|
||||
auto ubMap = forOp->getUpperBoundMap();
|
||||
|
||||
// Loops with max/min expressions won't be unrolled here (the output can't be
|
||||
// expressed as a Function in the general case). However, the right way to
|
||||
// do such unrolling for a Function would be to specialize the loop for the
|
||||
// 'hotspot' case and unroll that hotspot.
|
||||
if (lbMap.getNumResults() != 1 || ubMap.getNumResults() != 1)
|
||||
// Loops where the lower bound is a max expression aren't supported for
|
||||
// unrolling since the trip count can't be expressed as an affine function when
|
||||
// both the lower bound and the upper bound are multi-result maps. However,
|
||||
// one meaningful way to do such unrolling would be to specialize the loop for
|
||||
// the 'hotspot' case and unroll that hotspot.
|
||||
if (forOp->getLowerBoundMap().getNumResults() != 1)
|
||||
return failure();
|
||||
|
||||
// Same operand list for lower and upper bound for now.
|
||||
// TODO(bondhugula): handle bounds with different operand lists.
|
||||
if (!forOp->matchingBoundOperandList())
|
||||
return failure();
|
||||
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
|
||||
// If the trip count is lower than the unroll factor, no unrolled body.
|
||||
// TODO(bondhugula): option to specify cleanup loop unrolling.
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
if (mayBeConstantTripCount.hasValue() &&
|
||||
mayBeConstantTripCount.getValue() < unrollFactor)
|
||||
return failure();
|
||||
|
@ -397,21 +413,20 @@ LogicalResult mlir::loopUnrollByFactor(OpPointer<AffineForOp> forOp,
|
|||
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
|
||||
FuncBuilder builder(forInst->getBlock(), ++Block::iterator(forInst));
|
||||
auto cleanupForInst = builder.clone(*forInst)->cast<AffineForOp>();
|
||||
auto clLbMap = getCleanupLoopLowerBound(forOp, unrollFactor, &builder);
|
||||
assert(clLbMap &&
|
||||
"cleanup loop lower bound map for single result bound maps can "
|
||||
"always be determined");
|
||||
cleanupForInst->setLowerBoundMap(clLbMap);
|
||||
AffineMap cleanupMap;
|
||||
SmallVector<Value *, 4> cleanupOperands;
|
||||
getCleanupLoopLowerBound(forOp, unrollFactor, &cleanupMap, &cleanupOperands,
|
||||
&builder);
|
||||
assert(cleanupMap &&
|
||||
"cleanup loop lower bound map for single result lower bound maps "
|
||||
"can always be determined");
|
||||
cleanupForInst->setLowerBound(cleanupOperands, cleanupMap);
|
||||
// Promote the loop body up if this has turned into a single iteration loop.
|
||||
promoteIfSingleIteration(cleanupForInst);
|
||||
|
||||
// Adjust upper bound.
|
||||
auto unrolledUbMap =
|
||||
getUnrolledLoopUpperBound(forOp, unrollFactor, &builder);
|
||||
assert(unrolledUbMap &&
|
||||
"upper bound map can alwayys be determined for an unrolled loop "
|
||||
"with single result bounds");
|
||||
forOp->setUpperBoundMap(unrolledUbMap);
|
||||
// Adjust upper bound of the original loop; this is the same as the lower
|
||||
// bound of the cleanup loop.
|
||||
forOp->setUpperBound(cleanupOperands, cleanupMap);
|
||||
}
|
||||
|
||||
// Scale the step of loop being unrolled by unroll factor.
|
||||
|
|
|
@ -1,20 +1,21 @@
|
|||
// RUN: mlir-opt %s -loop-unroll-jam -unroll-jam-factor=2 | FileCheck %s
|
||||
|
||||
// CHECK: [[MAP_PLUS_1:#map[0-9]+]] = (d0) -> (d0 + 1)
|
||||
// This should be matched to M1, but M1 is defined later.
|
||||
// CHECK: {{#map[0-9]+}} = ()[s0] -> (s0 + 8)
|
||||
// CHECK-DAG: [[MAP_PLUS_1:#map[0-9]+]] = (d0) -> (d0 + 1)
|
||||
// CHECK-DAG: [[M1:#map[0-9]+]] = ()[s0] -> (s0 + 8)
|
||||
// CHECK-DAG: [[MAP_DIV_OFFSET:#map[0-9]+]] = ()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)
|
||||
// CHECK-DAG: [[MAP_MULTI_RES:#map[0-9]+]] = ()[s0, s1] -> ((s0 floordiv 2) * 2, (s1 floordiv 2) * 2, 1024)
|
||||
|
||||
// CHECK-LABEL: func @unroll_jam_imperfect_nest() {
|
||||
func @unroll_jam_imperfect_nest() {
|
||||
// CHECK: %c100 = constant 100 : index
|
||||
// CHECK-NEXT: for %i0 = 0 to 99 step 2 {
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 101 {
|
||||
// CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32
|
||||
// CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0)
|
||||
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
|
||||
%x = "addi32"(%i, %i) : (index, index) -> i32
|
||||
for %j = 0 to 17 {
|
||||
// CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32
|
||||
// CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32
|
||||
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
|
||||
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0)
|
||||
// CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
|
@ -30,31 +31,28 @@ func @unroll_jam_imperfect_nest() {
|
|||
// cleanup loop (single iteration)
|
||||
// CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32
|
||||
// CHECK-NEXT: for %i2 = 0 to 17 {
|
||||
// CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32
|
||||
// CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32
|
||||
// CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32
|
||||
// CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: %14 = "addi32"(%c100, %c100) : (index, index) -> i32
|
||||
return
|
||||
}
|
||||
|
||||
// UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_1(%arg0: index) {
|
||||
// CHECK-LABEL: func @loop_nest_unknown_count_1(%arg0: index) {
|
||||
func @loop_nest_unknown_count_1(%N : index) {
|
||||
// UNROLL-BY-4-NEXT: for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 {
|
||||
// UNROLL-BY-4-NEXT: for %i1 = 1 to 100 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
// CHECK-NEXT: for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 {
|
||||
// CHECK-NEXT: for %i1 = 1 to 100 {
|
||||
// CHECK-NEXT: %0 = "foo"() : () -> i32
|
||||
// CHECK-NEXT: %1 = "foo"() : () -> i32
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// A cleanup loop should be generated here.
|
||||
// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
|
||||
// UNROLL-BY-4-NEXT: for %i3 = 1 to 100 {
|
||||
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4_NEXT: }
|
||||
// UNROLL-BY-4_NEXT: }
|
||||
// Specify the lower bound in a form so that both lb and ub operands match.
|
||||
for %i = ()[s0] -> (1)()[%N] to %N {
|
||||
// CHECK-NEXT: for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 {
|
||||
// CHECK-NEXT: for %i3 = 1 to 100 {
|
||||
// CHECK-NEXT: %2 = "foo"() : () -> i32
|
||||
// CHECK_NEXT: }
|
||||
// CHECK_NEXT: }
|
||||
for %i = 1 to %N {
|
||||
for %j = 1 to 100 {
|
||||
%x = "foo"() : () -> i32
|
||||
}
|
||||
|
@ -62,29 +60,47 @@ func @loop_nest_unknown_count_1(%N : index) {
|
|||
return
|
||||
}
|
||||
|
||||
// UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_2(%arg0: index) {
|
||||
// CHECK-LABEL: func @loop_nest_unknown_count_2(%arg0: index) {
|
||||
func @loop_nest_unknown_count_2(%arg : index) {
|
||||
// UNROLL-BY-4-NEXT: for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 {
|
||||
// UNROLL-BY-4-NEXT: for %i1 = 1 to 100 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "foo"(%i0) : (index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
|
||||
// UNROLL-BY-4-NEXT: %2 = "foo"(%1) : (index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %3 = affine.apply #map{{[0-9]+}}(%i0)
|
||||
// UNROLL-BY-4-NEXT: %4 = "foo"(%3) : (index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]+}}(%i0)
|
||||
// UNROLL-BY-4-NEXT: %6 = "foo"(%5) : (index) -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
// CHECK-NEXT: for %i0 = %arg0 to [[M1]]()[%arg0] step 2 {
|
||||
// CHECK-NEXT: for %i1 = 1 to 100 {
|
||||
// CHECK-NEXT: %0 = "foo"(%i0) : (index) -> i32
|
||||
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
|
||||
// CHECK-NEXT: %2 = "foo"(%1) : (index) -> i32
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// The cleanup loop is a single iteration one and is promoted.
|
||||
// UNROLL-BY-4-NEXT: %7 = affine.apply [[M1:#map{{[0-9]+}}]]()[%arg0]
|
||||
// UNROLL-BY-4-NEXT: for %i3 = 1 to 100 {
|
||||
// UNROLL-BY-4-NEXT: %8 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4_NEXT: }
|
||||
// Specify the lower bound in a form so that both lb and ub operands match.
|
||||
for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] {
|
||||
// CHECK-NEXT: %3 = affine.apply [[M1]]()[%arg0]
|
||||
// CHECK-NEXT: for %i2 = 1 to 100 {
|
||||
// CHECK-NEXT: %4 = "foo"(%3) : (index) -> i32
|
||||
// CHECK_NEXT: }
|
||||
for %i = %arg to ()[s0] -> (s0+9) ()[%arg] {
|
||||
for %j = 1 to 100 {
|
||||
%x = "foo"(%i) : (index) -> i32
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @loop_nest_symbolic_and_min_upper_bound
|
||||
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
|
||||
for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] {
|
||||
for %j = 0 to %K {
|
||||
"foo"(%i, %j) : (index, index) -> ()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
// CHECK-NEXT: for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 {
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg2 {
|
||||
// CHECK-NEXT: "foo"(%i0, %i1) : (index, index) -> ()
|
||||
// CHECK-NEXT: %0 = affine.apply #map2(%i0)
|
||||
// CHECK-NEXT: "foo"(%0, %i1) : (index, index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] {
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg2 {
|
||||
// CHECK-NEXT: "foo"(%i2, %i3) : (index, index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
|
|
|
@ -1,253 +1,244 @@
|
|||
// RUN: mlir-opt %s -loop-unroll -unroll-full | FileCheck %s
|
||||
// RUN: mlir-opt %s -loop-unroll -unroll-full | FileCheck %s --check-prefix UNROLL-FULL
|
||||
// RUN: mlir-opt %s -loop-unroll -unroll-full -unroll-full-threshold=2 | FileCheck %s --check-prefix SHORT
|
||||
// RUN: mlir-opt %s -loop-unroll -unroll-factor=4 | FileCheck %s --check-prefix UNROLL-BY-4
|
||||
// RUN: mlir-opt %s -loop-unroll -unroll-factor=1 | FileCheck %s --check-prefix UNROLL-BY-1
|
||||
|
||||
// CHECK: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 1)
|
||||
// CHECK: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 2)
|
||||
// CHECK: [[MAP2:#map[0-9]+]] = (d0) -> (d0 + 3)
|
||||
// CHECK: [[MAP3:#map[0-9]+]] = (d0) -> (d0 + 4)
|
||||
// CHECK: [[MAP4:#map[0-9]+]] = (d0, d1) -> (d0 + 1)
|
||||
// CHECK: [[MAP5:#map[0-9]+]] = (d0, d1) -> (d0 + 3)
|
||||
// CHECK: [[MAP6:#map[0-9]+]] = (d0)[s0] -> (d0 + s0 + 1)
|
||||
// CHECK: [[MAP7:#map[0-9]+]] = (d0) -> (d0 + 5)
|
||||
// CHECK: [[MAP8:#map[0-9]+]] = (d0) -> (d0 + 6)
|
||||
// CHECK: [[MAP9:#map[0-9]+]] = (d0) -> (d0 + 7)
|
||||
// CHECK: [[MAP10:#map[0-9]+]] = (d0, d1) -> (d0 * 16 + d1)
|
||||
// CHECK: [[MAP11:#map[0-9]+]] = (d0) -> (d0 + 8)
|
||||
// CHECK: [[MAP12:#map[0-9]+]] = (d0) -> (d0 + 9)
|
||||
// CHECK: [[MAP13:#map[0-9]+]] = (d0) -> (d0 + 10)
|
||||
// CHECK: [[MAP14:#map[0-9]+]] = (d0) -> (d0 + 15)
|
||||
// CHECK: [[MAP15:#map[0-9]+]] = (d0) -> (d0 + 20)
|
||||
// CHECK: [[MAP16:#map[0-9]+]] = (d0) -> (d0 + 25)
|
||||
// CHECK: [[MAP17:#map[0-9]+]] = (d0) -> (d0 + 30)
|
||||
// CHECK: [[MAP18:#map[0-9]+]] = (d0) -> (d0 + 35)
|
||||
// UNROLL-FULL-DAG: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 1)
|
||||
// UNROLL-FULL-DAG: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 2)
|
||||
// UNROLL-FULL-DAG: [[MAP2:#map[0-9]+]] = (d0) -> (d0 + 3)
|
||||
// UNROLL-FULL-DAG: [[MAP3:#map[0-9]+]] = (d0) -> (d0 + 4)
|
||||
// UNROLL-FULL-DAG: [[MAP4:#map[0-9]+]] = (d0, d1) -> (d0 + 1)
|
||||
// UNROLL-FULL-DAG: [[MAP5:#map[0-9]+]] = (d0, d1) -> (d0 + 3)
|
||||
// UNROLL-FULL-DAG: [[MAP6:#map[0-9]+]] = (d0)[s0] -> (d0 + s0 + 1)
|
||||
|
||||
// SHORT: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 1)
|
||||
// SHORT: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 2)
|
||||
// SHORT: [[MAP2:#map[0-9]+]] = (d0, d1) -> (d0 + 1)
|
||||
// SHORT: [[MAP3:#map[0-9]+]] = (d0, d1) -> (d0 + 3)
|
||||
// SHORT: [[MAP4:#map[0-9]+]] = (d0)[s0] -> (d0 + s0 + 1)
|
||||
// SHORT: [[MAP5:#map[0-9]+]] = (d0, d1) -> (d0 * 16 + d1)
|
||||
// SHORT-DAG: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 1)
|
||||
|
||||
// UNROLL-BY-4: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 1)
|
||||
// UNROLL-BY-4: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 2)
|
||||
// UNROLL-BY-4: [[MAP2:#map[0-9]+]] = (d0) -> (d0 + 3)
|
||||
// UNROLL-BY-4: [[MAP3:#map[0-9]+]] = (d0, d1) -> (d0 + 1)
|
||||
// UNROLL-BY-4: [[MAP4:#map[0-9]+]] = (d0, d1) -> (d0 + 3)
|
||||
// UNROLL-BY-4: [[MAP5:#map[0-9]+]] = (d0)[s0] -> (d0 + s0 + 1)
|
||||
// UNROLL-BY-4: [[MAP6:#map[0-9]+]] = (d0, d1) -> (d0 * 16 + d1)
|
||||
// UNROLL-BY-4: [[MAP7:#map[0-9]+]] = (d0) -> (d0 + 5)
|
||||
// UNROLL-BY-4: [[MAP8:#map[0-9]+]] = (d0) -> (d0 + 10)
|
||||
// UNROLL-BY-4: [[MAP9:#map[0-9]+]] = (d0) -> (d0 + 15)
|
||||
// UNROLL-BY-4: [[MAP10:#map[0-9]+]] = (d0) -> (0)
|
||||
// UNROLL-BY-4: [[MAP11:#map[0-9]+]] = (d0) -> (d0)
|
||||
// UNROLL-BY-4: [[MAP12:#map[0-9]+]] = ()[s0] -> (0)
|
||||
// UNROLL-BY-4-DAG: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 1)
|
||||
// UNROLL-BY-4-DAG: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 2)
|
||||
// UNROLL-BY-4-DAG: [[MAP2:#map[0-9]+]] = (d0) -> (d0 + 3)
|
||||
// UNROLL-BY-4-DAG: [[MAP3:#map[0-9]+]] = (d0, d1) -> (d0 + 1)
|
||||
// UNROLL-BY-4-DAG: [[MAP4:#map[0-9]+]] = (d0, d1) -> (d0 + 3)
|
||||
// UNROLL-BY-4-DAG: [[MAP5:#map[0-9]+]] = (d0)[s0] -> (d0 + s0 + 1)
|
||||
// UNROLL-BY-4-DAG: [[MAP6:#map[0-9]+]] = (d0, d1) -> (d0 * 16 + d1)
|
||||
// UNROLL-BY-4-DAG: [[MAP11:#map[0-9]+]] = (d0) -> (d0)
|
||||
// UNROLL-BY-4-DAG: [[MAP_TRIP_COUNT_MULTIPLE_FOUR:#map[0-9]+]] = ()[s0, s1, s2] -> (s0 + ((-s0 + s1) floordiv 4) * 4, s0 + ((-s0 + s2) floordiv 4) * 4, s0 + ((-s0 + 1024) floordiv 4) * 4)
|
||||
|
||||
// CHECK-LABEL: func @loop_nest_simplest() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
|
||||
func @loop_nest_simplest() {
|
||||
// CHECK: for %i0 = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// CHECK: %c1_i32 = constant 1 : i32
|
||||
// CHECK-NEXT: %c1_i32_0 = constant 1 : i32
|
||||
// CHECK-NEXT: %c1_i32_1 = constant 1 : i32
|
||||
// CHECK-NEXT: %c1_i32_2 = constant 1 : i32
|
||||
// UNROLL-FULL: %c1_i32 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_0 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_1 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_2 = constant 1 : i32
|
||||
for %j = 0 to 4 {
|
||||
%x = constant 1 : i32
|
||||
}
|
||||
} // CHECK: }
|
||||
return // CHECK: return
|
||||
} // CHECK }
|
||||
} // UNROLL-FULL: }
|
||||
return // UNROLL-FULL: return
|
||||
} // UNROLL-FULL }
|
||||
|
||||
// CHECK-LABEL: func @loop_nest_simple_iv_use() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_simple_iv_use() {
|
||||
func @loop_nest_simple_iv_use() {
|
||||
// CHECK: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// CHECK: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
|
||||
// CHECK: %1 = affine.apply [[MAP0]](%c0)
|
||||
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
|
||||
// CHECK: %3 = affine.apply [[MAP1]](%c0)
|
||||
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
|
||||
// CHECK: %5 = affine.apply [[MAP2]](%c0)
|
||||
// CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
// UNROLL-FULL: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
|
||||
// UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
|
||||
// UNROLL-FULL: %3 = affine.apply [[MAP1]](%c0)
|
||||
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
|
||||
// UNROLL-FULL: %5 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
for %j = 0 to 4 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
}
|
||||
} // CHECK: }
|
||||
return // CHECK: return
|
||||
} // CHECK }
|
||||
} // UNROLL-FULL: }
|
||||
return // UNROLL-FULL: return
|
||||
} // UNROLL-FULL }
|
||||
|
||||
// Operations in the loop body have results that are used therein.
|
||||
// CHECK-LABEL: func @loop_nest_body_def_use() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_body_def_use() {
|
||||
func @loop_nest_body_def_use() {
|
||||
// CHECK: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// CHECK: %c0_0 = constant 0 : index
|
||||
// UNROLL-FULL: %c0_0 = constant 0 : index
|
||||
%c0 = constant 0 : index
|
||||
// CHECK: %0 = affine.apply [[MAP0]](%c0)
|
||||
// CHECK-NEXT: %1 = "addi32"(%0, %c0_0) : (index, index) -> index
|
||||
// CHECK-NEXT: %2 = affine.apply [[MAP0]](%c0)
|
||||
// CHECK-NEXT: %3 = affine.apply [[MAP0]](%2)
|
||||
// CHECK-NEXT: %4 = "addi32"(%3, %c0_0) : (index, index) -> index
|
||||
// CHECK-NEXT: %5 = affine.apply [[MAP1]](%c0)
|
||||
// CHECK-NEXT: %6 = affine.apply [[MAP0]](%5)
|
||||
// CHECK-NEXT: %7 = "addi32"(%6, %c0_0) : (index, index) -> index
|
||||
// CHECK-NEXT: %8 = affine.apply [[MAP2]](%c0)
|
||||
// CHECK-NEXT: %9 = affine.apply [[MAP0]](%8)
|
||||
// CHECK-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %c0_0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %2 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %3 = affine.apply [[MAP0]](%2)
|
||||
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %c0_0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %5 = affine.apply [[MAP1]](%c0)
|
||||
// UNROLL-FULL-NEXT: %6 = affine.apply [[MAP0]](%5)
|
||||
// UNROLL-FULL-NEXT: %7 = "addi32"(%6, %c0_0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%8)
|
||||
// UNROLL-FULL-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
|
||||
for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %c0) : (index, index) -> index
|
||||
}
|
||||
} // CHECK: }
|
||||
return // CHECK: return
|
||||
} // CHECK }
|
||||
} // UNROLL-FULL: }
|
||||
return // UNROLL-FULL: return
|
||||
} // UNROLL-FULL }
|
||||
|
||||
// CHECK-LABEL: func @loop_nest_strided() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_strided() {
|
||||
func @loop_nest_strided() {
|
||||
// CHECK: %c2 = constant 2 : index
|
||||
// CHECK-NEXT: %c2_0 = constant 2 : index
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 {
|
||||
// UNROLL-FULL: %c2 = constant 2 : index
|
||||
// UNROLL-FULL-NEXT: %c2_0 = constant 2 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// CHECK: %0 = affine.apply [[MAP0]](%c2_0)
|
||||
// CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// CHECK-NEXT: %2 = affine.apply [[MAP1]](%c2_0)
|
||||
// CHECK-NEXT: %3 = affine.apply [[MAP0]](%2)
|
||||
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP0]](%c2_0)
|
||||
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %2 = affine.apply [[MAP1]](%c2_0)
|
||||
// UNROLL-FULL-NEXT: %3 = affine.apply [[MAP0]](%2)
|
||||
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
|
||||
for %j = 2 to 6 step 2 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
}
|
||||
// CHECK: %5 = affine.apply [[MAP0]](%c2)
|
||||
// CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
|
||||
// CHECK-NEXT: %7 = affine.apply [[MAP1]](%c2)
|
||||
// CHECK-NEXT: %8 = affine.apply [[MAP0]](%7)
|
||||
// CHECK-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> index
|
||||
// CHECK-NEXT: %10 = affine.apply [[MAP3]](%c2)
|
||||
// CHECK-NEXT: %11 = affine.apply [[MAP0]](%10)
|
||||
// CHECK-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
|
||||
// UNROLL-FULL: %5 = affine.apply [[MAP0]](%c2)
|
||||
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %7 = affine.apply [[MAP1]](%c2)
|
||||
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP0]](%7)
|
||||
// UNROLL-FULL-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %10 = affine.apply [[MAP3]](%c2)
|
||||
// UNROLL-FULL-NEXT: %11 = affine.apply [[MAP0]](%10)
|
||||
// UNROLL-FULL-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
|
||||
for %k = 2 to 7 step 2 {
|
||||
%z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%w = "addi32"(%z, %z) : (index, index) -> index
|
||||
}
|
||||
} // CHECK: }
|
||||
return // CHECK: return
|
||||
} // CHECK }
|
||||
} // UNROLL-FULL: }
|
||||
return // UNROLL-FULL: return
|
||||
} // UNROLL-FULL }
|
||||
|
||||
// CHECK-LABEL: func @loop_nest_multiple_results() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_multiple_results() {
|
||||
func @loop_nest_multiple_results() {
|
||||
// CHECK: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// CHECK: %0 = affine.apply [[MAP4]](%i0, %c0)
|
||||
// CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// CHECK-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0)
|
||||
// CHECK-NEXT: %3 = "fma"(%2, %0, %0) : (index, index, index) -> (index, index)
|
||||
// CHECK-NEXT: %4 = affine.apply #map{{.*}}(%c0)
|
||||
// CHECK-NEXT: %5 = affine.apply #map{{.*}}(%i0, %4)
|
||||
// CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
|
||||
// CHECK-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4)
|
||||
// CHECK-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP4]](%i0, %c0)
|
||||
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0)
|
||||
// UNROLL-FULL-NEXT: %3 = "fma"(%2, %0, %0) : (index, index, index) -> (index, index)
|
||||
// UNROLL-FULL-NEXT: %4 = affine.apply #map{{.*}}(%c0)
|
||||
// UNROLL-FULL-NEXT: %5 = affine.apply #map{{.*}}(%i0, %4)
|
||||
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4)
|
||||
// UNROLL-FULL-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
|
||||
for %j = 0 to 2 step 1 {
|
||||
%x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
%z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j)
|
||||
%w = "fma"(%z, %x, %x) : (index, index, index) -> (index, index)
|
||||
}
|
||||
} // CHECK: }
|
||||
return // CHECK: return
|
||||
} // CHECK }
|
||||
} // UNROLL-FULL: }
|
||||
return // UNROLL-FULL: return
|
||||
} // UNROLL-FULL }
|
||||
|
||||
|
||||
// Imperfect loop nest. Unrolling innermost here yields a perfect nest.
|
||||
// CHECK-LABEL: func @loop_nest_seq_imperfect(%arg0: memref<128x128xf32>) {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_seq_imperfect(%arg0: memref<128x128xf32>) {
|
||||
func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
|
||||
// CHECK: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: %c128 = constant 128 : index
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: %c128 = constant 128 : index
|
||||
%c128 = constant 128 : index
|
||||
// CHECK: for %i0 = 0 to 100 {
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// CHECK: %0 = "vld"(%i0) : (index) -> i32
|
||||
// UNROLL-FULL: %0 = "vld"(%i0) : (index) -> i32
|
||||
%ld = "vld"(%i) : (index) -> i32
|
||||
// CHECK: %1 = affine.apply [[MAP0]](%c0)
|
||||
// CHECK-NEXT: %2 = "vmulf"(%c0, %1) : (index, index) -> index
|
||||
// CHECK-NEXT: %3 = "vaddf"(%2, %2) : (index, index) -> index
|
||||
// CHECK-NEXT: %4 = affine.apply [[MAP0]](%c0)
|
||||
// CHECK-NEXT: %5 = affine.apply [[MAP0]](%4)
|
||||
// CHECK-NEXT: %6 = "vmulf"(%4, %5) : (index, index) -> index
|
||||
// CHECK-NEXT: %7 = "vaddf"(%6, %6) : (index, index) -> index
|
||||
// CHECK-NEXT: %8 = affine.apply [[MAP1]](%c0)
|
||||
// CHECK-NEXT: %9 = affine.apply [[MAP0]](%8)
|
||||
// CHECK-NEXT: %10 = "vmulf"(%8, %9) : (index, index) -> index
|
||||
// CHECK-NEXT: %11 = "vaddf"(%10, %10) : (index, index) -> index
|
||||
// CHECK-NEXT: %12 = affine.apply [[MAP2]](%c0)
|
||||
// CHECK-NEXT: %13 = affine.apply [[MAP0]](%12)
|
||||
// CHECK-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
|
||||
// CHECK-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
|
||||
// UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %2 = "vmulf"(%c0, %1) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %3 = "vaddf"(%2, %2) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %4 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %5 = affine.apply [[MAP0]](%4)
|
||||
// UNROLL-FULL-NEXT: %6 = "vmulf"(%4, %5) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %7 = "vaddf"(%6, %6) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP1]](%c0)
|
||||
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%8)
|
||||
// UNROLL-FULL-NEXT: %10 = "vmulf"(%8, %9) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %11 = "vaddf"(%10, %10) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %12 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %13 = affine.apply [[MAP0]](%12)
|
||||
// UNROLL-FULL-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
|
||||
for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "vmulf"(%j, %x) : (index, index) -> index
|
||||
%z = "vaddf"(%y, %y) : (index, index) -> index
|
||||
}
|
||||
// CHECK: %16 = "scale"(%c128, %i0) : (index, index) -> index
|
||||
// UNROLL-FULL: %16 = "scale"(%c128, %i0) : (index, index) -> index
|
||||
%addr = "scale"(%c128, %i) : (index, index) -> index
|
||||
// CHECK: "vst"(%16, %i0) : (index, index) -> ()
|
||||
// UNROLL-FULL: "vst"(%16, %i0) : (index, index) -> ()
|
||||
"vst"(%addr, %i) : (index, index) -> ()
|
||||
} // CHECK }
|
||||
return // CHECK: return
|
||||
} // UNROLL-FULL }
|
||||
return // UNROLL-FULL: return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @loop_nest_seq_multiple() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_seq_multiple() {
|
||||
func @loop_nest_seq_multiple() {
|
||||
// CHECK: c0 = constant 0 : index
|
||||
// CHECK-NEXT: %c0_0 = constant 0 : index
|
||||
// CHECK-NEXT: %0 = affine.apply [[MAP0]](%c0_0)
|
||||
// CHECK-NEXT: "mul"(%0, %0) : (index, index) -> ()
|
||||
// CHECK-NEXT: %1 = affine.apply [[MAP0]](%c0_0)
|
||||
// CHECK-NEXT: %2 = affine.apply [[MAP0]](%1)
|
||||
// CHECK-NEXT: "mul"(%2, %2) : (index, index) -> ()
|
||||
// CHECK-NEXT: %3 = affine.apply [[MAP1]](%c0_0)
|
||||
// CHECK-NEXT: %4 = affine.apply [[MAP0]](%3)
|
||||
// CHECK-NEXT: "mul"(%4, %4) : (index, index) -> ()
|
||||
// CHECK-NEXT: %5 = affine.apply [[MAP2]](%c0_0)
|
||||
// CHECK-NEXT: %6 = affine.apply [[MAP0]](%5)
|
||||
// CHECK-NEXT: "mul"(%6, %6) : (index, index) -> ()
|
||||
// UNROLL-FULL: c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: %c0_0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: %0 = affine.apply [[MAP0]](%c0_0)
|
||||
// UNROLL-FULL-NEXT: "mul"(%0, %0) : (index, index) -> ()
|
||||
// UNROLL-FULL-NEXT: %1 = affine.apply [[MAP0]](%c0_0)
|
||||
// UNROLL-FULL-NEXT: %2 = affine.apply [[MAP0]](%1)
|
||||
// UNROLL-FULL-NEXT: "mul"(%2, %2) : (index, index) -> ()
|
||||
// UNROLL-FULL-NEXT: %3 = affine.apply [[MAP1]](%c0_0)
|
||||
// UNROLL-FULL-NEXT: %4 = affine.apply [[MAP0]](%3)
|
||||
// UNROLL-FULL-NEXT: "mul"(%4, %4) : (index, index) -> ()
|
||||
// UNROLL-FULL-NEXT: %5 = affine.apply [[MAP2]](%c0_0)
|
||||
// UNROLL-FULL-NEXT: %6 = affine.apply [[MAP0]](%5)
|
||||
// UNROLL-FULL-NEXT: "mul"(%6, %6) : (index, index) -> ()
|
||||
for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
"mul"(%x, %x) : (index, index) -> ()
|
||||
}
|
||||
|
||||
// CHECK: %c99 = constant 99 : index
|
||||
// UNROLL-FULL: %c99 = constant 99 : index
|
||||
%k = constant 99 : index
|
||||
// CHECK: for %i0 = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 step 2 {
|
||||
for %m = 0 to 100 step 2 {
|
||||
// CHECK: %7 = affine.apply [[MAP0]](%c0)
|
||||
// CHECK-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99]
|
||||
// CHECK-NEXT: %9 = affine.apply [[MAP0]](%c0)
|
||||
// CHECK-NEXT: %10 = affine.apply [[MAP0]](%9)
|
||||
// CHECK-NEXT: %11 = affine.apply [[MAP6]](%9)[%c99]
|
||||
// CHECK-NEXT: %12 = affine.apply [[MAP1]](%c0)
|
||||
// CHECK-NEXT: %13 = affine.apply [[MAP0]](%12)
|
||||
// CHECK-NEXT: %14 = affine.apply [[MAP6]](%12)[%c99]
|
||||
// CHECK-NEXT: %15 = affine.apply [[MAP2]](%c0)
|
||||
// CHECK-NEXT: %16 = affine.apply [[MAP0]](%15)
|
||||
// CHECK-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99]
|
||||
// UNROLL-FULL: %7 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99]
|
||||
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %10 = affine.apply [[MAP0]](%9)
|
||||
// UNROLL-FULL-NEXT: %11 = affine.apply [[MAP6]](%9)[%c99]
|
||||
// UNROLL-FULL-NEXT: %12 = affine.apply [[MAP1]](%c0)
|
||||
// UNROLL-FULL-NEXT: %13 = affine.apply [[MAP0]](%12)
|
||||
// UNROLL-FULL-NEXT: %14 = affine.apply [[MAP6]](%12)[%c99]
|
||||
// UNROLL-FULL-NEXT: %15 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %16 = affine.apply [[MAP0]](%15)
|
||||
// UNROLL-FULL-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99]
|
||||
for %n = 0 to 4 {
|
||||
%y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } :
|
||||
(index, index) -> (index)
|
||||
} // CHECK }
|
||||
} // CHECK }
|
||||
return // CHECK: return
|
||||
} // CHECK }
|
||||
} // UNROLL-FULL }
|
||||
} // UNROLL-FULL }
|
||||
return // UNROLL-FULL: return
|
||||
} // UNROLL-FULL }
|
||||
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_unroll_full() {
|
||||
func @loop_nest_unroll_full() {
|
||||
// UNROLL-FULL-NEXT: %0 = "foo"() : () -> i32
|
||||
// UNROLL-FULL-NEXT: %1 = "bar"() : () -> i32
|
||||
// UNROLL-FULL-NEXT: return
|
||||
for %i = 0 to 1 {
|
||||
%x = "foo"() : () -> i32
|
||||
%y = "bar"() : () -> i32
|
||||
}
|
||||
return
|
||||
} // UNROLL-FULL }
|
||||
|
||||
// SHORT-LABEL: func @loop_nest_outer_unroll() {
|
||||
func @loop_nest_outer_unroll() {
|
||||
|
@ -269,8 +260,8 @@ func @loop_nest_outer_unroll() {
|
|||
return // SHORT: return
|
||||
} // SHORT }
|
||||
|
||||
// We aren't doing any file check here. We just need this test case to
|
||||
// successfully run. Both %i0 and i1 will get unrolled here with the min trip
|
||||
// We are doing a minimal FileCheck here. We just need this test case to
|
||||
// successfully run. Both %x and %y will get unrolled here as the min trip
|
||||
// count threshold set to 2.
|
||||
// SHORT-LABEL: func @loop_nest_seq_long() -> i32 {
|
||||
func @loop_nest_seq_long() -> i32 {
|
||||
|
@ -284,7 +275,9 @@ func @loop_nest_seq_long() -> i32 {
|
|||
|
||||
%zero_idx = constant 0 : index
|
||||
|
||||
// CHECK: for %i0 = 0 to 512
|
||||
for %n0 = 0 to 512 {
|
||||
// CHECK: for %i1 = 0 to 8
|
||||
for %n1 = 0 to 8 {
|
||||
store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
|
@ -292,22 +285,25 @@ func @loop_nest_seq_long() -> i32 {
|
|||
}
|
||||
}
|
||||
|
||||
for %i0 = 0 to 2 {
|
||||
for %i1 = 0 to 2 {
|
||||
for %x = 0 to 2 {
|
||||
for %y = 0 to 2 {
|
||||
// CHECK: for %i2
|
||||
for %i2 = 0 to 8 {
|
||||
%b2 = "affine.apply" (%i1, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%x = load %B[%i0, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op1"(%x) : (i32) -> ()
|
||||
// CHECK-NOT: for %i3
|
||||
// CHECK: %{{[0-9]+}} = affine.apply
|
||||
%b2 = "affine.apply" (%y, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%z = load %B[%x, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op1"(%z) : (i32) -> ()
|
||||
}
|
||||
for %j1 = 0 to 8 {
|
||||
for %j2 = 0 to 8 {
|
||||
%a2 = "affine.apply" (%i1, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%a2 = "affine.apply" (%y, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op2"(%v203) : (i32) -> ()
|
||||
}
|
||||
for %k2 = 0 to 8 {
|
||||
%s0 = "op3"() : () -> i32
|
||||
%c2 = "affine.apply" (%i0, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%c2 = "affine.apply" (%x, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
%s2 = "addi32"(%s0, %s1) : (i32, i32) -> i32
|
||||
store %s2, %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
|
@ -353,22 +349,22 @@ func @unroll_unit_stride_no_cleanup() {
|
|||
func @unroll_unit_stride_cleanup() {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 7 step 4 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]+}}([[L1]])
|
||||
// UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]+}}([[L1]])
|
||||
// UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]+}}([[L1]])
|
||||
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]+}}([[L1]])
|
||||
// UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]+}}([[L1]])
|
||||
// UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]+}}([[L1]])
|
||||
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
// UNROLL-BY-4-NEXT: for [[L2:%i[0-9]+]] = 8 to 10 {
|
||||
// UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
for %j = 0 to 10 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
|
@ -382,7 +378,7 @@ func @unroll_unit_stride_cleanup() {
|
|||
func @unroll_non_unit_stride_cleanup() {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 37 step 20 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 42 step 20 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]+}}([[L1]])
|
||||
|
@ -408,6 +404,7 @@ func @unroll_non_unit_stride_cleanup() {
|
|||
}
|
||||
|
||||
// Both the unrolled loop and the cleanup loop are single iteration loops.
|
||||
// UNROLL-BY-4-LABEL: func @loop_nest_single_iteration_after_unroll
|
||||
func @loop_nest_single_iteration_after_unroll(%N: index) {
|
||||
// UNROLL-BY-4: %c0 = constant 0 : index
|
||||
// UNROLL-BY-4: %c4 = constant 4 : index
|
||||
|
@ -435,7 +432,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
|
|||
// UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
|
||||
func @loop_nest_operand1() {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
|
||||
// UNROLL-BY-4-NEXT: for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4
|
||||
// UNROLL-BY-4-NEXT: for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4
|
||||
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
|
||||
|
@ -444,7 +441,7 @@ func @loop_nest_operand1() {
|
|||
// UNROLL-BY-4-NEXT: }
|
||||
// UNROLL-BY-4-NEXT: return
|
||||
for %i = 0 to 100 step 2 {
|
||||
for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) {
|
||||
for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) {
|
||||
%x = "foo"() : () -> i32
|
||||
}
|
||||
}
|
||||
|
@ -491,11 +488,11 @@ func @loop_nest_operand3() {
|
|||
return
|
||||
}
|
||||
|
||||
// UNROLL-BY-4-LABEL: func @loop_nest_operand4(%arg0: index) {
|
||||
func @loop_nest_operand4(%N : index) {
|
||||
// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound(%arg0: index) {
|
||||
func @loop_nest_symbolic_bound(%N : index) {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
|
||||
// UNROLL-BY-4: for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 {
|
||||
// UNROLL-BY-4: %0 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
|
||||
|
@ -505,25 +502,56 @@ func @loop_nest_operand4(%N : index) {
|
|||
// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
|
||||
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4_NEXT: }
|
||||
// Specify the lower bound so that both lb and ub operands match.
|
||||
for %j = ()[s0] -> (0)()[%N] to %N {
|
||||
for %j = 0 to %N {
|
||||
%x = "foo"() : () -> i32
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @loop_nest_unroll_full() {
|
||||
func @loop_nest_unroll_full() {
|
||||
// CHECK-NEXT: %0 = "foo"() : () -> i32
|
||||
// CHECK-NEXT: %1 = "bar"() : () -> i32
|
||||
// CHECK-NEXT: return
|
||||
for %i = 0 to 1 {
|
||||
%x = "foo"() : () -> i32
|
||||
%y = "bar"() : () -> i32
|
||||
// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
|
||||
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
|
||||
for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
|
||||
"foo"() : () -> ()
|
||||
}
|
||||
return
|
||||
} // CHECK }
|
||||
}
|
||||
// CHECK-NEXT: for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
|
||||
// CHECK-NEXT: "foo"() : () -> ()
|
||||
// CHECK-NEXT: "foo"() : () -> ()
|
||||
// CHECK-NEXT: "foo"() : () -> ()
|
||||
// CHECK-NEXT: "foo"() : () -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] {
|
||||
// CHECK-NEXT: "foo"() : () -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
|
||||
// The trip count here is a multiple of four, but this can be inferred only
|
||||
// through composition. Check for no cleanup loop.
|
||||
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor
|
||||
func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
|
||||
%T = affine.apply (d0) -> (4*d0 + 1)(%M)
|
||||
%K = affine.apply (d0) -> (d0 - 1) (%T)
|
||||
for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
|
||||
"foo"() : () -> ()
|
||||
}
|
||||
return
|
||||
}
|
||||
// UNROLL-BY-4: for %i0 = 0 to min
|
||||
// UNROLL-BY-4-NOT: for
|
||||
// UNROLL-BY-4: return
|
||||
|
||||
// Commented due to b/128340045
|
||||
// xUNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor
|
||||
// func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
|
||||
// %K = affine.apply (d0) -> (4*d0) (%M)
|
||||
// for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
|
||||
// "foo"() : () -> ()
|
||||
// }
|
||||
// return
|
||||
//}
|
||||
|
||||
|
||||
// UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
|
||||
func @unroll_by_one_should_promote_single_iteration_loop() {
|
||||
|
|
Loading…
Reference in New Issue