LoopFusion: adds support for computing forward computation slices, which will enable fusion of consumer loop nests into their producers in subsequent CLs.

PiperOrigin-RevId: 253601994
This commit is contained in:
Andy Davis 2019-06-17 09:59:35 -07:00 committed by Mehdi Amini
parent a14eeacf2c
commit 898cf0e968
8 changed files with 515 additions and 164 deletions

View File

@ -393,12 +393,12 @@ public:
bool lower = true); bool lower = true);
/// Computes the lower and upper bounds of the first 'num' dimensional /// Computes the lower and upper bounds of the first 'num' dimensional
/// identifiers as an affine map of the remaining identifiers (dimensional and /// identifiers (starting at 'offset') as an affine map of the remaining
/// symbolic). This method is able to detect identifiers as floordiv's /// identifiers (dimensional and symbolic). This method is able to detect
/// and mod's of affine expressions of other identifiers with respect to /// identifiers as floordiv's and mod's of affine expressions of other
/// (positive) constants. Sets bound map to a null AffineMap if such a bound /// identifiers with respect to (positive) constants. Sets bound map to a
/// can't be found (or yet unimplemented). /// null AffineMap if such a bound can't be found (or yet unimplemented).
void getSliceBounds(unsigned num, MLIRContext *context, void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context,
SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *lbMaps,
SmallVectorImpl<AffineMap> *ubMaps); SmallVectorImpl<AffineMap> *ubMaps);
@ -648,13 +648,14 @@ public:
Optional<int64_t> getConstantUpperBound(unsigned pos) const; Optional<int64_t> getConstantUpperBound(unsigned pos) const;
/// Gets the lower and upper bound of the pos^th identifier treating /// Gets the lower and upper bound of the pos^th identifier treating
/// [dimStartPos, symStartPos) as dimensions and [symStartPos, /// [0, offset) U [offset + num, symStartPos) as dimensions and
/// getNumDimAndSymbolIds) as symbols. The returned multi-dimensional maps /// [symStartPos, getNumDimAndSymbolIds) as symbols. The returned
/// in the pair represent the max and min of potentially multiple affine /// multi-dimensional maps in the pair represent the max and min of
/// expressions. The upper bound is exclusive. 'localExprs' holds pre-computed /// potentially multiple affine expressions. The upper bound is exclusive.
/// AffineExpr's for all local identifiers in the system. /// 'localExprs' holds pre-computed AffineExpr's for all local identifiers in
/// the system.
std::pair<AffineMap, AffineMap> std::pair<AffineMap, AffineMap>
getLowerAndUpperBound(unsigned pos, unsigned dimStartPos, getLowerAndUpperBound(unsigned pos, unsigned offset, unsigned num,
unsigned symStartPos, ArrayRef<AffineExpr> localExprs, unsigned symStartPos, ArrayRef<AffineExpr> localExprs,
MLIRContext *context); MLIRContext *context);

View File

@ -73,6 +73,8 @@ struct ComputationSliceState {
std::vector<SmallVector<Value *, 4>> lbOperands; std::vector<SmallVector<Value *, 4>> lbOperands;
// List of upper bound operands (ubOperands[i] are used by 'ubs[i]'). // List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
std::vector<SmallVector<Value *, 4>> ubOperands; std::vector<SmallVector<Value *, 4>> ubOperands;
// Slice loop nest insertion point in target loop nest.
Block::iterator insertPoint;
// Adds to 'cst' with constraints which represent the slice bounds on 'ivs' // Adds to 'cst' with constraints which represent the slice bounds on 'ivs'
// in 'this'. Specifically, the values in 'ivs' are added to 'cst' as dim // in 'this'. Specifically, the values in 'ivs' are added to 'cst' as dim
// identifiers and the values in 'lb/ubOperands' are added as symbols. // identifiers and the values in 'lb/ubOperands' are added as symbols.
@ -85,19 +87,67 @@ struct ComputationSliceState {
void clearBounds(); void clearBounds();
}; };
/// Computes computation slice loop bounds for the loop nest surrounding /// Computes the computation slice loop bounds for one loop nest as affine maps
/// 'srcAccess', where the returned loop bound AffineMaps are functions of /// of the other loop nest's IVs and symbols, using 'dependenceConstraints'
/// loop IVs from the loop nest surrounding 'dstAccess'. /// computed between 'depSourceAccess' and 'depSinkAccess'.
LogicalResult getBackwardComputationSliceState( /// If 'isBackwardSlice' is true, a backwards slice is computed in which the
const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, /// slice bounds of loop nest surrounding 'depSourceAccess' are computed in
unsigned dstLoopDepth, ComputationSliceState *sliceState); /// terms of loop IVs and symbols of the loop nest surrounding 'depSinkAccess'
/// at 'loopDepth'.
/// If 'isBackwardSlice' is false, a forward slice is computed in which the
/// slice bounds of loop nest surrounding 'depSinkAccess' are computed in terms
/// of loop IVs and symbols of the loop nest surrounding 'depSourceAccess' at
/// 'loopDepth'.
/// The slice loop bounds and associated operands are returned in 'sliceState'.
//
// Backward slice example:
//
// affine.for %i0 = 0 to 10 {
// store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess'
// }
// affine.for %i1 = 0 to 10 {
// %v = load %0[%i1] : memref<100xf32> // 'depSinkAccess'
// }
//
// // Backward computation slice of loop nest '%i0'.
// affine.for %i0 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 1)(%i1) {
// store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess'
// }
//
// Forward slice example:
//
// affine.for %i0 = 0 to 10 {
// store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess'
// }
// affine.for %i1 = 0 to 10 {
// %v = load %0[%i1] : memref<100xf32> // 'depSinkAccess'
// }
//
// // Forward computation slice of loop nest '%i1'.
// affine.for %i1 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 1)(%i0) {
// %v = load %0[%i1] : memref<100xf32> // 'depSinkAccess'
// }
//
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp,
FlatAffineConstraints *dependenceConstraints,
unsigned loopDepth, bool isBackwardSlice,
ComputationSliceState *sliceState);
/// Computes in 'sliceUnion' the union of all slice bounds computed at /// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'dstLoopDepth' between all pairs in 'srcOps' and 'dstOp' which access the /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB'.
/// same memref. Returns 'success' if union was computed, 'failure' otherwise. /// The parameter 'numCommonLoops' is the number of loops common to the
LogicalResult computeSliceUnion(ArrayRef<Operation *> srcOps, /// operations in 'opsA' and 'opsB'.
ArrayRef<Operation *> dstOps, /// If 'isBackwardSlice' is true, computes slice bounds for loop nest
unsigned dstLoopDepth, /// surrounding ops in 'opsA', as a function of IVs and symbols of loop nest
/// surrounding ops in 'opsB' at 'loopDepth'.
/// If 'isBackwardSlice' is false, computes slice bounds for loop nest
/// surrounding ops in 'opsB', as a function of IVs and symbols of loop nest
/// surrounding ops in 'opsA' at 'loopDepth'.
/// Returns 'success' if union was computed, 'failure' otherwise.
// TODO(andydavis) Change this API to take 'forOpA'/'forOpB'.
LogicalResult computeSliceUnion(ArrayRef<Operation *> opsA,
ArrayRef<Operation *> opsB, unsigned loopDepth,
unsigned numCommonLoops, bool isBackwardSlice,
ComputationSliceState *sliceUnion); ComputationSliceState *sliceUnion);
/// Creates a clone of the computation contained in the loop nest surrounding /// Creates a clone of the computation contained in the loop nest surrounding

View File

@ -1423,19 +1423,28 @@ void FlatAffineConstraints::removeRedundantInequalities() {
} }
std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound( std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
unsigned pos, unsigned dimStartPos, unsigned symStartPos, unsigned pos, unsigned offset, unsigned num, unsigned symStartPos,
ArrayRef<AffineExpr> localExprs, MLIRContext *context) { ArrayRef<AffineExpr> localExprs, MLIRContext *context) {
assert(pos < dimStartPos && "invalid dim start pos"); assert(pos + offset < getNumDimIds() && "invalid dim start pos");
assert(symStartPos >= dimStartPos && "invalid sym start pos"); assert(symStartPos >= (pos + offset) && "invalid sym start pos");
assert(getNumLocalIds() == localExprs.size() && assert(getNumLocalIds() == localExprs.size() &&
"incorrect local exprs count"); "incorrect local exprs count");
SmallVector<unsigned, 4> lbIndices, ubIndices; SmallVector<unsigned, 4> lbIndices, ubIndices;
getLowerAndUpperBoundIndices(*this, pos, &lbIndices, &ubIndices); getLowerAndUpperBoundIndices(*this, pos + offset, &lbIndices, &ubIndices);
/// Copy into 'b' the coefficients of 'a' at every position outside [offset, offset + num), i.e. [0, offset) U [offset + num, a.size()).
auto addCoeffs = [&](ArrayRef<int64_t> a, SmallVectorImpl<int64_t> &b) {
b.clear();
for (unsigned i = 0, e = a.size(); i < e; ++i) {
if (i < offset || i >= offset + num)
b.push_back(a[i]);
}
};
SmallVector<int64_t, 8> lb, ub; SmallVector<int64_t, 8> lb, ub;
SmallVector<AffineExpr, 4> exprs; SmallVector<AffineExpr, 4> exprs;
unsigned dimCount = symStartPos - dimStartPos; unsigned dimCount = symStartPos - num;
unsigned symCount = getNumDimAndSymbolIds() - symStartPos; unsigned symCount = getNumDimAndSymbolIds() - symStartPos;
exprs.reserve(lbIndices.size()); exprs.reserve(lbIndices.size());
// Lower bound expressions. // Lower bound expressions.
@ -1444,7 +1453,7 @@ std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
// Extract the lower bound (in terms of other coeff's + const), i.e., if // Extract the lower bound (in terms of other coeff's + const), i.e., if
// i - j + 1 >= 0 is the constraint, 'pos' is for i the lower bound is j // i - j + 1 >= 0 is the constraint, 'pos' is for i the lower bound is j
// - 1. // - 1.
lb.assign(ineq.begin() + dimStartPos, ineq.end()); addCoeffs(ineq, lb);
std::transform(lb.begin(), lb.end(), lb.begin(), std::negate<int64_t>()); std::transform(lb.begin(), lb.end(), lb.begin(), std::negate<int64_t>());
auto expr = mlir::toAffineExpr(lb, dimCount, symCount, localExprs, context); auto expr = mlir::toAffineExpr(lb, dimCount, symCount, localExprs, context);
exprs.push_back(expr); exprs.push_back(expr);
@ -1458,7 +1467,7 @@ std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
for (auto idx : ubIndices) { for (auto idx : ubIndices) {
auto ineq = getInequality(idx); auto ineq = getInequality(idx);
// Extract the upper bound (in terms of other coeff's + const). // Extract the upper bound (in terms of other coeff's + const).
ub.assign(ineq.begin() + dimStartPos, ineq.end()); addCoeffs(ineq, ub);
auto expr = mlir::toAffineExpr(ub, dimCount, symCount, localExprs, context); auto expr = mlir::toAffineExpr(ub, dimCount, symCount, localExprs, context);
// Upper bound is exclusive. // Upper bound is exclusive.
exprs.push_back(expr + 1); exprs.push_back(expr + 1);
@ -1470,10 +1479,12 @@ std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
} }
/// Computes the lower and upper bounds of the first 'num' dimensional /// Computes the lower and upper bounds of the first 'num' dimensional
/// identifiers as affine maps of the remaining identifiers (dimensional and /// identifiers (starting at 'offset') as affine maps of the remaining
/// symbolic identifiers). Local identifiers are themselves explicitly computed /// identifiers (dimensional and symbolic identifiers). Local identifiers are
/// as affine functions of other identifiers in this process if needed. /// themselves explicitly computed as affine functions of other identifiers in
void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context, /// this process if needed.
void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num,
MLIRContext *context,
SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *lbMaps,
SmallVectorImpl<AffineMap> *ubMaps) { SmallVectorImpl<AffineMap> *ubMaps) {
assert(num < getNumDimIds() && "invalid range"); assert(num < getNumDimIds() && "invalid range");
@ -1488,8 +1499,12 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
// Record computed/detected identifiers. // Record computed/detected identifiers.
SmallVector<AffineExpr, 8> memo(getNumIds()); SmallVector<AffineExpr, 8> memo(getNumIds());
// Initialize dimensional and symbolic identifiers. // Initialize dimensional and symbolic identifiers.
for (unsigned i = num, e = getNumDimIds(); i < e; i++) for (unsigned i = 0, e = getNumDimIds(); i < e; i++) {
memo[i] = getAffineDimExpr(i - num, context); if (i < offset)
memo[i] = getAffineDimExpr(i, context);
else if (i >= offset + num)
memo[i] = getAffineDimExpr(i - num, context);
}
for (unsigned i = getNumDimIds(), e = getNumDimAndSymbolIds(); i < e; i++) for (unsigned i = getNumDimIds(), e = getNumDimAndSymbolIds(); i < e; i++)
memo[i] = getAffineSymbolExpr(i - getNumDimIds(), context); memo[i] = getAffineSymbolExpr(i - getNumDimIds(), context);
@ -1578,7 +1593,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
for (unsigned pos = 0; pos < num; pos++) { for (unsigned pos = 0; pos < num; pos++) {
unsigned numMapDims = getNumDimIds() - num; unsigned numMapDims = getNumDimIds() - num;
unsigned numMapSymbols = getNumSymbolIds(); unsigned numMapSymbols = getNumSymbolIds();
AffineExpr expr = memo[pos]; AffineExpr expr = memo[pos + offset];
if (expr) if (expr)
expr = simplifyAffineExpr(expr, numMapDims, numMapSymbols); expr = simplifyAffineExpr(expr, numMapDims, numMapSymbols);
@ -1601,7 +1616,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
tmpClone->removeRedundantInequalities(); tmpClone->removeRedundantInequalities();
} }
std::tie(lbMap, ubMap) = tmpClone->getLowerAndUpperBound( std::tie(lbMap, ubMap) = tmpClone->getLowerAndUpperBound(
pos, num, getNumDimIds(), {}, context); pos, offset, num, getNumDimIds(), {}, context);
} }
// If the above fails, we'll just use the constant lower bound and the // If the above fails, we'll just use the constant lower bound and the
@ -1612,7 +1627,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
if (!lbMap || lbMap.getNumResults() > 1) { if (!lbMap || lbMap.getNumResults() > 1) {
LLVM_DEBUG(llvm::dbgs() LLVM_DEBUG(llvm::dbgs()
<< "WARNING: Potentially over-approximating slice lb\n"); << "WARNING: Potentially over-approximating slice lb\n");
auto lbConst = getConstantLowerBound(pos); auto lbConst = getConstantLowerBound(pos + offset);
if (lbConst.hasValue()) { if (lbConst.hasValue()) {
lbMap = AffineMap::get( lbMap = AffineMap::get(
numMapDims, numMapSymbols, numMapDims, numMapSymbols,
@ -1622,7 +1637,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
if (!ubMap || ubMap.getNumResults() > 1) { if (!ubMap || ubMap.getNumResults() > 1) {
LLVM_DEBUG(llvm::dbgs() LLVM_DEBUG(llvm::dbgs()
<< "WARNING: Potentially over-approximating slice ub\n"); << "WARNING: Potentially over-approximating slice ub\n");
auto ubConst = getConstantUpperBound(pos); auto ubConst = getConstantUpperBound(pos + offset);
if (ubConst.hasValue()) { if (ubConst.hasValue()) {
(ubMap) = AffineMap::get( (ubMap) = AffineMap::get(
numMapDims, numMapSymbols, numMapDims, numMapSymbols,
@ -1630,9 +1645,11 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
} }
} }
} }
LLVM_DEBUG(llvm::dbgs() << "lb map for pos = " << Twine(pos) << ", expr: "); LLVM_DEBUG(llvm::dbgs()
<< "lb map for pos = " << Twine(pos + offset) << ", expr: ");
LLVM_DEBUG(lbMap.dump();); LLVM_DEBUG(lbMap.dump(););
LLVM_DEBUG(llvm::dbgs() << "ub map for pos = " << Twine(pos) << ", expr: "); LLVM_DEBUG(llvm::dbgs()
<< "ub map for pos = " << Twine(pos + offset) << ", expr: ");
LLVM_DEBUG(ubMap.dump();); LLVM_DEBUG(ubMap.dump(););
} }
} }

View File

@ -504,48 +504,84 @@ LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value *, 8> &ivs,
return success(); return success();
} }
/// Computes in 'sliceUnion' the union of all slice bounds computed at // Returns the innermost common loop depth for the set of operations in 'ops'.
/// 'dstLoopDepth' between all pairs in 'srcOps' and 'dstOp' which access the // TODO(andydavis) Move this to LoopUtils.
/// same memref. Returns 'Success' if union was computed, 'failure' otherwise. static unsigned
LogicalResult mlir::computeSliceUnion(ArrayRef<Operation *> srcOps, getInnermostCommonLoopDepth(ArrayRef<Operation *> ops,
ArrayRef<Operation *> dstOps, SmallVectorImpl<AffineForOp> &surroundingLoops) {
unsigned dstLoopDepth, unsigned numOps = ops.size();
ComputationSliceState *sliceUnion) { assert(numOps > 0);
unsigned numSrcOps = srcOps.size();
unsigned numDstOps = dstOps.size();
assert(numSrcOps > 0 && numDstOps > 0);
// Compute the intersection of 'srcMemrefToOps' and 'dstMemrefToOps'. std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
llvm::SmallDenseSet<Value *> memrefIntersection; unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
for (auto *srcOp : srcOps) { for (unsigned i = 0; i < numOps; ++i) {
auto *srcMemRef = getLoadOrStoreMemRef(srcOp); getLoopIVs(*ops[i], &loops[i]);
for (auto *dstOp : dstOps) { loopDepthLimit =
if (srcMemRef == getLoadOrStoreMemRef(dstOp)) std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
memrefIntersection.insert(srcMemRef);
}
} }
// Return failure if 'memrefIntersection' is empty.
if (memrefIntersection.empty())
return failure();
// Compute the union of slice bounds between all pairs in 'srcOps' and unsigned loopDepth = 0;
// 'dstOps' in 'sliceUnionCst'. for (unsigned d = 0; d < loopDepthLimit; ++d) {
unsigned i;
for (i = 1; i < numOps; ++i) {
if (loops[i - 1][d] != loops[i][d])
return loopDepth;
}
surroundingLoops.push_back(loops[i - 1][d]);
++loopDepth;
}
return loopDepth;
}
/// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB'.
/// Returns 'success' if union was computed, 'failure' otherwise.
LogicalResult mlir::computeSliceUnion(ArrayRef<Operation *> opsA,
ArrayRef<Operation *> opsB,
unsigned loopDepth,
unsigned numCommonLoops,
bool isBackwardSlice,
ComputationSliceState *sliceUnion) {
// Compute the union of slice bounds between all pairs in 'opsA' and
// 'opsB' in 'sliceUnionCst'.
FlatAffineConstraints sliceUnionCst; FlatAffineConstraints sliceUnionCst;
assert(sliceUnionCst.getNumDimAndSymbolIds() == 0); assert(sliceUnionCst.getNumDimAndSymbolIds() == 0);
for (unsigned i = 0; i < numSrcOps; ++i) { std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
MemRefAccess srcAccess(srcOps[i]); for (unsigned i = 0, numOpsA = opsA.size(); i < numOpsA; ++i) {
for (unsigned j = 0; j < numDstOps; ++j) { MemRefAccess srcAccess(opsA[i]);
MemRefAccess dstAccess(dstOps[j]); for (unsigned j = 0, numOpsB = opsB.size(); j < numOpsB; ++j) {
MemRefAccess dstAccess(opsB[j]);
if (srcAccess.memref != dstAccess.memref) if (srcAccess.memref != dstAccess.memref)
continue; continue;
// Compute slice bounds for 'srcAccess' and 'dstAccess'. // Check if 'loopDepth' exceeds nesting depth of src/dst ops.
ComputationSliceState tmpSliceState; if ((!isBackwardSlice && loopDepth > getNestingDepth(*opsA[i])) ||
if (failed(mlir::getBackwardComputationSliceState( (isBackwardSlice && loopDepth > getNestingDepth(*opsB[j]))) {
srcAccess, dstAccess, dstLoopDepth, &tmpSliceState))) { LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n.");
LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice bounds\n.");
return failure(); return failure();
} }
bool readReadAccesses =
isa<LoadOp>(srcAccess.opInst) && isa<LoadOp>(dstAccess.opInst);
FlatAffineConstraints dependenceConstraints;
// Check dependence between 'srcAccess' and 'dstAccess'.
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
&dependenceConstraints, /*dependenceComponents=*/nullptr,
/*allowRAR=*/readReadAccesses);
if (result.value == DependenceResult::Failure) {
LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n.");
return failure();
}
if (result.value == DependenceResult::NoDependence)
continue;
dependentOpPairs.push_back({opsA[i], opsB[j]});
// Compute slice bounds for 'srcAccess' and 'dstAccess'.
ComputationSliceState tmpSliceState;
mlir::getComputationSliceState(opsA[i], opsB[j], &dependenceConstraints,
loopDepth, isBackwardSlice,
&tmpSliceState);
if (sliceUnionCst.getNumDimAndSymbolIds() == 0) { if (sliceUnionCst.getNumDimAndSymbolIds() == 0) {
// Initialize 'sliceUnionCst' with the bounds computed in previous step. // Initialize 'sliceUnionCst' with the bounds computed in previous step.
if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) { if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
@ -599,116 +635,147 @@ LogicalResult mlir::computeSliceUnion(ArrayRef<Operation *> srcOps,
} }
} }
// Store 'numSrcLoopIvs' before converting dst loop IVs to dims. // Empty union.
unsigned numSrcLoopIVs = sliceUnionCst.getNumDimIds(); if (sliceUnionCst.getNumDimAndSymbolIds() == 0)
return failure();
// Gather loops surrounding ops from loop nest where slice will be inserted.
SmallVector<Operation *, 4> ops;
for (auto &dep : dependentOpPairs) {
ops.push_back(isBackwardSlice ? dep.second : dep.first);
}
SmallVector<AffineForOp, 4> surroundingLoops;
unsigned innermostCommonLoopDepth =
getInnermostCommonLoopDepth(ops, surroundingLoops);
if (loopDepth > innermostCommonLoopDepth) {
LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n.");
return failure();
}
// Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
unsigned numSliceLoopIVs = sliceUnionCst.getNumDimIds();
// Convert any dst loop IVs which are symbol identifiers to dim identifiers. // Convert any dst loop IVs which are symbol identifiers to dim identifiers.
sliceUnionCst.convertLoopIVSymbolsToDims(); sliceUnionCst.convertLoopIVSymbolsToDims();
sliceUnion->clearBounds(); sliceUnion->clearBounds();
sliceUnion->lbs.resize(numSrcLoopIVs, AffineMap()); sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
sliceUnion->ubs.resize(numSrcLoopIVs, AffineMap()); sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());
// Get slice bounds from slice union constraints 'sliceUnionCst'. // Get slice bounds from slice union constraints 'sliceUnionCst'.
sliceUnionCst.getSliceBounds(numSrcLoopIVs, srcOps[0]->getContext(), sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
&sliceUnion->lbs, &sliceUnion->ubs); opsA[0]->getContext(), &sliceUnion->lbs,
&sliceUnion->ubs);
// Add slice bound operands of union. // Add slice bound operands of union.
SmallVector<Value *, 4> sliceBoundOperands; SmallVector<Value *, 4> sliceBoundOperands;
sliceUnionCst.getIdValues(numSrcLoopIVs, sliceUnionCst.getIdValues(numSliceLoopIVs,
sliceUnionCst.getNumDimAndSymbolIds(), sliceUnionCst.getNumDimAndSymbolIds(),
&sliceBoundOperands); &sliceBoundOperands);
// Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'. // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
sliceUnion->ivs.clear(); sliceUnion->ivs.clear();
sliceUnionCst.getIdValues(0, numSrcLoopIVs, &sliceUnion->ivs); sliceUnionCst.getIdValues(0, numSliceLoopIVs, &sliceUnion->ivs);
// Set loop nest insertion point at 'loopDepth': block start for a backward slice, just before the last operation in the block for a forward slice.
sliceUnion->insertPoint =
isBackwardSlice
? surroundingLoops[loopDepth - 1].getBody()->begin()
: std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
// Give each bound its own copy of 'sliceBoundOperands' for subsequent // Give each bound its own copy of 'sliceBoundOperands' for subsequent
// canonicalization. // canonicalization.
sliceUnion->lbOperands.resize(numSrcLoopIVs, sliceBoundOperands); sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
sliceUnion->ubOperands.resize(numSrcLoopIVs, sliceBoundOperands); sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
return success(); return success();
} }
const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier"; const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
// Computes memref dependence between 'srcAccess' and 'dstAccess', projects // Computes slice bounds by projecting out any loop IVs from
// out any dst loop IVs at depth greater than 'dstLoopDepth', and computes slice // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
// bounds in 'sliceState' which represent the src IVs in terms of the dst IVs, // bounds in 'sliceState' which represent the one loop nest's IVs in terms of
// symbols and constants. // the other loop nest's IVs, symbols and constants (using 'isBackwardSlice').
LogicalResult mlir::getBackwardComputationSliceState( void mlir::getComputationSliceState(
const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, Operation *depSourceOp, Operation *depSinkOp,
unsigned dstLoopDepth, ComputationSliceState *sliceState) { FlatAffineConstraints *dependenceConstraints, unsigned loopDepth,
bool readReadAccesses = bool isBackwardSlice, ComputationSliceState *sliceState) {
isa<LoadOp>(srcAccess.opInst) && isa<LoadOp>(dstAccess.opInst);
FlatAffineConstraints dependenceConstraints;
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, /*loopDepth=*/1, &dependenceConstraints,
/*dependenceComponents=*/nullptr, /*allowRAR=*/readReadAccesses);
if (!hasDependence(result)) {
return failure();
}
// Get loop nest surrounding src operation. // Get loop nest surrounding src operation.
SmallVector<AffineForOp, 4> srcLoopIVs; SmallVector<AffineForOp, 4> srcLoopIVs;
getLoopIVs(*srcAccess.opInst, &srcLoopIVs); getLoopIVs(*depSourceOp, &srcLoopIVs);
unsigned numSrcLoopIVs = srcLoopIVs.size(); unsigned numSrcLoopIVs = srcLoopIVs.size();
// Get loop nest surrounding dst operation. // Get loop nest surrounding dst operation.
SmallVector<AffineForOp, 4> dstLoopIVs; SmallVector<AffineForOp, 4> dstLoopIVs;
getLoopIVs(*dstAccess.opInst, &dstLoopIVs); getLoopIVs(*depSinkOp, &dstLoopIVs);
unsigned numDstLoopIVs = dstLoopIVs.size(); unsigned numDstLoopIVs = dstLoopIVs.size();
if (dstLoopDepth > numDstLoopIVs) {
dstAccess.opInst->emitError("invalid destination loop depth");
return failure();
}
// Project out dimensions other than those up to 'dstLoopDepth'. assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
dependenceConstraints.projectOut(numSrcLoopIVs + dstLoopDepth, (isBackwardSlice && loopDepth <= numDstLoopIVs));
numDstLoopIVs - dstLoopDepth);
// Add src loop IV values to 'sliceState'. // Project out dimensions other than those up to 'loopDepth'.
dependenceConstraints.getIdValues(0, numSrcLoopIVs, &sliceState->ivs); unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
unsigned num =
isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
dependenceConstraints->projectOut(pos, num);
// Add slice loop IV values to 'sliceState'.
unsigned offset = isBackwardSlice ? 0 : loopDepth;
unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
dependenceConstraints->getIdValues(offset, offset + numSliceLoopIVs,
&sliceState->ivs);
// Set up lower/upper bound affine maps for the slice. // Set up lower/upper bound affine maps for the slice.
sliceState->lbs.resize(numSrcLoopIVs, AffineMap()); sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
sliceState->ubs.resize(numSrcLoopIVs, AffineMap()); sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
// Get bounds for src IVs in terms of dst IVs, symbols, and constants. // Get bounds for slice IVs in terms of other IVs, symbols, and constants.
dependenceConstraints.getSliceBounds(numSrcLoopIVs, dependenceConstraints->getSliceBounds(offset, numSliceLoopIVs,
srcAccess.opInst->getContext(), depSourceOp->getContext(),
&sliceState->lbs, &sliceState->ubs); &sliceState->lbs, &sliceState->ubs);
// Set up bound operands for the slice's lower and upper bounds. // Set up bound operands for the slice's lower and upper bounds.
SmallVector<Value *, 4> sliceBoundOperands; SmallVector<Value *, 4> sliceBoundOperands;
dependenceConstraints.getIdValues( unsigned numDimsAndSymbols = dependenceConstraints->getNumDimAndSymbolIds();
numSrcLoopIVs, dependenceConstraints.getNumDimAndSymbolIds(), for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
&sliceBoundOperands); if (i < offset || i >= offset + numSliceLoopIVs) {
sliceBoundOperands.push_back(dependenceConstraints->getIdValue(i));
}
}
// Give each bound its own copy of 'sliceBoundOperands' for subsequent // Give each bound its own copy of 'sliceBoundOperands' for subsequent
// canonicalization. // canonicalization.
sliceState->lbOperands.resize(numSrcLoopIVs, sliceBoundOperands); sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
sliceState->ubOperands.resize(numSrcLoopIVs, sliceBoundOperands); sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
// Set slice loop nest insertion point at 'loopDepth': block start for a backward slice, just before the last operation in the block for a forward slice.
sliceState->insertPoint =
isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
: std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
llvm::SmallDenseSet<Value *, 8> sequentialLoops; llvm::SmallDenseSet<Value *, 8> sequentialLoops;
if (readReadAccesses) { if (isa<LoadOp>(depSourceOp) && isa<LoadOp>(depSinkOp)) {
// For read-read access pairs, clear any slice bounds on sequential loops. // For read-read access pairs, clear any slice bounds on sequential loops.
// Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'. // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
getSequentialLoops(srcLoopIVs[0], &sequentialLoops); getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
&sequentialLoops);
} }
// Clear all sliced loop bounds beginning at the first sequential loop, or // Clear all sliced loop bounds beginning at the first sequential loop, or
// first loop with a slice fusion barrier attribute. // first loop with a slice fusion barrier attribute.
// TODO(andydavis, bondhugula) Use MemRef read/write regions instead of // TODO(andydavis, bondhugula) Use MemRef read/write regions instead of
// using 'kSliceFusionBarrierAttrName'. // using 'kSliceFusionBarrierAttrName'.
for (unsigned i = 0; i < numSrcLoopIVs; ++i) { auto getSliceLoop = [&](unsigned i) {
Value *iv = srcLoopIVs[i].getInductionVar(); return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
};
for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
Value *iv = getSliceLoop(i).getInductionVar();
if (sequentialLoops.count(iv) == 0 && if (sequentialLoops.count(iv) == 0 &&
srcLoopIVs[i].getAttr(kSliceFusionBarrierAttrName) == nullptr) getSliceLoop(i).getAttr(kSliceFusionBarrierAttrName) == nullptr)
continue; continue;
for (unsigned j = i; j < numSrcLoopIVs; ++j) { for (unsigned j = i; j < numSliceLoopIVs; ++j) {
sliceState->lbs[j] = AffineMap(); sliceState->lbs[j] = AffineMap();
sliceState->ubs[j] = AffineMap(); sliceState->ubs[j] = AffineMap();
} }
break; break;
} }
return success();
} }
/// Creates a computation slice of the loop nest surrounding 'srcOpInst', /// Creates a computation slice of the loop nest surrounding 'srcOpInst',

View File

@ -1329,7 +1329,9 @@ static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
for (unsigned i = maxDstLoopDepth; i >= 1; --i) { for (unsigned i = maxDstLoopDepth; i >= 1; --i) {
// Compute the union of slice bounds of all ops in 'dstLoadOpInsts'. // Compute the union of slice bounds of all ops in 'dstLoadOpInsts'.
if (failed(mlir::computeSliceUnion({srcOpInst}, dstLoadOpInsts, if (failed(mlir::computeSliceUnion({srcOpInst}, dstLoadOpInsts,
/*dstLoopDepth=*/i, /*loopDepth=*/i,
/*numCommonLoops=*/0,
/*isBackwardSlice=*/true,
&sliceStates[i - 1]))) { &sliceStates[i - 1]))) {
LLVM_DEBUG(llvm::dbgs() LLVM_DEBUG(llvm::dbgs()
<< "computeSliceUnion failed for loopDepth: " << i << "\n"); << "computeSliceUnion failed for loopDepth: " << i << "\n");
@ -1736,15 +1738,16 @@ public:
dstLoadOpInsts, dstStoreOpInsts, &sliceState, dstLoadOpInsts, dstStoreOpInsts, &sliceState,
&bestDstLoopDepth, maximalFusion)) &bestDstLoopDepth, maximalFusion))
continue; continue;
// TODO(andydavis) Remove assert and surrounding code when // TODO(andydavis) Remove the following test code when canFuseLoops
// canFuseLoops is fully functional. // is fully functional.
mlir::ComputationSliceState sliceUnion; mlir::ComputationSliceState sliceUnion;
FusionResult result = mlir::canFuseLoops( if (!maximalFusion) {
cast<AffineForOp>(srcNode->op), cast<AffineForOp>(dstNode->op), FusionResult result = mlir::canFuseLoops(
bestDstLoopDepth, &sliceUnion); cast<AffineForOp>(srcNode->op), cast<AffineForOp>(dstNode->op),
assert(result.value == FusionResult::Success); bestDstLoopDepth, &sliceUnion);
(void)result; assert(result.value == FusionResult::Success);
(void)result;
}
// Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'. // Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'.
auto sliceLoopNest = mlir::insertBackwardComputationSlice( auto sliceLoopNest = mlir::insertBackwardComputationSlice(
srcStoreOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState); srcStoreOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState);

View File

@ -45,6 +45,11 @@ static llvm::cl::opt<bool> clTestDependenceCheck(
llvm::cl::desc("Enable testing of loop fusion dependence check"), llvm::cl::desc("Enable testing of loop fusion dependence check"),
llvm::cl::cat(clOptionsCategory)); llvm::cl::cat(clOptionsCategory));
// Boolean command-line option "test-loop-fusion-slice-computation". When set,
// TestLoopFusion::runOnFunction calls testSliceComputation on each pair of
// distinct loops it visits.
static llvm::cl::opt<bool> clTestSliceComputation(
"test-loop-fusion-slice-computation",
llvm::cl::desc("Enable testing of loop fusion slice computation"),
llvm::cl::cat(clOptionsCategory));
namespace { namespace {
struct TestLoopFusion : public FunctionPass<TestLoopFusion> { struct TestLoopFusion : public FunctionPass<TestLoopFusion> {
@ -70,20 +75,74 @@ gatherLoops(Block *block, unsigned currLoopDepth,
} }
} }
// Run fusion dependence check on 'loops[i]' and 'loops[j]' at 'loopDepth'. // Run fusion dependence check on 'loops[i]' and 'loops[j]' at loop depths
// in range ['loopDepth' + 1, 'maxLoopDepth'].
// Emits a remark on 'loops[i]' if a fusion-preventing dependence exists. // Emits a remark on 'loops[i]' if a fusion-preventing dependence exists.
static void testDependenceCheck(SmallVector<AffineForOp, 2> &loops, unsigned i, static void testDependenceCheck(SmallVector<AffineForOp, 2> &loops, unsigned i,
unsigned j, unsigned loopDepth) { unsigned j, unsigned loopDepth,
unsigned maxLoopDepth) {
AffineForOp srcForOp = loops[i]; AffineForOp srcForOp = loops[i];
AffineForOp dstForOp = loops[j]; AffineForOp dstForOp = loops[j];
mlir::ComputationSliceState sliceUnion; mlir::ComputationSliceState sliceUnion;
// TODO(andydavis) Test at deeper loop depths current loop depth + 1. for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
FusionResult result = FusionResult result =
mlir::canFuseLoops(srcForOp, dstForOp, loopDepth + 1, &sliceUnion); mlir::canFuseLoops(srcForOp, dstForOp, d, &sliceUnion);
if (result.value == FusionResult::FailBlockDependence) { if (result.value == FusionResult::FailBlockDependence) {
srcForOp.getOperation()->emitRemark("block-level dependence preventing" srcForOp.getOperation()->emitRemark("block-level dependence preventing"
" fusion of loop nest ") " fusion of loop nest ")
<< i << " into loop nest " << j << " at depth " << loopDepth; << i << " into loop nest " << j << " at depth " << loopDepth;
}
}
}
// Returns the zero-based position of 'op' among the operations of its parent
// block, found by walking the block from its first operation.
static unsigned getBlockIndex(Operation &op) {
  unsigned position = 0;
  for (auto &candidate : *op.getBlock()) {
    if (&candidate == &op)
      return position;
    ++position;
  }
  // Falls through only if 'op' was never matched; the count of visited
  // operations is returned in that case.
  return position;
}
// Renders 'sliceUnion' as text: first the slice insertion point, then one
// "[lower-bound-map, upper-bound-map] " entry per slice loop.
static std::string getSliceStr(const mlir::ComputationSliceState &sliceUnion) {
  std::string text;
  llvm::raw_string_ostream stream(text);

  // The insertion point is reported as (loop-depth, operation-block-index).
  const unsigned insertDepth = getNestingDepth(*sliceUnion.insertPoint);
  const unsigned insertIndex = getBlockIndex(*sliceUnion.insertPoint);
  stream << "insert point: (" << insertDepth << ", " << insertIndex << ")";

  // Lower and upper bound maps are printed pairwise, so the two lists must
  // have the same length.
  assert(sliceUnion.lbs.size() == sliceUnion.ubs.size());
  stream << " loop bounds: ";
  const unsigned numBounds = sliceUnion.lbs.size();
  for (unsigned idx = 0; idx != numBounds; ++idx) {
    stream << '[';
    sliceUnion.lbs[idx].print(stream);
    stream << ", ";
    sliceUnion.ubs[idx].print(stream);
    stream << "] ";
  }
  return stream.str();
}
// Runs canFuseLoops on 'loops[i]' and 'loops[j]' at every loop depth in the
// range ['loopDepth' + 1, 'maxLoopDepth'].
// For each depth at which it succeeds, emits a remark on 'loops[j]' holding a
// string representation of the computed slice union.
static void testSliceComputation(SmallVector<AffineForOp, 2> &loops, unsigned i,
                                 unsigned j, unsigned loopDepth,
                                 unsigned maxLoopDepth) {
  AffineForOp firstLoop = loops[i];
  AffineForOp secondLoop = loops[j];
  for (unsigned depth = loopDepth + 1; depth <= maxLoopDepth; ++depth) {
    // A fresh slice state is used for each depth.
    mlir::ComputationSliceState slice;
    FusionResult outcome =
        mlir::canFuseLoops(firstLoop, secondLoop, depth, &slice);
    if (outcome.value != FusionResult::Success)
      continue;
    secondLoop.getOperation()->emitRemark("slice (")
        << " src loop: " << i << ", dst loop: " << j << ", depth: " << depth
        << " : " << getSliceStr(slice) << ")";
  }
}
@ -104,7 +163,9 @@ void TestLoopFusion::runOnFunction() {
if (j == k) if (j == k)
continue; continue;
if (clTestDependenceCheck) if (clTestDependenceCheck)
testDependenceCheck(loops, j, k, loopDepth); testDependenceCheck(loops, j, k, loopDepth, depthToLoops.size());
if (clTestSliceComputation)
testSliceComputation(loops, j, k, loopDepth, depthToLoops.size());
} }
} }
} }

View File

@ -192,11 +192,7 @@ gatherLoadsAndStores(AffineForOp forOp,
return !hasIfOp; return !hasIfOp;
} }
// TODO(andydavis) Add support for the following features in subsequent CLs: // TODO(andydavis) Prevent fusion of loop nests with side-effecting operations.
// *) Compute dependences of unfused src/dst loops.
// *) Compute dependences of src/dst loop as if they were fused.
// *) Check for fusion preventing dependences (e.g. a dependence which changes
// from loop-independent to backward loop-carried after fusion).
FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
unsigned dstLoopDepth, unsigned dstLoopDepth,
ComputationSliceState *srcSlice) { ComputationSliceState *srcSlice) {
@ -219,24 +215,35 @@ FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
return FusionResult::FailBlockDependence; return FusionResult::FailBlockDependence;
} }
// Gather all load and store ops in 'srcForOp'. // Check if 'srcForOp' precedes 'dstForOp' in 'block'.
SmallVector<Operation *, 4> srcLoadAndStoreOps; bool isSrcForOpBeforeDstForOp =
if (!gatherLoadsAndStores(srcForOp, srcLoadAndStoreOps)) { srcForOp.getOperation()->isBeforeInBlock(dstForOp.getOperation());
// 'forOpA' executes before 'forOpB' in 'block'.
auto forOpA = isSrcForOpBeforeDstForOp ? srcForOp : dstForOp;
auto forOpB = isSrcForOpBeforeDstForOp ? dstForOp : srcForOp;
// Gather all loads and stores from 'forOpA', which precedes 'forOpB' in 'block'.
SmallVector<Operation *, 4> opsA;
if (!gatherLoadsAndStores(forOpA, opsA)) {
LLVM_DEBUG(llvm::dbgs() << "Fusing loops with affine.if unsupported.\n."); LLVM_DEBUG(llvm::dbgs() << "Fusing loops with affine.if unsupported.\n.");
return FusionResult::FailPrecondition; return FusionResult::FailPrecondition;
} }
// Gather all load and store ops in 'dstForOp'. // Gather all loads and stores from 'forOpB', which succeeds 'forOpA' in 'block'.
SmallVector<Operation *, 4> dstLoadAndStoreOps; SmallVector<Operation *, 4> opsB;
if (!gatherLoadsAndStores(dstForOp, dstLoadAndStoreOps)) { if (!gatherLoadsAndStores(forOpB, opsB)) {
LLVM_DEBUG(llvm::dbgs() << "Fusing loops with affine.if unsupported.\n."); LLVM_DEBUG(llvm::dbgs() << "Fusing loops with affine.if unsupported.\n.");
return FusionResult::FailPrecondition; return FusionResult::FailPrecondition;
} }
// Compute union of computation slices computed from all pairs in // Calculate the number of common loops surrounding 'srcForOp' and 'dstForOp'.
// {'srcLoadAndStoreOps', 'dstLoadAndStoreOps'}. unsigned numCommonLoops = mlir::getNumCommonSurroundingLoops(
if (failed(mlir::computeSliceUnion(srcLoadAndStoreOps, dstLoadAndStoreOps, *srcForOp.getOperation(), *dstForOp.getOperation());
dstLoopDepth, srcSlice))) {
// Compute union of computation slices computed between all pairs of ops
// from 'forOpA' and 'forOpB'.
if (failed(mlir::computeSliceUnion(opsA, opsB, dstLoopDepth, numCommonLoops,
isSrcForOpBeforeDstForOp, srcSlice))) {
LLVM_DEBUG(llvm::dbgs() << "computeSliceUnion failed\n"); LLVM_DEBUG(llvm::dbgs() << "computeSliceUnion failed\n");
return FusionResult::FailPrecondition; return FusionResult::FailPrecondition;
} }

View File

@ -0,0 +1,145 @@
// RUN: mlir-opt %s -test-loop-fusion -test-loop-fusion-slice-computation -split-input-file -verify | FileCheck %s
// -----
// Two sibling depth-1 loops on the same memref: %i0 stores to %0[%i0] and %i1
// loads from %0[%i1]. A slice is expected in both directions at depth 1, each
// with loop bounds restricted to a single iteration [d0, d0 + 1).
// CHECK-LABEL: func @slice_depth1_loop_nest() {
func @slice_depth1_loop_nest() {
%0 = alloc() : memref<100xf32>
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}}
store %cst, %0[%i0] : memref<100xf32>
}
affine.for %i1 = 0 to 5 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}}
%1 = load %0[%i1] : memref<100xf32>
}
return
}
// -----
// Loop %i0 writes to locations [2, 17] and loop %i1 reads from locations [3, 6].
// Slice loop bounds should be adjusted such that the load/store are for the
// same location.
// CHECK-LABEL: func @slice_depth1_loop_nest_with_offsets() {
func @slice_depth1_loop_nest_with_offsets() {
%0 = alloc() : memref<100xf32>
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 2) loop bounds: [(d0) -> (d0 + 3), (d0) -> (d0 + 4)] )}}
%a0 = affine.apply (d0) -> (d0 + 2)(%i0)
store %cst, %0[%a0] : memref<100xf32>
}
affine.for %i1 = 4 to 8 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0 - 3), (d0) -> (d0 - 2)] )}}
%a1 = affine.apply (d0) -> (d0 - 1)(%i1)
%1 = load %0[%a1] : memref<100xf32>
}
return
}
// -----
// Slices at loop depth 1 should only slice the loop bounds of the first loop.
// Slices at loop depth 2 should slice loop bounds of both loops.
// CHECK-LABEL: func @slice_depth2_loop_nest() {
func @slice_depth2_loop_nest() {
%0 = alloc() : memref<100x100xf32>
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
// expected-remark@-2 {{slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
affine.for %i1 = 0 to 16 {
store %cst, %0[%i0, %i1] : memref<100x100xf32>
}
}
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
// expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
affine.for %i3 = 0 to 8 {
%1 = load %0[%i2, %i3] : memref<100x100xf32>
}
}
return
}
// -----
// The load at depth 1 in loop nest %i2 prevents slicing loop nest %i0 at depths
// greater than 1. However, loop nest %i2 can be sliced into loop nest %i0 at
// depths 1 and 2 because the dependent store in loop nest %i0 is at depth 2.
// CHECK-LABEL: func @slice_depth2_loop_nest_two_loads() {
func @slice_depth2_loop_nest_two_loads() {
%0 = alloc() : memref<100x100xf32>
%c0 = constant 0 : index
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0)[s0] -> (d0), (d0)[s0] -> (d0 + 1)] [(d0)[s0] -> (0), (d0)[s0] -> (8)] )}}
// expected-remark@-2 {{slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1)[s0] -> (d0), (d0, d1)[s0] -> (d0 + 1)] [(d0, d1)[s0] -> (0), (d0, d1)[s0] -> (8)] )}}
affine.for %i1 = 0 to 16 {
store %cst, %0[%i0, %i1] : memref<100x100xf32>
}
}
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0)[s0] -> (d0), (d0)[s0] -> (d0 + 1)] [(d0)[s0] -> (0), (d0)[s0] -> (8)] )}}
affine.for %i3 = 0 to 8 {
%1 = load %0[%i2, %i3] : memref<100x100xf32>
}
%2 = load %0[%i2, %c0] : memref<100x100xf32>
}
return
}
// -----
// The store at depth 1 in loop nest %i0 prevents slicing loop nest %i2 at
// depths greater than 1 into loop nest %i0. However, loop nest %i0 can be
// sliced into loop nest %i2 at depths 1 and 2 because the dependent load in
// loop nest %i2 is at depth 2.
// CHECK-LABEL: func @slice_depth2_loop_nest_two_stores() {
func @slice_depth2_loop_nest_two_stores() {
%0 = alloc() : memref<100x100xf32>
%c0 = constant 0 : index
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 2) loop bounds: [(d0)[s0] -> (d0), (d0)[s0] -> (d0 + 1)] [(d0)[s0] -> (0), (d0)[s0] -> (8)] )}}
affine.for %i1 = 0 to 16 {
store %cst, %0[%i0, %i1] : memref<100x100xf32>
}
store %cst, %0[%i0, %c0] : memref<100x100xf32>
}
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0)[s0] -> (d0), (d0)[s0] -> (d0 + 1)] [(d0)[s0] -> (0), (d0)[s0] -> (16)] )}}
// expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1)[s0] -> (d0), (d0, d1)[s0] -> (d0 + 1)] [(d0, d1)[s0] -> (0), (d0, d1)[s0] -> (16)] )}}
affine.for %i3 = 0 to 8 {
%1 = load %0[%i2, %i3] : memref<100x100xf32>
}
}
return
}
// -----
// Test loop nest which has a smaller outer trip count than its inner loop.
// CHECK-LABEL: func @slice_loop_nest_with_smaller_outer_trip_count() {
func @slice_loop_nest_with_smaller_outer_trip_count() {
%0 = alloc() : memref<100x100xf32>
%c0 = constant 0 : index
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (10)] )}}
// expected-remark@-2 {{slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
affine.for %i1 = 0 to 16 {
store %cst, %0[%i0, %i1] : memref<100x100xf32>
}
}
affine.for %i2 = 0 to 8 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (10)] )}}
// expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
affine.for %i3 = 0 to 10 {
%1 = load %0[%i2, %i3] : memref<100x100xf32>
}
}
return
}