LoopFusion: adds support for computing forward computation slices, which will enable fusion of consumer loop nests into their producers in subsequent CLs.

PiperOrigin-RevId: 253601994

Parent: a14eeacf2c
Commit: 898cf0e968
@@ -393,12 +393,12 @@ public:
bool lower = true);
/// Computes the lower and upper bounds of the first 'num' dimensional
/// identifiers as an affine map of the remaining identifiers (dimensional and
/// symbolic). This method is able to detect identifiers as floordiv's
/// and mod's of affine expressions of other identifiers with respect to
/// (positive) constants. Sets bound map to a null AffineMap if such a bound
/// can't be found (or yet unimplemented).
void getSliceBounds(unsigned num, MLIRContext *context,
/// identifiers (starting at 'offset') as an affine map of the remaining
/// identifiers (dimensional and symbolic). This method is able to detect
/// identifiers as floordiv's and mod's of affine expressions of other
/// identifiers with respect to (positive) constants. Sets bound map to a
/// null AffineMap if such a bound can't be found (or yet unimplemented).
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context,
SmallVectorImpl<AffineMap> *lbMaps,
SmallVectorImpl<AffineMap> *ubMaps);

@@ -648,13 +648,14 @@ public:
Optional<int64_t> getConstantUpperBound(unsigned pos) const;
/// Gets the lower and upper bound of the pos^th identifier treating
/// [dimStartPos, symbStartPos) as dimensions and [symStartPos,
/// getNumDimAndSymbolIds) as symbols. The returned multi-dimensional maps
/// in the pair represent the max and min of potentially multiple affine
/// expressions. The upper bound is exclusive. 'localExprs' holds pre-computed
/// AffineExpr's for all local identifiers in the system.
/// [0, offset) U [offset + num, symbStartPos) as dimensions and
/// [symStartPos, getNumDimAndSymbolIds) as symbols. The returned
/// multi-dimensional maps in the pair represent the max and min of
/// potentially multiple affine expressions. The upper bound is exclusive.
/// 'localExprs' holds pre-computed AffineExpr's for all local identifiers in
/// the system.
std::pair<AffineMap, AffineMap>
getLowerAndUpperBound(unsigned pos, unsigned dimStartPos,
getLowerAndUpperBound(unsigned pos, unsigned offset, unsigned num,
unsigned symStartPos, ArrayRef<AffineExpr> localExprs,
MLIRContext *context);

@@ -73,6 +73,8 @@ struct ComputationSliceState {
std::vector<SmallVector<Value *, 4>> lbOperands;
// List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
std::vector<SmallVector<Value *, 4>> ubOperands;
// Slice loop nest insertion point in target loop nest.
Block::iterator insertPoint;
// Adds to 'cst' with constraints which represent the slice bounds on 'ivs'
// in 'this'. Specifically, the values in 'ivs' are added to 'cst' as dim
// identifiers and the values in 'lb/ubOperands' are added as symbols.

@@ -85,19 +87,67 @@ struct ComputationSliceState {
void clearBounds();
};
/// Computes computation slice loop bounds for the loop nest surrounding
/// 'srcAccess', where the returned loop bound AffineMaps are functions of
/// loop IVs from the loop nest surrounding 'dstAccess'.
LogicalResult getBackwardComputationSliceState(
const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
unsigned dstLoopDepth, ComputationSliceState *sliceState);
/// Computes the computation slice loop bounds for one loop nest as affine maps
/// of the other loop nest's IVs and symbols, using 'dependenceConstraints'
/// computed between 'depSourceAccess' and 'depSinkAccess'.
/// If 'isBackwardSlice' is true, a backwards slice is computed in which the
/// slice bounds of loop nest surrounding 'depSourceAccess' are computed in
/// terms of loop IVs and symbols of the loop nest surrounding 'depSinkAccess'
/// at 'loopDepth'.
/// If 'isBackwardSlice' is false, a forward slice is computed in which the
/// slice bounds of loop nest surrounding 'depSinkAccess' are computed in terms
/// of loop IVs and symbols of the loop nest surrounding 'depSourceAccess' at
/// 'loopDepth'.
/// The slice loop bounds and associated operands are returned in 'sliceState'.
//
// Backward slice example:
//
// affine.for %i0 = 0 to 10 {
// store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess'
// }
// affine.for %i1 = 0 to 10 {
// %v = load %0[%i1] : memref<100xf32> // 'depSinkAccess'
// }
//
// // Backward computation slice of loop nest '%i0'.
// affine.for %i0 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 1)(%i1) {
// store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess'
// }
//
// Forward slice example:
//
// affine.for %i0 = 0 to 10 {
// store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess'
// }
// affine.for %i1 = 0 to 10 {
// %v = load %0[%i1] : memref<100xf32> // 'depSinkAccess'
// }
//
// // Forward computation slice of loop nest '%i1'.
// affine.for %i1 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 1)(%i0) {
// %v = load %0[%i1] : memref<100xf32> // 'depSinkAccess'
// }
//
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp,
FlatAffineConstraints *dependenceConstraints,
unsigned loopDepth, bool isBackwardSlice,
ComputationSliceState *sliceState);
/// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'dstLoopDepth' between all pairs in 'srcOps' and 'dstOp' which access the
/// same memref. Returns 'success' if union was computed, 'failure' otherwise.
LogicalResult computeSliceUnion(ArrayRef<Operation *> srcOps,
ArrayRef<Operation *> dstOps,
unsigned dstLoopDepth,
/// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB'.
/// The parameter 'numCommonLoops' is the number of loops common to the
/// operations in 'opsA' and 'opsB'.
/// If 'isBackwardSlice' is true, computes slice bounds for loop nest
/// surrounding ops in 'opsA', as a function of IVs and symbols of loop nest
/// surrounding ops in 'opsB' at 'loopDepth'.
/// If 'isBackwardSlice' is false, computes slice bounds for loop nest
/// surrounding ops in 'opsB', as a function of IVs and symbols of loop nest
/// surrounding ops in 'opsA' at 'loopDepth'.
/// Returns 'success' if union was computed, 'failure' otherwise.
// TODO(andydavis) Change this API to take 'forOpA'/'forOpB'.
LogicalResult computeSliceUnion(ArrayRef<Operation *> opsA,
ArrayRef<Operation *> opsB, unsigned loopDepth,
unsigned numCommonLoops, bool isBackwardSlice,
ComputationSliceState *sliceUnion);
/// Creates a clone of the computation contained in the loop nest surrounding
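For orientation, here is a minimal sketch (not part of this change) of how a caller might drive the getComputationSliceState() declaration above. 'depSourceOp', 'depSinkOp', and 'depConstraints' are assumed to come from a prior checkMemrefAccessDependence() call, and the wrapper name is hypothetical; the call projects identifiers out of the constraint system, so each invocation needs its own fresh dependence check.

// Hedged sketch only; assumes the declarations introduced in this change.
void computeOneSlice(mlir::Operation *depSourceOp, mlir::Operation *depSinkOp,
                     mlir::FlatAffineConstraints *depConstraints,
                     unsigned loopDepth, bool wantBackwardSlice) {
  mlir::ComputationSliceState slice;
  // isBackwardSlice == true: bounds of the loop nest around 'depSourceOp' in
  // terms of IVs/symbols of the loop nest around 'depSinkOp' at 'loopDepth'.
  // isBackwardSlice == false: the roles are reversed (forward slice).
  mlir::getComputationSliceState(depSourceOp, depSinkOp, depConstraints,
                                 loopDepth, wantBackwardSlice, &slice);
  // 'slice.lbs'/'slice.ubs' now hold per-IV bound maps, and
  // 'slice.insertPoint' the insertion point in the target loop nest.
}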
@@ -1423,19 +1423,28 @@ void FlatAffineConstraints::removeRedundantInequalities() {
}
std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
unsigned pos, unsigned dimStartPos, unsigned symStartPos,
unsigned pos, unsigned offset, unsigned num, unsigned symStartPos,
ArrayRef<AffineExpr> localExprs, MLIRContext *context) {
assert(pos < dimStartPos && "invalid dim start pos");
assert(symStartPos >= dimStartPos && "invalid sym start pos");
assert(pos + offset < getNumDimIds() && "invalid dim start pos");
assert(symStartPos >= (pos + offset) && "invalid sym start pos");
assert(getNumLocalIds() == localExprs.size() &&
"incorrect local exprs count");
SmallVector<unsigned, 4> lbIndices, ubIndices;
getLowerAndUpperBoundIndices(*this, pos, &lbIndices, &ubIndices);
getLowerAndUpperBoundIndices(*this, pos + offset, &lbIndices, &ubIndices);
/// Add to 'b' from 'a' in set [0, offset) U [offset + num, symbStartPos).
auto addCoeffs = [&](ArrayRef<int64_t> a, SmallVectorImpl<int64_t> &b) {
b.clear();
for (unsigned i = 0, e = a.size(); i < e; ++i) {
if (i < offset || i >= offset + num)
b.push_back(a[i]);
}
};
SmallVector<int64_t, 8> lb, ub;
SmallVector<AffineExpr, 4> exprs;
unsigned dimCount = symStartPos - dimStartPos;
unsigned dimCount = symStartPos - num;
unsigned symCount = getNumDimAndSymbolIds() - symStartPos;
exprs.reserve(lbIndices.size());
// Lower bound expressions.

@@ -1444,7 +1453,7 @@ std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
// Extract the lower bound (in terms of other coeff's + const), i.e., if
// i - j + 1 >= 0 is the constraint, 'pos' is for i the lower bound is j
// - 1.
lb.assign(ineq.begin() + dimStartPos, ineq.end());
addCoeffs(ineq, lb);
std::transform(lb.begin(), lb.end(), lb.begin(), std::negate<int64_t>());
auto expr = mlir::toAffineExpr(lb, dimCount, symCount, localExprs, context);
exprs.push_back(expr);

@@ -1458,7 +1467,7 @@ std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
for (auto idx : ubIndices) {
auto ineq = getInequality(idx);
// Extract the upper bound (in terms of other coeff's + const).
ub.assign(ineq.begin() + dimStartPos, ineq.end());
addCoeffs(ineq, ub);
auto expr = mlir::toAffineExpr(ub, dimCount, symCount, localExprs, context);
// Upper bound is exclusive.
exprs.push_back(expr + 1);

@@ -1470,10 +1479,12 @@ std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
}
/// Computes the lower and upper bounds of the first 'num' dimensional
/// identifiers as affine maps of the remaining identifiers (dimensional and
/// symbolic identifiers). Local identifiers are themselves explicitly computed
/// as affine functions of other identifiers in this process if needed.
void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
/// identifiers (starting at 'offset') as affine maps of the remaining
/// identifiers (dimensional and symbolic identifiers). Local identifiers are
/// themselves explicitly computed as affine functions of other identifiers in
/// this process if needed.
void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num,
MLIRContext *context,
SmallVectorImpl<AffineMap> *lbMaps,
SmallVectorImpl<AffineMap> *ubMaps) {
assert(num < getNumDimIds() && "invalid range");

@@ -1488,8 +1499,12 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
// Record computed/detected identifiers.
SmallVector<AffineExpr, 8> memo(getNumIds());
// Initialize dimensional and symbolic identifiers.
for (unsigned i = num, e = getNumDimIds(); i < e; i++)
memo[i] = getAffineDimExpr(i - num, context);
for (unsigned i = 0, e = getNumDimIds(); i < e; i++) {
if (i < offset)
memo[i] = getAffineDimExpr(i, context);
else if (i >= offset + num)
memo[i] = getAffineDimExpr(i - num, context);
}
for (unsigned i = getNumDimIds(), e = getNumDimAndSymbolIds(); i < e; i++)
memo[i] = getAffineSymbolExpr(i - getNumDimIds(), context);

@@ -1578,7 +1593,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
for (unsigned pos = 0; pos < num; pos++) {
unsigned numMapDims = getNumDimIds() - num;
unsigned numMapSymbols = getNumSymbolIds();
AffineExpr expr = memo[pos];
AffineExpr expr = memo[pos + offset];
if (expr)
expr = simplifyAffineExpr(expr, numMapDims, numMapSymbols);

@@ -1601,7 +1616,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
tmpClone->removeRedundantInequalities();
}
std::tie(lbMap, ubMap) = tmpClone->getLowerAndUpperBound(
pos, num, getNumDimIds(), {}, context);
pos, offset, num, getNumDimIds(), {}, context);
}
// If the above fails, we'll just use the constant lower bound and the

@@ -1612,7 +1627,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
if (!lbMap || lbMap.getNumResults() > 1) {
LLVM_DEBUG(llvm::dbgs()
<< "WARNING: Potentially over-approximating slice lb\n");
auto lbConst = getConstantLowerBound(pos);
auto lbConst = getConstantLowerBound(pos + offset);
if (lbConst.hasValue()) {
lbMap = AffineMap::get(
numMapDims, numMapSymbols,

@@ -1622,7 +1637,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
if (!ubMap || ubMap.getNumResults() > 1) {
LLVM_DEBUG(llvm::dbgs()
<< "WARNING: Potentially over-approximating slice ub\n");
auto ubConst = getConstantUpperBound(pos);
auto ubConst = getConstantUpperBound(pos + offset);
if (ubConst.hasValue()) {
(ubMap) = AffineMap::get(
numMapDims, numMapSymbols,

@@ -1630,9 +1645,11 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
}
}
}
LLVM_DEBUG(llvm::dbgs() << "lb map for pos = " << Twine(pos) << ", expr: ");
LLVM_DEBUG(llvm::dbgs()
<< "lb map for pos = " << Twine(pos + offset) << ", expr: ");
LLVM_DEBUG(lbMap.dump(););
LLVM_DEBUG(llvm::dbgs() << "ub map for pos = " << Twine(pos) << ", expr: ");
LLVM_DEBUG(llvm::dbgs()
<< "ub map for pos = " << Twine(pos + offset) << ", expr: ");
LLVM_DEBUG(ubMap.dump(););
}
}
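The coefficient windowing used by the new 'addCoeffs' helper and the 'memo' initialization above (keep [0, offset) U [offset + num, end), drop the sliced identifiers) can be illustrated with a small standalone sketch. Plain C++ with toy values, independent of MLIR:

#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors 'addCoeffs' above: keep only the coefficients outside the sliced
// window [offset, offset + num); the survivors become the dims/symbols of
// the resulting bound map. Toy values, not tied to a real constraint system.
static std::vector<int64_t> keepOutsideWindow(const std::vector<int64_t> &a,
                                              unsigned offset, unsigned num) {
  std::vector<int64_t> b;
  for (unsigned i = 0, e = a.size(); i < e; ++i)
    if (i < offset || i >= offset + num)
      b.push_back(a[i]);
  return b;
}

int main() {
  // Inequality coefficients for identifiers d0..d4 plus a constant term.
  std::vector<int64_t> ineq = {1, -1, 2, 0, 3, 7};
  // Slice identifiers d1..d2 (offset = 1, num = 2).
  for (int64_t c : keepOutsideWindow(ineq, /*offset=*/1, /*num=*/2))
    std::cout << c << ' ';
  std::cout << '\n'; // prints: 1 0 3 7
  return 0;
}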
@@ -504,48 +504,84 @@ LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value *, 8> &ivs,
return success();
}
/// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'dstLoopDepth' between all pairs in 'srcOps' and 'dstOp' which access the
/// same memref. Returns 'Success' if union was computed, 'failure' otherwise.
LogicalResult mlir::computeSliceUnion(ArrayRef<Operation *> srcOps,
ArrayRef<Operation *> dstOps,
unsigned dstLoopDepth,
ComputationSliceState *sliceUnion) {
unsigned numSrcOps = srcOps.size();
unsigned numDstOps = dstOps.size();
assert(numSrcOps > 0 && numDstOps > 0);
// Returns the innermost common loop depth for the set of operations in 'ops'.
// TODO(andydavis) Move this to LoopUtils.
static unsigned
getInnermostCommonLoopDepth(ArrayRef<Operation *> ops,
SmallVectorImpl<AffineForOp> &surroundingLoops) {
unsigned numOps = ops.size();
assert(numOps > 0);
// Compute the intersection of 'srcMemrefToOps' and 'dstMemrefToOps'.
llvm::SmallDenseSet<Value *> memrefIntersection;
for (auto *srcOp : srcOps) {
auto *srcMemRef = getLoadOrStoreMemRef(srcOp);
for (auto *dstOp : dstOps) {
if (srcMemRef == getLoadOrStoreMemRef(dstOp))
memrefIntersection.insert(srcMemRef);
}
std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
for (unsigned i = 0; i < numOps; ++i) {
getLoopIVs(*ops[i], &loops[i]);
loopDepthLimit =
std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
}
// Return failure if 'memrefIntersection' is empty.
if (memrefIntersection.empty())
return failure();
// Compute the union of slice bounds between all pairs in 'srcOps' and
// 'dstOps' in 'sliceUnionCst'.
unsigned loopDepth = 0;
for (unsigned d = 0; d < loopDepthLimit; ++d) {
unsigned i;
for (i = 1; i < numOps; ++i) {
if (loops[i - 1][d] != loops[i][d])
return loopDepth;
}
surroundingLoops.push_back(loops[i - 1][d]);
++loopDepth;
}
return loopDepth;
}
/// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB'.
/// Returns 'Success' if union was computed, 'failure' otherwise.
LogicalResult mlir::computeSliceUnion(ArrayRef<Operation *> opsA,
ArrayRef<Operation *> opsB,
unsigned loopDepth,
unsigned numCommonLoops,
bool isBackwardSlice,
ComputationSliceState *sliceUnion) {
// Compute the union of slice bounds between all pairs in 'opsA' and
// 'opsB' in 'sliceUnionCst'.
FlatAffineConstraints sliceUnionCst;
assert(sliceUnionCst.getNumDimAndSymbolIds() == 0);
for (unsigned i = 0; i < numSrcOps; ++i) {
MemRefAccess srcAccess(srcOps[i]);
for (unsigned j = 0; j < numDstOps; ++j) {
MemRefAccess dstAccess(dstOps[j]);
std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
for (unsigned i = 0, numOpsA = opsA.size(); i < numOpsA; ++i) {
MemRefAccess srcAccess(opsA[i]);
for (unsigned j = 0, numOpsB = opsB.size(); j < numOpsB; ++j) {
MemRefAccess dstAccess(opsB[j]);
if (srcAccess.memref != dstAccess.memref)
continue;
// Compute slice bounds for 'srcAccess' and 'dstAccess'.
ComputationSliceState tmpSliceState;
if (failed(mlir::getBackwardComputationSliceState(
srcAccess, dstAccess, dstLoopDepth, &tmpSliceState))) {
LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice bounds\n.");
// Check if 'loopDepth' exceeds nesting depth of src/dst ops.
if ((!isBackwardSlice && loopDepth > getNestingDepth(*opsA[i])) ||
(isBackwardSlice && loopDepth > getNestingDepth(*opsB[j]))) {
LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n.");
return failure();
}
bool readReadAccesses =
isa<LoadOp>(srcAccess.opInst) && isa<LoadOp>(dstAccess.opInst);
FlatAffineConstraints dependenceConstraints;
// Check dependence between 'srcAccess' and 'dstAccess'.
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
&dependenceConstraints, /*dependenceComponents=*/nullptr,
/*allowRAR=*/readReadAccesses);
if (result.value == DependenceResult::Failure) {
LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n.");
return failure();
}
if (result.value == DependenceResult::NoDependence)
continue;
dependentOpPairs.push_back({opsA[i], opsB[j]});
// Compute slice bounds for 'srcAccess' and 'dstAccess'.
ComputationSliceState tmpSliceState;
mlir::getComputationSliceState(opsA[i], opsB[j], &dependenceConstraints,
loopDepth, isBackwardSlice,
&tmpSliceState);
if (sliceUnionCst.getNumDimAndSymbolIds() == 0) {
// Initialize 'sliceUnionCst' with the bounds computed in previous step.
if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {

@@ -599,116 +635,147 @@ LogicalResult mlir::computeSliceUnion(ArrayRef<Operation *> srcOps,
}
}
// Store 'numSrcLoopIvs' before converting dst loop IVs to dims.
unsigned numSrcLoopIVs = sliceUnionCst.getNumDimIds();
// Empty union.
if (sliceUnionCst.getNumDimAndSymbolIds() == 0)
return failure();
// Gather loops surrounding ops from loop nest where slice will be inserted.
SmallVector<Operation *, 4> ops;
for (auto &dep : dependentOpPairs) {
ops.push_back(isBackwardSlice ? dep.second : dep.first);
}
SmallVector<AffineForOp, 4> surroundingLoops;
unsigned innermostCommonLoopDepth =
getInnermostCommonLoopDepth(ops, surroundingLoops);
if (loopDepth > innermostCommonLoopDepth) {
LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n.");
return failure();
}
// Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
unsigned numSliceLoopIVs = sliceUnionCst.getNumDimIds();
// Convert any dst loop IVs which are symbol identifiers to dim identifiers.
sliceUnionCst.convertLoopIVSymbolsToDims();
sliceUnion->clearBounds();
sliceUnion->lbs.resize(numSrcLoopIVs, AffineMap());
sliceUnion->ubs.resize(numSrcLoopIVs, AffineMap());
sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());
// Get slice bounds from slice union constraints 'sliceUnionCst'.
sliceUnionCst.getSliceBounds(numSrcLoopIVs, srcOps[0]->getContext(),
&sliceUnion->lbs, &sliceUnion->ubs);
sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
opsA[0]->getContext(), &sliceUnion->lbs,
&sliceUnion->ubs);
// Add slice bound operands of union.
SmallVector<Value *, 4> sliceBoundOperands;
sliceUnionCst.getIdValues(numSrcLoopIVs,
sliceUnionCst.getIdValues(numSliceLoopIVs,
sliceUnionCst.getNumDimAndSymbolIds(),
&sliceBoundOperands);
// Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
sliceUnion->ivs.clear();
sliceUnionCst.getIdValues(0, numSrcLoopIVs, &sliceUnion->ivs);
sliceUnionCst.getIdValues(0, numSliceLoopIVs, &sliceUnion->ivs);
// Set loop nest insertion point to block start at 'loopDepth'.
sliceUnion->insertPoint =
isBackwardSlice
? surroundingLoops[loopDepth - 1].getBody()->begin()
: std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
// Give each bound its own copy of 'sliceBoundOperands' for subsequent
// canonicalization.
sliceUnion->lbOperands.resize(numSrcLoopIVs, sliceBoundOperands);
sliceUnion->ubOperands.resize(numSrcLoopIVs, sliceBoundOperands);
sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
return success();
}
const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
// Computes memref dependence between 'srcAccess' and 'dstAccess', projects
// out any dst loop IVs at depth greater than 'dstLoopDepth', and computes slice
// bounds in 'sliceState' which represent the src IVs in terms of the dst IVs,
// symbols and constants.
LogicalResult mlir::getBackwardComputationSliceState(
const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
unsigned dstLoopDepth, ComputationSliceState *sliceState) {
bool readReadAccesses =
isa<LoadOp>(srcAccess.opInst) && isa<LoadOp>(dstAccess.opInst);
FlatAffineConstraints dependenceConstraints;
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, /*loopDepth=*/1, &dependenceConstraints,
/*dependenceComponents=*/nullptr, /*allowRAR=*/readReadAccesses);
if (!hasDependence(result)) {
return failure();
}
// Computes slice bounds by projecting out any loop IVs from
// 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
// bounds in 'sliceState' which represent the one loop nest's IVs in terms of
// the other loop nest's IVs, symbols and constants (using 'isBackwardsSlice').
void mlir::getComputationSliceState(
Operation *depSourceOp, Operation *depSinkOp,
FlatAffineConstraints *dependenceConstraints, unsigned loopDepth,
bool isBackwardSlice, ComputationSliceState *sliceState) {
// Get loop nest surrounding src operation.
SmallVector<AffineForOp, 4> srcLoopIVs;
getLoopIVs(*srcAccess.opInst, &srcLoopIVs);
getLoopIVs(*depSourceOp, &srcLoopIVs);
unsigned numSrcLoopIVs = srcLoopIVs.size();
// Get loop nest surrounding dst operation.
SmallVector<AffineForOp, 4> dstLoopIVs;
getLoopIVs(*dstAccess.opInst, &dstLoopIVs);
getLoopIVs(*depSinkOp, &dstLoopIVs);
unsigned numDstLoopIVs = dstLoopIVs.size();
if (dstLoopDepth > numDstLoopIVs) {
dstAccess.opInst->emitError("invalid destination loop depth");
return failure();
}
// Project out dimensions other than those up to 'dstLoopDepth'.
dependenceConstraints.projectOut(numSrcLoopIVs + dstLoopDepth,
numDstLoopIVs - dstLoopDepth);
assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
(isBackwardSlice && loopDepth <= numDstLoopIVs));
// Add src loop IV values to 'sliceState'.
dependenceConstraints.getIdValues(0, numSrcLoopIVs, &sliceState->ivs);
// Project out dimensions other than those up to 'loopDepth'.
unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
unsigned num =
isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
dependenceConstraints->projectOut(pos, num);
// Add slice loop IV values to 'sliceState'.
unsigned offset = isBackwardSlice ? 0 : loopDepth;
unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
dependenceConstraints->getIdValues(offset, offset + numSliceLoopIVs,
&sliceState->ivs);
// Set up lower/upper bound affine maps for the slice.
sliceState->lbs.resize(numSrcLoopIVs, AffineMap());
sliceState->ubs.resize(numSrcLoopIVs, AffineMap());
sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
// Get bounds for src IVs in terms of dst IVs, symbols, and constants.
dependenceConstraints.getSliceBounds(numSrcLoopIVs,
srcAccess.opInst->getContext(),
&sliceState->lbs, &sliceState->ubs);
// Get bounds for slice IVs in terms of other IVs, symbols, and constants.
dependenceConstraints->getSliceBounds(offset, numSliceLoopIVs,
depSourceOp->getContext(),
&sliceState->lbs, &sliceState->ubs);
// Set up bound operands for the slice's lower and upper bounds.
SmallVector<Value *, 4> sliceBoundOperands;
dependenceConstraints.getIdValues(
numSrcLoopIVs, dependenceConstraints.getNumDimAndSymbolIds(),
&sliceBoundOperands);
unsigned numDimsAndSymbols = dependenceConstraints->getNumDimAndSymbolIds();
for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
if (i < offset || i >= offset + numSliceLoopIVs) {
sliceBoundOperands.push_back(dependenceConstraints->getIdValue(i));
}
}
// Give each bound its own copy of 'sliceBoundOperands' for subsequent
// canonicalization.
sliceState->lbOperands.resize(numSrcLoopIVs, sliceBoundOperands);
sliceState->ubOperands.resize(numSrcLoopIVs, sliceBoundOperands);
sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
// Set destination loop nest insertion point to block start at 'dstLoopDepth'.
sliceState->insertPoint =
isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
: std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
llvm::SmallDenseSet<Value *, 8> sequentialLoops;
if (readReadAccesses) {
if (isa<LoadOp>(depSourceOp) && isa<LoadOp>(depSinkOp)) {
// For read-read access pairs, clear any slice bounds on sequential loops.
// Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
getSequentialLoops(srcLoopIVs[0], &sequentialLoops);
getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
&sequentialLoops);
}
// Clear all sliced loop bounds beginning at the first sequential loop, or
// first loop with a slice fusion barrier attribute.
// TODO(andydavis, bondhugula) Use MemRef read/write regions instead of
// using 'kSliceFusionBarrierAttrName'.
for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
Value *iv = srcLoopIVs[i].getInductionVar();
auto getSliceLoop = [&](unsigned i) {
return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
};
for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
Value *iv = getSliceLoop(i).getInductionVar();
if (sequentialLoops.count(iv) == 0 &&
srcLoopIVs[i].getAttr(kSliceFusionBarrierAttrName) == nullptr)
getSliceLoop(i).getAttr(kSliceFusionBarrierAttrName) == nullptr)
continue;
for (unsigned j = i; j < numSrcLoopIVs; ++j) {
for (unsigned j = i; j < numSliceLoopIVs; ++j) {
sliceState->lbs[j] = AffineMap();
sliceState->ubs[j] = AffineMap();
}
break;
}
return success();
}
/// Creates a computation slice of the loop nest surrounding 'srcOpInst',
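A hedged sketch of how the generalized computeSliceUnion() above might be invoked for a forward slice (the consumer loop nest sliced in terms of the producer's IVs and symbols). 'producerOps'/'consumerOps' and the wrapper name are illustrative; the ops are assumed to have been gathered from two sibling loop nests, e.g. via gatherLoadsAndStores() as in LoopFusionUtils in this change.

// Hedged sketch only; relies solely on the signature introduced above.
mlir::LogicalResult unionForwardSlice(
    llvm::ArrayRef<mlir::Operation *> producerOps,
    llvm::ArrayRef<mlir::Operation *> consumerOps, unsigned loopDepth,
    unsigned numCommonLoops, mlir::ComputationSliceState *sliceUnion) {
  // isBackwardSlice=false: slice bounds are computed for the loop nest
  // surrounding 'consumerOps' (opsB) as functions of the IVs/symbols of the
  // loop nest surrounding 'producerOps' (opsA) at 'loopDepth'.
  return mlir::computeSliceUnion(producerOps, consumerOps, loopDepth,
                                 numCommonLoops, /*isBackwardSlice=*/false,
                                 sliceUnion);
}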
@@ -1329,7 +1329,9 @@ static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
for (unsigned i = maxDstLoopDepth; i >= 1; --i) {
// Compute the union of slice bounds of all ops in 'dstLoadOpInsts'.
if (failed(mlir::computeSliceUnion({srcOpInst}, dstLoadOpInsts,
/*dstLoopDepth=*/i,
/*loopDepth=*/i,
/*numCommonLoops=*/0,
/*isBackwardSlice=*/true,
&sliceStates[i - 1]))) {
LLVM_DEBUG(llvm::dbgs()
<< "computeSliceUnion failed for loopDepth: " << i << "\n");

@@ -1736,15 +1738,16 @@ public:
dstLoadOpInsts, dstStoreOpInsts, &sliceState,
&bestDstLoopDepth, maximalFusion))
continue;
// TODO(andydavis) Remove assert and surrounding code when
// canFuseLoops is fully functional.
// TODO(andydavis) Remove the following test code when canFuseLoops
// is fully functional.
mlir::ComputationSliceState sliceUnion;
FusionResult result = mlir::canFuseLoops(
cast<AffineForOp>(srcNode->op), cast<AffineForOp>(dstNode->op),
bestDstLoopDepth, &sliceUnion);
assert(result.value == FusionResult::Success);
(void)result;
if (!maximalFusion) {
FusionResult result = mlir::canFuseLoops(
cast<AffineForOp>(srcNode->op), cast<AffineForOp>(dstNode->op),
bestDstLoopDepth, &sliceUnion);
assert(result.value == FusionResult::Success);
(void)result;
}
// Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'.
auto sliceLoopNest = mlir::insertBackwardComputationSlice(
srcStoreOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState);
@@ -45,6 +45,11 @@ static llvm::cl::opt<bool> clTestDependenceCheck(
llvm::cl::desc("Enable testing of loop fusion dependence check"),
llvm::cl::cat(clOptionsCategory));
static llvm::cl::opt<bool> clTestSliceComputation(
"test-loop-fusion-slice-computation",
llvm::cl::desc("Enable testing of loop fusion slice computation"),
llvm::cl::cat(clOptionsCategory));
namespace {
struct TestLoopFusion : public FunctionPass<TestLoopFusion> {

@@ -70,20 +75,74 @@ gatherLoops(Block *block, unsigned currLoopDepth,
}
}
// Run fusion dependence check on 'loops[i]' and 'loops[j]' at 'loopDepth'.
// Run fusion dependence check on 'loops[i]' and 'loops[j]' at loop depths
// in range ['loopDepth' + 1, 'maxLoopDepth'].
// Emits a remark on 'loops[i]' if a fusion-preventing dependence exists.
static void testDependenceCheck(SmallVector<AffineForOp, 2> &loops, unsigned i,
unsigned j, unsigned loopDepth) {
unsigned j, unsigned loopDepth,
unsigned maxLoopDepth) {
AffineForOp srcForOp = loops[i];
AffineForOp dstForOp = loops[j];
mlir::ComputationSliceState sliceUnion;
// TODO(andydavis) Test at deeper loop depths current loop depth + 1.
FusionResult result =
mlir::canFuseLoops(srcForOp, dstForOp, loopDepth + 1, &sliceUnion);
if (result.value == FusionResult::FailBlockDependence) {
srcForOp.getOperation()->emitRemark("block-level dependence preventing"
" fusion of loop nest ")
<< i << " into loop nest " << j << " at depth " << loopDepth;
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
FusionResult result =
mlir::canFuseLoops(srcForOp, dstForOp, d, &sliceUnion);
if (result.value == FusionResult::FailBlockDependence) {
srcForOp.getOperation()->emitRemark("block-level dependence preventing"
" fusion of loop nest ")
<< i << " into loop nest " << j << " at depth " << loopDepth;
}
}
}
// Returns the index of 'op' in its block.
static unsigned getBlockIndex(Operation &op) {
unsigned index = 0;
for (auto &opX : *op.getBlock()) {
if (&op == &opX)
break;
++index;
}
return index;
}
// Returns a string representation of 'sliceUnion'.
static std::string getSliceStr(const mlir::ComputationSliceState &sliceUnion) {
std::string result;
llvm::raw_string_ostream os(result);
// Slice insertion point format [loop-depth, operation-block-index]
unsigned ipd = getNestingDepth(*sliceUnion.insertPoint);
unsigned ipb = getBlockIndex(*sliceUnion.insertPoint);
os << "insert point: (" << std::to_string(ipd) << ", " << std::to_string(ipb)
<< ")";
assert(sliceUnion.lbs.size() == sliceUnion.ubs.size());
os << " loop bounds: ";
for (unsigned k = 0, e = sliceUnion.lbs.size(); k < e; ++k) {
os << '[';
sliceUnion.lbs[k].print(os);
os << ", ";
sliceUnion.ubs[k].print(os);
os << "] ";
}
return os.str();
}
// Computes fusion slice union on 'loops[i]' and 'loops[j]' at loop depths
// in range ['loopDepth' + 1, 'maxLoopDepth'].
// Emits a string representation of the slice union as a remark on 'loops[j]'.
static void testSliceComputation(SmallVector<AffineForOp, 2> &loops, unsigned i,
unsigned j, unsigned loopDepth,
unsigned maxLoopDepth) {
AffineForOp forOpA = loops[i];
AffineForOp forOpB = loops[j];
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
mlir::ComputationSliceState sliceUnion;
FusionResult result = mlir::canFuseLoops(forOpA, forOpB, d, &sliceUnion);
if (result.value == FusionResult::Success) {
forOpB.getOperation()->emitRemark("slice (")
<< " src loop: " << i << ", dst loop: " << j << ", depth: " << d
<< " : " << getSliceStr(sliceUnion) << ")";
}
}
}

@@ -104,7 +163,9 @@ void TestLoopFusion::runOnFunction() {
if (j == k)
continue;
if (clTestDependenceCheck)
testDependenceCheck(loops, j, k, loopDepth);
testDependenceCheck(loops, j, k, loopDepth, depthToLoops.size());
if (clTestSliceComputation)
testSliceComputation(loops, j, k, loopDepth, depthToLoops.size());
}
}
}
@@ -192,11 +192,7 @@ gatherLoadsAndStores(AffineForOp forOp,
return !hasIfOp;
}
// TODO(andydavis) Add support for the following features in subsequent CLs:
// *) Compute dependences of unfused src/dst loops.
// *) Compute dependences of src/dst loop as if they were fused.
// *) Check for fusion preventing dependences (e.g. a dependence which changes
// from loop-independent to backward loop-carried after fusion).
// TODO(andydavis) Prevent fusion of loop nests with side-effecting operations.
FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
unsigned dstLoopDepth,
ComputationSliceState *srcSlice) {

@@ -219,24 +215,35 @@ FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
return FusionResult::FailBlockDependence;
}
// Gather all load and store ops in 'srcForOp'.
SmallVector<Operation *, 4> srcLoadAndStoreOps;
if (!gatherLoadsAndStores(srcForOp, srcLoadAndStoreOps)) {
// Check if 'srcForOp' precedes 'dstForOp' in 'block'.
bool isSrcForOpBeforeDstForOp =
srcForOp.getOperation()->isBeforeInBlock(dstForOp.getOperation());
// 'forOpA' executes before 'forOpB' in 'block'.
auto forOpA = isSrcForOpBeforeDstForOp ? srcForOp : dstForOp;
auto forOpB = isSrcForOpBeforeDstForOp ? dstForOp : srcForOp;
// Gather all load and store from 'forOpA' which precedes 'forOpB' in 'block'.
SmallVector<Operation *, 4> opsA;
if (!gatherLoadsAndStores(forOpA, opsA)) {
LLVM_DEBUG(llvm::dbgs() << "Fusing loops with affine.if unsupported.\n.");
return FusionResult::FailPrecondition;
}
// Gather all load and store ops in 'dstForOp'.
SmallVector<Operation *, 4> dstLoadAndStoreOps;
if (!gatherLoadsAndStores(dstForOp, dstLoadAndStoreOps)) {
// Gather all load and store from 'forOpB' which succeeds 'forOpA' in 'block'.
SmallVector<Operation *, 4> opsB;
if (!gatherLoadsAndStores(forOpB, opsB)) {
LLVM_DEBUG(llvm::dbgs() << "Fusing loops with affine.if unsupported.\n.");
return FusionResult::FailPrecondition;
}
// Compute union of computation slices computed from all pairs in
// {'srcLoadAndStoreOps', 'dstLoadAndStoreOps'}.
if (failed(mlir::computeSliceUnion(srcLoadAndStoreOps, dstLoadAndStoreOps,
dstLoopDepth, srcSlice))) {
// Calculate the number of common loops surrounding 'srcForOp' and 'dstForOp'.
unsigned numCommonLoops = mlir::getNumCommonSurroundingLoops(
*srcForOp.getOperation(), *dstForOp.getOperation());
// Compute union of computation slices computed between all pairs of ops
// from 'forOpA' and 'forOpB'.
if (failed(mlir::computeSliceUnion(opsA, opsB, dstLoopDepth, numCommonLoops,
isSrcForOpBeforeDstForOp, srcSlice))) {
LLVM_DEBUG(llvm::dbgs() << "computeSliceUnion failed\n");
return FusionResult::FailPrecondition;
}
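A hedged usage sketch mirroring the testSliceComputation() helper in the test pass above: with forward slices supported, canFuseLoops() can be queried with the two sibling loop nests in either src/dst order, since it derives the slice direction from the loops' order in the block. 'loopA'/'loopB' and the wrapper name are illustrative.

// Hedged sketch only; assumes 'loopA' and 'loopB' are sibling affine.for
// nests in the same block.
void trySliceBothDirections(mlir::AffineForOp loopA, mlir::AffineForOp loopB,
                            unsigned depth) {
  mlir::ComputationSliceState sliceAIntoB, sliceBIntoA;
  // Query fusing 'loopA' into 'loopB' and vice versa; each call computes its
  // own slice union at 'depth'.
  mlir::FusionResult aIntoB =
      mlir::canFuseLoops(loopA, loopB, depth, &sliceAIntoB);
  mlir::FusionResult bIntoA =
      mlir::canFuseLoops(loopB, loopA, depth, &sliceBIntoA);
  (void)aIntoB;
  (void)bIntoA;
}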
@@ -0,0 +1,145 @@
// RUN: mlir-opt %s -test-loop-fusion -test-loop-fusion-slice-computation -split-input-file -verify | FileCheck %s

// -----

// CHECK-LABEL: func @slice_depth1_loop_nest() {
func @slice_depth1_loop_nest() {
%0 = alloc() : memref<100xf32>
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}}
store %cst, %0[%i0] : memref<100xf32>
}
affine.for %i1 = 0 to 5 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}}
%1 = load %0[%i1] : memref<100xf32>
}
return
}

// -----

// Loop %i0 writes to locations [2, 17] and loop %i1 reads from locations [3, 6]
// Slice loop bounds should be adjusted such that the load/store are for the
// same location.
// CHECK-LABEL: func @slice_depth1_loop_nest_with_offsets() {
func @slice_depth1_loop_nest_with_offsets() {
%0 = alloc() : memref<100xf32>
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 2) loop bounds: [(d0) -> (d0 + 3), (d0) -> (d0 + 4)] )}}
%a0 = affine.apply (d0) -> (d0 + 2)(%i0)
store %cst, %0[%a0] : memref<100xf32>
}
affine.for %i1 = 4 to 8 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0 - 3), (d0) -> (d0 - 2)] )}}
%a1 = affine.apply (d0) -> (d0 - 1)(%i1)
%1 = load %0[%a1] : memref<100xf32>
}
return
}

// -----

// Slices at loop depth 1 should only slice the loop bounds of the first loop.
// Slices at loop depth 2 should slice loop bounds of both loops.
// CHECK-LABEL: func @slice_depth2_loop_nest() {
func @slice_depth2_loop_nest() {
%0 = alloc() : memref<100x100xf32>
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
// expected-remark@-2 {{slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
affine.for %i1 = 0 to 16 {
store %cst, %0[%i0, %i1] : memref<100x100xf32>
}
}
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}}
// expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
affine.for %i3 = 0 to 8 {
%1 = load %0[%i2, %i3] : memref<100x100xf32>
}
}
return
}

// -----

// The load at depth 1 in loop nest %i2 prevents slicing loop nest %i0 at depths
// greater than 1. However, loop nest %i2 can be sliced into loop nest %i0 at
// depths 1 and 2 because the dependent store in loop nest %i0 is at depth 2.
// CHECK-LABEL: func @slice_depth2_loop_nest_two_loads() {
func @slice_depth2_loop_nest_two_loads() {
%0 = alloc() : memref<100x100xf32>
%c0 = constant 0 : index
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0)[s0] -> (d0), (d0)[s0] -> (d0 + 1)] [(d0)[s0] -> (0), (d0)[s0] -> (8)] )}}
// expected-remark@-2 {{slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1)[s0] -> (d0), (d0, d1)[s0] -> (d0 + 1)] [(d0, d1)[s0] -> (0), (d0, d1)[s0] -> (8)] )}}
affine.for %i1 = 0 to 16 {
store %cst, %0[%i0, %i1] : memref<100x100xf32>
}
}
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0)[s0] -> (d0), (d0)[s0] -> (d0 + 1)] [(d0)[s0] -> (0), (d0)[s0] -> (8)] )}}
affine.for %i3 = 0 to 8 {
%1 = load %0[%i2, %i3] : memref<100x100xf32>
}
%2 = load %0[%i2, %c0] : memref<100x100xf32>
}
return
}

// -----

// The store at depth 1 in loop nest %i0 prevents slicing loop nest %i2 at
// depths greater than 1 into loop nest %i0. However, loop nest %i0 can be
// sliced into loop nest %i2 at depths 1 and 2 because the dependent load in
// loop nest %i2 is at depth 2.
// CHECK-LABEL: func @slice_depth2_loop_nest_two_stores() {
func @slice_depth2_loop_nest_two_stores() {
%0 = alloc() : memref<100x100xf32>
%c0 = constant 0 : index
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 2) loop bounds: [(d0)[s0] -> (d0), (d0)[s0] -> (d0 + 1)] [(d0)[s0] -> (0), (d0)[s0] -> (8)] )}}
affine.for %i1 = 0 to 16 {
store %cst, %0[%i0, %i1] : memref<100x100xf32>
}
store %cst, %0[%i0, %c0] : memref<100x100xf32>
}
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0)[s0] -> (d0), (d0)[s0] -> (d0 + 1)] [(d0)[s0] -> (0), (d0)[s0] -> (16)] )}}
// expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1)[s0] -> (d0), (d0, d1)[s0] -> (d0 + 1)] [(d0, d1)[s0] -> (0), (d0, d1)[s0] -> (16)] )}}
affine.for %i3 = 0 to 8 {
%1 = load %0[%i2, %i3] : memref<100x100xf32>
}
}
return
}

// -----

// Test loop nest which has a smaller outer trip count than its inner loop.
// CHECK-LABEL: func @slice_loop_nest_with_smaller_outer_trip_count() {
func @slice_loop_nest_with_smaller_outer_trip_count() {
%0 = alloc() : memref<100x100xf32>
%c0 = constant 0 : index
%cst = constant 7.000000e+00 : f32
affine.for %i0 = 0 to 16 {
// expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (10)] )}}
// expected-remark@-2 {{slice ( src loop: 1, dst loop: 0, depth: 2 : insert point: (2, 1) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
affine.for %i1 = 0 to 16 {
store %cst, %0[%i0, %i1] : memref<100x100xf32>
}
}
affine.for %i2 = 0 to 8 {
// expected-remark@-1 {{slice ( src loop: 0, dst loop: 1, depth: 1 : insert point: (1, 0) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (10)] )}}
// expected-remark@-2 {{slice ( src loop: 0, dst loop: 1, depth: 2 : insert point: (2, 0) loop bounds: [(d0, d1) -> (d0), (d0, d1) -> (d0 + 1)] [(d0, d1) -> (d1), (d0, d1) -> (d1 + 1)] )}}
affine.for %i3 = 0 to 10 {
%1 = load %0[%i2, %i3] : memref<100x100xf32>
}
}
return
}