forked from OSchip/llvm-project
[mlir][linalg] Hoist padding simplifications (NFC).
Remove unused members and store the indexing and packing loops in SmallVector. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D113398
This commit is contained in:
parent
e201232ece
commit
969243a007
|
@ -17,7 +17,7 @@ class Value;
|
|||
namespace linalg {
|
||||
class PadTensorOp;
|
||||
|
||||
/// Mechanically hoist padding operations on tensors by `nLoops` into a new,
|
||||
/// Mechanically hoist padding operations on tensors by `numLoops` into a new,
|
||||
/// generally larger tensor. This achieves packing of multiple padding ops into
|
||||
/// a larger tensor. On success, `padTensorOp` is replaced by the cloned version
|
||||
/// in the packing loop so the caller can continue reasoning about the padding
|
||||
|
|
|
@ -54,7 +54,7 @@ using namespace mlir::linalg;
|
|||
/// 7. There is no enclosing scf::ForOp that indexes the padded data.
|
||||
/// Other cases succeed and will trigger hoisting of the pad op.
|
||||
struct HoistingAnalysis {
|
||||
HoistingAnalysis(PadTensorOp padTensorOp, int nLevels);
|
||||
HoistingAnalysis(PadTensorOp padTensorOp, int numLoops);
|
||||
|
||||
bool isValid() { return valid; }
|
||||
|
||||
|
@ -62,12 +62,6 @@ struct HoistingAnalysis {
|
|||
/// `backwardSlice`.
|
||||
FailureOr<SmallVector<Value>> getPackedTensorSizes(ImplicitLocOpBuilder &b);
|
||||
|
||||
/// The padTensorOp that needs to be hoisted.
|
||||
PadTensorOp padTensorOp;
|
||||
|
||||
/// The maximum number of immediately enclosing scf::ForOp to hoist over.
|
||||
int nLevels;
|
||||
|
||||
/// The outermost loop, determined by `nLevels` above which `padTensorOp` will
|
||||
/// be hoisted.
|
||||
scf::ForOp outermostEnclosingForOp;
|
||||
|
@ -81,9 +75,7 @@ struct HoistingAnalysis {
|
|||
/// 2. whose induction variable is used, directly or indirectly, in the
|
||||
/// computation of `padTensorOp`.
|
||||
/// The span of these loops determines the footprint of the packed tensor.
|
||||
/// SmallSetVector<scf::ForOp> packingLoops;
|
||||
SetVector<scf::ForOp, SmallVector<scf::ForOp>, DenseSet<Operation *>>
|
||||
packingLoops;
|
||||
SmallVector<scf::ForOp> packingLoops;
|
||||
|
||||
private:
|
||||
/// Returns the loops in `backwardSlice` used to index the padded data. The
|
||||
|
@ -103,8 +95,8 @@ private:
|
|||
/// %padded_slice = linalg.pad_tensor %slice
|
||||
/// ```
|
||||
/// getIndexingLoops(%padded_slice, %slice) returns [scf.for %i, scf.for %j]
|
||||
SetVector<Operation *> getIndexingLoops(PadTensorOp padTensorOp,
|
||||
tensor::ExtractSliceOp sliceOp);
|
||||
SmallVector<scf::ForOp> getIndexingLoops(PadTensorOp padTensorOp,
|
||||
tensor::ExtractSliceOp sliceOp);
|
||||
|
||||
/// Encodes whether the analysis is valid and hoisting can proceed.
|
||||
bool valid;
|
||||
|
@ -148,10 +140,8 @@ getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
|
|||
}
|
||||
}
|
||||
|
||||
HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int nLevels)
|
||||
: padTensorOp(padTensorOp), nLevels(nLevels), valid(false) {
|
||||
AsmState state(padTensorOp->getParentOfType<mlir::FuncOp>());
|
||||
(void)state;
|
||||
HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
|
||||
valid = false;
|
||||
|
||||
// Bail on any use that isn't an input of a Linalg op.
|
||||
// Hoisting of inplace updates happens after vectorization.
|
||||
|
@ -160,7 +150,7 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int nLevels)
|
|||
|
||||
// Get at most nLevels of immediately enclosing loops.
|
||||
SmallVector<scf::ForOp> reverseEnclosingLoops;
|
||||
getAtMostNEnclosingLoops(padTensorOp, nLevels, reverseEnclosingLoops);
|
||||
getAtMostNEnclosingLoops(padTensorOp, numLoops, reverseEnclosingLoops);
|
||||
if (reverseEnclosingLoops.empty()) {
|
||||
LLVM_DEBUG(DBGS() << "No immediately enclosing loop -> skip\n");
|
||||
return;
|
||||
|
@ -216,19 +206,20 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int nLevels)
|
|||
}
|
||||
|
||||
// Search the loops found in `backwardSlice` used to index the padded data.
|
||||
SetVector<Operation *> indexingLoops = getIndexingLoops(padTensorOp, sliceOp);
|
||||
SmallVector<scf::ForOp> indexingLoops =
|
||||
getIndexingLoops(padTensorOp, sliceOp);
|
||||
|
||||
// Add only the loops part of `indexingLoops` to the packing loops. All other
|
||||
// loops are not used to index the padded data and consequently access the
|
||||
// same data in every loop iteration. Adding them to the packing loops would
|
||||
// increase the cache footprint of the packed data by storing the same data
|
||||
// multiple times.
|
||||
for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops)) {
|
||||
if (indexingLoops.contains(forOp))
|
||||
packingLoops.insert(forOp);
|
||||
}
|
||||
assert(indexingLoops.size() == packingLoops.size() &&
|
||||
for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
|
||||
if (!indexingLoops.empty() && indexingLoops.back() == forOp)
|
||||
packingLoops.push_back(indexingLoops.pop_back_val());
|
||||
assert(indexingLoops.empty() &&
|
||||
"expect the all indexing loops are enclosing loops");
|
||||
|
||||
if (packingLoops.empty()) {
|
||||
LLVM_DEBUG(DBGS() << "Cannot find a packing loop -> skip\n");
|
||||
return;
|
||||
|
@ -247,7 +238,7 @@ static void addIndexOperandsToIndexEdges(Operation *operation,
|
|||
indexEdges.insert(operand);
|
||||
}
|
||||
|
||||
SetVector<Operation *>
|
||||
SmallVector<scf::ForOp>
|
||||
HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
|
||||
tensor::ExtractSliceOp sliceOp) {
|
||||
// Set of all values used for index computation.
|
||||
|
@ -272,7 +263,7 @@ HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
|
|||
// After iterating `backwardSlice` we obtain:
|
||||
// indexEdges = [%i, %j, %ubi, %ubj]
|
||||
// indexingLoops = [scf.for %i, scf.for %j]
|
||||
SetVector<Operation *> indexingLoops;
|
||||
SmallVector<scf::ForOp> indexingLoops;
|
||||
for (Operation *op : llvm::reverse(backwardSlice)) {
|
||||
// Add the index operands of `padTensorOp` and `sliceOp` to start the
|
||||
// exploration of the index computation.
|
||||
|
@ -286,7 +277,7 @@ HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
|
|||
if (auto forOp = dyn_cast<scf::ForOp>(op)) {
|
||||
if (indexEdges.contains(forOp.getInductionVar())) {
|
||||
addIndexOperandsToIndexEdges(op, indexEdges);
|
||||
indexingLoops.insert(forOp);
|
||||
indexingLoops.push_back(forOp);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -442,7 +433,7 @@ HoistingAnalysis::getPackedTensorSizes(ImplicitLocOpBuilder &b) {
|
|||
|
||||
// Iteratively try to fold the upper bounds into the constraints set.
|
||||
if (failed(foldUpperBoundsIntoConstraintsSet(
|
||||
constraints, outermostEnclosingForOp, packingLoops.getArrayRef())))
|
||||
constraints, outermostEnclosingForOp, packingLoops)))
|
||||
return failure();
|
||||
|
||||
int nPackedLoops = packingLoops.size();
|
||||
|
@ -577,7 +568,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
|
|||
auto forOp = dyn_cast<scf::ForOp>(op);
|
||||
assert(forOp && "Expected scf::ForOp when hoisting pad ops");
|
||||
// Unused loop, just skip it.
|
||||
if (!analysis.packingLoops.contains(forOp))
|
||||
if (!llvm::is_contained(analysis.packingLoops, forOp))
|
||||
continue;
|
||||
|
||||
auto clonedForOp =
|
||||
|
|
Loading…
Reference in New Issue