//===- MemRefDataFlowOpt.cpp - MemRef DataFlow Optimization pass ------ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to forward memref stores to loads, thereby
// potentially getting rid of intermediate memrefs entirely.
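//
// As an illustrative sketch (not taken from a test case), a pattern like:
//
//   affine.for %i = 0 to 16 {
//     affine.store %v, %m[%i] : memref<16xf32>
//     %w = affine.load %m[%i] : memref<16xf32>
//     "use"(%w) : (f32) -> ()
//   }
//
// can have the load's uses replaced directly with %v; once no users other
// than stores and dealloc remain, %m's stores, its dealloc, and its alloc
// are erased as well.
//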
// TODO: In the future, similar techniques could be used to eliminate
// dead memref stores and perform more complex forwarding when support for
// SSA scalars live out of 'affine.for'/'affine.if' statements is available.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Dominance.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>

#define DEBUG_TYPE "memref-dataflow-opt"

using namespace mlir;

namespace {
// The store to load forwarding relies on three conditions:
//
// 1) they need to have mathematically equivalent affine access functions
// (checked after full composition of load/store operands); this implies that
// they access the same single memref element for all iterations of the common
// surrounding loop,
//
// 2) the store op should dominate the load op,
//
// 3) among all ops that satisfy both (1) and (2), the one that postdominates
// all store ops that have a dependence into the load is provably the last
// writer to the particular memref location being loaded at the load op, and
// its store value can be forwarded to the load. Note that the only
// dependences to be considered are those that are satisfied at the block* of
// the innermost common surrounding loop of the <store, load> being
// considered.
//
// (* A dependence being satisfied at a block: a dependence that is satisfied
// by virtue of the destination operation appearing textually / lexically
// after the source operation within the body of an 'affine.for' operation;
// thus, a dependence is always either satisfied by a loop or by a block).
//
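// For instance (an illustrative sketch, not from a test case):
//
//   affine.for %i = 0 to 16 {
//     affine.store %v0, %m[%i] : memref<16xf32>
//     affine.store %v1, %m[%i] : memref<16xf32>
//     %w = affine.load %m[%i] : memref<16xf32>
//   }
//
// Both stores have block-satisfied dependences into the load; the second
// store postdominates the first and is thus the last writer, so %v1 is the
// value forwarded to the load.
//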
// The above conditions are simple to check, sufficient, and powerful for most
// cases in practice - they are sufficient but not necessary, since they don't
// reason about loops that are guaranteed to execute at least once, nor about
// multiple sources to forward from.
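//
// For example (a hypothetical case): a store inside an
// 'affine.for %i = 0 to %N' loop followed by a load after the loop is not
// forwarded here - the store does not dominate the load, and even when
// %N >= 1 guarantees a last writer, the stored value is not available as an
// SSA value live out of the loop.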
//
// TODO: more forwarding can be done when support for
// loop/conditional live-out SSA values is available.
// TODO: do general dead store elimination for memrefs. This pass
// currently eliminates stores only if no other loads/uses (other than
// dealloc) remain.
//
struct MemRefDataFlowOpt : public MemRefDataFlowOptBase<MemRefDataFlowOpt> {
  void runOnFunction() override;

  void forwardStoreToLoad(AffineReadOpInterface loadOp);

  // A list of memrefs that are potentially dead / could be eliminated.
  SmallPtrSet<Value, 4> memrefsToErase;
  // Load ops whose results were replaced by those forwarded from stores.
  SmallVector<Operation *, 8> loadOpsToErase;

  DominanceInfo *domInfo = nullptr;
  PostDominanceInfo *postDomInfo = nullptr;
};

} // end anonymous namespace

/// Creates a pass to perform optimizations relying on memref dataflow such as
/// store to load forwarding, elimination of dead stores, and dead allocs.
std::unique_ptr<OperationPass<FuncOp>> mlir::createMemRefDataFlowOptPass() {
  return std::make_unique<MemRefDataFlowOpt>();
}
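
// A minimal usage sketch (hypothetical driver code, not part of this file):
//
//   PassManager pm(module.getContext());
//   pm.addNestedPass<FuncOp>(createMemRefDataFlowOptPass());
//   if (failed(pm.run(module)))
//     llvm::errs() << "memref dataflow optimization failed\n";
//
// The pass is also exposed to mlir-opt under the "memref-dataflow-opt" flag
// (matching DEBUG_TYPE above).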

// This is a straightforward implementation not optimized for speed. Optimize
// if needed.
void MemRefDataFlowOpt::forwardStoreToLoad(AffineReadOpInterface loadOp) {
  // First pass over the use list to get the minimum number of surrounding
  // loops common between the load op and the store op, with min taken across
  // all store ops.
  SmallVector<Operation *, 8> storeOps;
  unsigned minSurroundingLoops = getNestingDepth(loadOp);
  for (auto *user : loadOp.getMemRef().getUsers()) {
    auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
    if (!storeOp)
      continue;
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
    minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
    storeOps.push_back(storeOp);
  }

  // The list of store op candidates for forwarding that satisfy conditions
  // (1) and (2) above - they will be filtered later when checking (3).
  SmallVector<Operation *, 8> fwdingCandidates;

  // Store ops that have a dependence into the load (even if they aren't
  // forwarding candidates). Each forwarding candidate will be checked for a
  // post-dominance on these. 'fwdingCandidates' is a subset of 'depSrcStores'.
  SmallVector<Operation *, 8> depSrcStores;

  for (auto *storeOp : storeOps) {
    MemRefAccess srcAccess(storeOp);
    MemRefAccess destAccess(loadOp);
    // Find stores that may be reaching the load.
    FlatAffineConstraints dependenceConstraints;
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
    unsigned d;
    // Dependences at loop depth <= minSurroundingLoops do NOT matter.
    for (d = nsLoops + 1; d > minSurroundingLoops; d--) {
      DependenceResult result = checkMemrefAccessDependence(
          srcAccess, destAccess, d, &dependenceConstraints,
          /*dependenceComponents=*/nullptr);
      if (hasDependence(result))
        break;
    }
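    // If the loop exited with d == minSurroundingLoops, no dependence from
    // this store into the load exists at any relevant depth, i.e., this store
    // cannot be reaching the load.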
    if (d == minSurroundingLoops)
      continue;

    // Stores that *may* be reaching the load.
    depSrcStores.push_back(storeOp);

    // 1. Check if the store and the load have mathematically equivalent
    // affine access functions; this implies that they statically refer to the
    // same single memref element. As an example this filters out cases like:
    //     store %A[%i0 + 1]
    //     load %A[%i0]
    //     store %A[%M]
    //     load %A[%N]
    // Use the AffineValueMap difference based memref access equality checking.
    if (srcAccess != destAccess)
      continue;

    // 2. The store has to dominate the load op to be a candidate.
    if (!domInfo->dominates(storeOp, loadOp))
      continue;

    // We now have a candidate for forwarding.
    fwdingCandidates.push_back(storeOp);
  }

  // 3. Of all the store ops that meet the above criteria, the store that
  // postdominates all 'depSrcStores' (if one exists) is the unique store
  // providing the value to the load, i.e., provably the last writer to that
  // memref loc.
  // Note: this can be implemented in a cleaner way with postdominator tree
  // traversals. Consider this for the future if needed.
  Operation *lastWriteStoreOp = nullptr;
  for (auto *storeOp : fwdingCandidates) {
    if (llvm::all_of(depSrcStores, [&](Operation *depStore) {
          return postDomInfo->postDominates(storeOp, depStore);
        })) {
      lastWriteStoreOp = storeOp;
      break;
    }
  }
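  // Without a candidate that postdominates all dependent stores, a unique
  // last writer cannot be established; conservatively give up on forwarding.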
  if (!lastWriteStoreOp)
    return;

  // Perform the actual store to load forwarding.
  Value storeVal =
      cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
  loadOp.getValue().replaceAllUsesWith(storeVal);
  // Record the memref for a later sweep to optimize away.
  memrefsToErase.insert(loadOp.getMemRef());
  // Record this to erase later.
  loadOpsToErase.push_back(loadOp);
}

void MemRefDataFlowOpt::runOnFunction() {
  // Only supports single block functions at the moment.
  FuncOp f = getFunction();
  if (!llvm::hasSingleElement(f)) {
    markAllAnalysesPreserved();
    return;
  }

  domInfo = &getAnalysis<DominanceInfo>();
  postDomInfo = &getAnalysis<PostDominanceInfo>();

  loadOpsToErase.clear();
  memrefsToErase.clear();

  // Walk all loads and perform store to load forwarding.
  f.walk([&](AffineReadOpInterface loadOp) { forwardStoreToLoad(loadOp); });

  // Erase all load ops whose results were replaced with store fwd'ed ones.
  for (auto *loadOp : loadOpsToErase)
    loadOp->erase();

  // Check if the store fwd'ed memrefs are now left with only stores and can
  // thus be completely deleted. Note: the canonicalize pass should be able
  // to do this as well, but we'll do it here since we collected these anyway.
  for (auto memref : memrefsToErase) {
    // If the memref hasn't been alloc'ed in this function, skip.
    Operation *defOp = memref.getDefiningOp();
    if (!defOp || !isa<AllocOp>(defOp))
      // TODO: if the memref was returned by a 'call' operation, we
      // could still erase it if the call had no side-effects.
      continue;
    if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
          return !isa<AffineWriteOpInterface, DeallocOp>(ownerOp);
        }))
      continue;

    // Erase all stores, the dealloc, and the alloc on the memref.
    for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
      user->erase();
    defOp->erase();
  }
}