//===- MemRefDataFlowOpt.cpp - MemRef DataFlow Optimization pass ------*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements a pass to forward memref stores to loads, thereby
// potentially getting rid of intermediate memrefs entirely.
// TODO(mlir-team): In the future, similar techniques could be used to eliminate
// dead memref stores and perform more complex forwarding when support for
// SSA scalars live out of 'for'/'affine.if' statements is available.
//===----------------------------------------------------------------------===//
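
// As an illustrative sketch (not part of the original header), the basic
// rewrite this pass performs looks like:
//
//   %m = alloc() : memref<10xf32>
//   for %i0 = 0 to 10 {
//     store %v, %m[%i0] : memref<10xf32>
//     %w = load %m[%i0] : memref<10xf32>  // uses of %w are replaced by %v
//   }
//
// Once no users other than stores and a dealloc remain, the stores, the
// dealloc, and the alloc of %m can be erased as well.
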
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Dominance.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Pass.h"
#include "mlir/StandardOps/StandardOps.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>

#define DEBUG_TYPE "memref-dataflow-opt"

using namespace mlir;

namespace {

// The store to load forwarding relies on four conditions:
//
// 1) there has to be a dependence from the store to the load satisfied at the
// block* immediately within the innermost loop enclosing both the load op and
// the store op,
//
// 2) the store op should dominate the load op,
//
// 3) among all candidate store ops that satisfy (1) and (2), if there exists a
// store op that postdominates all those that satisfy (1), such a store op is
// provably the last writer to the particular memref location being loaded from
// by the load op, and its store value can be forwarded to the load, and
//
// 4) the load should touch a single location in the memref for a given
// iteration of the innermost loop enclosing both the store op and the load op.
//
// (* A dependence being satisfied at a block: a dependence that is satisfied by
// virtue of the destination instruction appearing textually / lexically after
// the source instruction within the body of a 'for' instruction; thus, a
// dependence is always either satisfied by a loop or by a block).
//
// The above conditions are simple to check, sufficient, and powerful for most
// cases in practice - conditions (1) and (3) are precise and necessary, while
// condition (2) is a sufficient one but not necessary (since it doesn't reason
// about loops that are guaranteed to execute at least once).
//
// TODO(mlir-team): more forwarding can be done when support for
// loop/conditional live-out SSA values is available.
// TODO(mlir-team): do general dead store elimination for memrefs. This pass
// currently eliminates stores only if no other loads/uses (other than
// dealloc) remain.
//
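// For illustration (an assumed example, not from the original comments),
// consider:
//   store %v0, %m[%i0]   // satisfies (1) and (2)
//   store %v1, %m[%i0]   // satisfies (1) and (2), postdominates the store above
//   %w = load %m[%i0]
// The second store postdominates all stores with a dependence into the load,
// so it is provably the last writer, and %v1 is forwarded to the load.
//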
struct MemRefDataFlowOpt : public FunctionPass {
  explicit MemRefDataFlowOpt() : FunctionPass(&MemRefDataFlowOpt::passID) {}

  PassResult runOnFunction(Function *f) override;

  void forwardStoreToLoad(OpPointer<LoadOp> loadOp);

  // A list of memrefs that are potentially dead / could be eliminated.
  SmallPtrSet<Value *, 4> memrefsToErase;
  // Load ops whose results were replaced by those forwarded from stores.
  std::vector<Instruction *> loadOpsToErase;

  DominanceInfo *domInfo = nullptr;
  PostDominanceInfo *postDomInfo = nullptr;

  static char passID;
};

} // end anonymous namespace

char MemRefDataFlowOpt::passID = 0;

/// Creates a pass to perform optimizations relying on memref dataflow such as
/// store to load forwarding, elimination of dead stores, and dead allocs.
FunctionPass *mlir::createMemRefDataFlowOptPass() {
  return new MemRefDataFlowOpt();
}

// This is a straightforward implementation not optimized for speed. Optimize
// this in the future if needed.
void MemRefDataFlowOpt::forwardStoreToLoad(OpPointer<LoadOp> loadOp) {
  Instruction *lastWriteStoreOp = nullptr;
  Instruction *loadOpInst = loadOp->getInstruction();

  // First pass over the use list to get the minimum number of surrounding
  // loops common between the load op and the store op, with the min taken
  // across all store ops.
  SmallVector<Instruction *, 8> storeOps;
  unsigned minSurroundingLoops = getNestingDepth(*loadOpInst);
  for (InstOperand &use : loadOp->getMemRef()->getUses()) {
    auto storeOp = use.getOwner()->dyn_cast<StoreOp>();
    if (!storeOp)
      continue;
    auto *storeOpInst = storeOp->getInstruction();
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
    minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
    storeOps.push_back(storeOpInst);
  }

  unsigned loadOpDepth = getNestingDepth(*loadOpInst);

  // 1. Check if there is a dependence satisfied at a depth equal to the depth
  // of the loop body of the innermost common surrounding loop of the storeOp
  // and loadOp.
  // The list of store op candidates for forwarding; they need to satisfy the
  // conditions listed at the top.
  SmallVector<Instruction *, 8> fwdingCandidates;
  // Store ops that have a dependence into the load (even if they aren't
  // forwarding candidates). Each forwarding candidate will be checked for
  // postdominance over these. 'fwdingCandidates' is a subset of
  // 'depSrcStores'.
  SmallVector<Instruction *, 8> depSrcStores;
  for (auto *storeOpInst : storeOps) {
    MemRefAccess srcAccess(storeOpInst);
    MemRefAccess destAccess(loadOpInst);
    FlatAffineConstraints dependenceConstraints;
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
    // Dependences at loop depth <= minSurroundingLoops do NOT matter.
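    // (Clarifying note, not in the original comments: a dependence satisfied
    // at depth d == nsLoops + 1 is one satisfied at the block of the body of
    // the innermost common surrounding loop; see condition (1) above.)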
    for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
      if (!checkMemrefAccessDependence(srcAccess, destAccess, d,
                                       &dependenceConstraints,
                                       /*dependenceComponents=*/nullptr))
        continue;
      depSrcStores.push_back(storeOpInst);
      // Check if this store is a candidate for forwarding; we only forward if
      // the dependence from the store is carried by the *body* of the
      // innermost common surrounding loop. As an example, this filters out
      // cases like:
      // for %i0
      //   for %i1
      //     %idx = affine.apply (d0) -> (d0 + 1) (%i0)
      //     store %A[%idx]
      //     load %A[%i0]
      //
      if (d != nsLoops + 1)
        break;

      // 2. The store has to dominate the load op to be a candidate. This is
      // not strictly a necessary condition since dominance isn't a
      // prerequisite for a memref element store to reach a load, but it is
      // sufficient and reasonably powerful in practice.
      if (!domInfo->dominates(storeOpInst, loadOpInst))
        break;

      // Finally, forwarding is only possible if the load touches a single
      // location in the memref across the enclosing loops *not* common with
      // the store. This filters out cases like:
      // for (i ...)
      //   a[i] = ...
      // for (j ...)
      //   ... = a[j]
      // If storeOpInst and loadOpInst are at the same nesting depth, the load
      // op is trivially loading from a single location at that depth, so there
      // is no need to call isRangeOneToOne.
      if (getNestingDepth(*storeOpInst) < loadOpDepth) {
        MemRefRegion region(loadOpInst->getLoc());
        region.compute(loadOpInst, nsLoops);
        if (!region.getConstraints()->isRangeOneToOne(
                /*start=*/0, /*limit=*/loadOp->getMemRefType().getRank()))
          break;
      }

      // After all these conditions, we have a candidate for forwarding!
      fwdingCandidates.push_back(storeOpInst);
      break;
    }
  }

  // Note: this can be implemented in a cleaner way with postdominator tree
  // traversals. Consider this for the future if needed.
  for (auto *storeOpInst : fwdingCandidates) {
    // 3. Of all the store ops that meet the above criteria, the store that
    // postdominates all 'depSrcStores' (if such a store exists) is the unique
    // store providing the value to the load, i.e., provably the last writer to
    // that memref location.
    if (llvm::all_of(depSrcStores, [&](Instruction *depStore) {
          return postDomInfo->postDominates(storeOpInst, depStore);
        })) {
      lastWriteStoreOp = storeOpInst;
      break;
    }
  }
  // TODO: as a future optimization, store ops found to be postdominated by
  // another candidate above can be recorded and skipped in the iteration
  // above, since they can never postdominate everything.

  if (!lastWriteStoreOp)
    return;

  // Perform the actual store to load forwarding.
  Value *storeVal = lastWriteStoreOp->cast<StoreOp>()->getValueToStore();
  loadOp->getResult()->replaceAllUsesWith(storeVal);
  // Record the memref for a later sweep to optimize away.
  memrefsToErase.insert(loadOp->getMemRef());
  // Record this load op to erase later.
  loadOpsToErase.push_back(loadOpInst);
}

PassResult MemRefDataFlowOpt::runOnFunction(Function *f) {
  // Only supports single block functions at the moment.
  if (f->getBlocks().size() != 1)
    return success();

  DominanceInfo theDomInfo(f);
  domInfo = &theDomInfo;
  PostDominanceInfo thePostDomInfo(f);
  postDomInfo = &thePostDomInfo;

  loadOpsToErase.clear();
  memrefsToErase.clear();

  // Walk all loads and perform store to load forwarding.
  f->walk<LoadOp>(
      [&](OpPointer<LoadOp> loadOp) { forwardStoreToLoad(loadOp); });

  // Erase all load ops whose results were replaced with store fwd'ed ones.
  for (auto *loadOp : loadOpsToErase) {
    loadOp->erase();
  }

  // Check if the store fwd'ed memrefs are now left with only stores and can
  // thus be completely deleted. Note: the canonicalize pass should be able
  // to do this as well, but we'll do it here since we collected these anyway.
  for (auto *memref : memrefsToErase) {
    // If the memref hasn't been alloc'ed in this function, skip.
    Instruction *defInst = memref->getDefiningInst();
    if (!defInst || !defInst->isa<AllocOp>())
      // TODO(mlir-team): if the memref was returned by a 'call' instruction,
      // we could still erase it if the call had no side-effects.
      continue;
    if (std::any_of(memref->use_begin(), memref->use_end(),
                    [&](InstOperand &use) {
                      auto *ownerInst = use.getOwner();
                      return (!ownerInst->isa<StoreOp>() &&
                              !ownerInst->isa<DeallocOp>());
                    }))
      continue;

    // Erase all stores, the dealloc, and the alloc on the memref.
    for (auto &use : llvm::make_early_inc_range(memref->getUses()))
      use.getOwner()->erase();
    defInst->erase();
  }

  // This function never leaves the IR in an invalid state.
  return success();
}

static PassRegistration<MemRefDataFlowOpt>
    pass("memref-dataflow-opt", "Perform store/load forwarding for memrefs");
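
// Usage note (added as an illustration): with the registration above, the
// pass can be exercised from the command line via mlir-opt, e.g.:
//   mlir-opt -memref-dataflow-opt input.mlir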