llvm-project/mlir/lib/Transforms/CSE.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

272 lines
9.7 KiB
C++
Raw Normal View History

//===- CSE.cpp - Common Sub-expression Elimination ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation pass performs a simple common sub-expression elimination
// algorithm on operations within a region.
//
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
#include "mlir/IR/Dominance.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/RecyclingAllocator.h"
#include <deque>
using namespace mlir;
namespace {
/// Hashing and equality traits used to key the CSE table on operations.
/// Hashing uses the operands directly and ignores results and locations;
/// equality requires exactly matching operands and likewise ignores results
/// and locations.
struct SimpleOperationInfo : public llvm::DenseMapInfo<Operation *> {
  /// Computes the hash of `opC` via OperationEquivalence::computeHash.
  static unsigned getHashValue(const Operation *opC) {
    auto *op = const_cast<Operation *>(opC);
    return OperationEquivalence::computeHash(
        op,
        /*hashOperands=*/OperationEquivalence::directHashValue,
        /*hashResults=*/OperationEquivalence::ignoreHashValue,
        OperationEquivalence::IgnoreLocations);
  }

  /// Returns true when both keys are the same pointer, or when neither is a
  /// sentinel key and the two operations are equivalent.
  static bool isEqual(const Operation *lhsC, const Operation *rhsC) {
    auto *first = const_cast<Operation *>(lhsC);
    auto *second = const_cast<Operation *>(rhsC);

    // Pointer identity also covers comparing a sentinel key against itself.
    if (first == second)
      return true;

    // The tombstone and empty keys are not real operations and must never be
    // handed to the equivalence check below.
    auto isSentinel = [](Operation *key) {
      return key == getTombstoneKey() || key == getEmptyKey();
    };
    if (isSentinel(first) || isSentinel(second))
      return false;

    return OperationEquivalence::isEquivalentTo(
        first, second,
        /*mapOperands=*/OperationEquivalence::exactValueMatch,
        /*mapResults=*/OperationEquivalence::ignoreValueEquivalence,
        OperationEquivalence::IgnoreLocations);
  }
};
} // end anonymous namespace
namespace {
/// Pass performing a simple form of common sub-expression elimination.
struct CSE : public CSEBase<CSE> {
  /// Allocator used for the entries of the scoped hash table.
  using AllocatorTy = llvm::RecyclingAllocator<
      llvm::BumpPtrAllocator,
      llvm::ScopedHashTableVal<Operation *, Operation *>>;

  /// Scoped hash table from an operation to a previously recorded operation,
  /// keyed with the hashing and equality of SimpleOperationInfo.
  using ScopedMapTy = llvm::ScopedHashTable<Operation *, Operation *,
                                            SimpleOperationInfo, AllocatorTy>;

  /// One entry of the explicit stack used for the depth first walk over the
  /// dominance tree of a region.
  struct CFGStackNode {
    CFGStackNode(ScopedMapTy &knownValues, DominanceInfoNode *domNode)
        : scope(knownValues), node(domNode), childIterator(domNode->begin()) {}

    /// Scope of `knownValues` that is open for as long as this entry lives.
    ScopedMapTy::ScopeTy scope;
    /// Dominance tree node this entry stands for.
    DominanceInfoNode *node;
    /// Next child of `node` that still has to be pushed on the stack.
    DominanceInfoNode::const_iterator childIterator;
    /// Whether the block of this node has already been simplified.
    bool processed = false;
  };

  /// Tries to eliminate `op` as redundant. Yields success when the operation
  /// was handled as dead or as a duplicate, failure otherwise.
  LogicalResult simplifyOperation(ScopedMapTy &knownValues, Operation *op,
                                  bool hasSSADominance);
  void simplifyBlock(ScopedMapTy &knownValues, Block *bb, bool hasSSADominance);
  void simplifyRegion(ScopedMapTy &knownValues, Region &region);

  void runOnOperation() override;

private:
  /// Operations that were found to be dead and are erased at the end of the
  /// pass.
  std::vector<Operation *> opsToErase;
  /// Dominance analysis used while the pass runs; null by default.
  DominanceInfo *domInfo = nullptr;
};
} // end anonymous namespace
/// Attempt to eliminate a redundant operation.
LogicalResult CSE::simplifyOperation(ScopedMapTy &knownValues, Operation *op,
bool hasSSADominance) {
// Don't simplify terminator operations.
if (op->hasTrait<OpTrait::IsTerminator>())
return failure();
// If the operation is already trivially dead just add it to the erase list.
if (isOpTriviallyDead(op)) {
opsToErase.push_back(op);
++numDCE;
return success();
}
// Don't simplify operations with nested blocks. We don't currently model
// equality comparisons correctly among other things. It is also unclear
// whether we would want to CSE such operations.
if (op->getNumRegions() != 0)
return failure();
// TODO: We currently only eliminate non side-effecting
// operations.
if (!MemoryEffectOpInterface::hasNoEffect(op))
return failure();
// Look for an existing definition for the operation.
if (auto *existing = knownValues.lookup(op)) {
// If we find one then replace all uses of the current operation with the
// existing one and mark it for deletion. We can only replace an operand in
// an operation if it has not been visited yet.
if (hasSSADominance) {
// If the region has SSA dominance, then we are guaranteed to have not
// visited any use of the current operation.
op->replaceAllUsesWith(existing);
opsToErase.push_back(op);
} else {
// When the region does not have SSA dominance, we need to check if we
// have visited a use before replacing any use.
for (auto it : llvm::zip(op->getResults(), existing->getResults())) {
std::get<0>(it).replaceUsesWithIf(
std::get<1>(it), [&](OpOperand &operand) {
return !knownValues.count(operand.getOwner());
});
}
// There may be some remaining uses of the operation.
if (op->use_empty())
opsToErase.push_back(op);
}
// If the existing operation has an unknown location and the current
// operation doesn't, then set the existing op's location to that of the
// current op.
if (existing->getLoc().isa<UnknownLoc>() &&
!op->getLoc().isa<UnknownLoc>()) {
existing->setLoc(op->getLoc());
}
Add support for instance specific pass statistics. Statistics are a way to keep track of what the compiler is doing and how effective various optimizations are. It is useful to see what optimizations are contributing to making a particular program run faster. Pass-instance specific statistics take this even further as you can see the effect of placing a particular pass at specific places within the pass pipeline, e.g. they could help answer questions like "what happens if I run CSE again here". Statistics can be added to a pass by simply adding members of type 'Pass::Statistics'. This class takes as a constructor arguments: the parent pass pointer, a name, and a description. Statistics can be dumped by the pass manager in a similar manner to how pass timing information is dumped, i.e. via PassManager::enableStatistics programmatically; or -pass-statistics and -pass-statistics-display via the command line pass manager options. Below is an example: struct MyPass : public OperationPass<MyPass> { Statistic testStat{this, "testStat", "A test statistic"}; void runOnOperation() { ... ++testStat; ... } }; $ mlir-opt -pass-pipeline='func(my-pass,my-pass)' foo.mlir -pass-statistics Pipeline Display: ===-------------------------------------------------------------------------=== ... Pass statistics report ... ===-------------------------------------------------------------------------=== 'func' Pipeline MyPass (S) 15 testStat - A test statistic MyPass (S) 6 testStat - A test statistic List Display: ===-------------------------------------------------------------------------=== ... Pass statistics report ... ===-------------------------------------------------------------------------=== MyPass (S) 21 testStat - A test statistic PiperOrigin-RevId: 284022014
2019-12-06 03:52:58 +08:00
++numCSE;
return success();
}
// Otherwise, we add this operation to the known values map.
knownValues.insert(op, op);
return failure();
}
/// Simplifies every operation of `bb` in order and then descends into the
/// regions held by the operations that were not simplified.
void CSE::simplifyBlock(ScopedMapTy &knownValues, Block *bb,
                        bool hasSSADominance) {
  for (Operation &current : *bb) {
    // A simplified operation does not get its regions processed, and the
    // common case of an operation without regions has nothing left to do.
    const bool simplified =
        succeeded(simplifyOperation(knownValues, &current, hasSSADominance));
    if (simplified || current.getNumRegions() == 0)
      continue;

    // Regions that are not isolated from above share the enclosing table.
    if (!current.mightHaveTrait<OpTrait::IsIsolatedFromAbove>()) {
      for (Region &nested : current.getRegions())
        simplifyRegion(knownValues, nested);
      continue;
    }

    // An operation isolated from above must not see the values of the
    // enclosing scope: reusing 'knownValues' would cause the insertion of
    // implicit captures in explicit capture only regions. Use a fresh table.
    ScopedMapTy isolatedKnownValues;
    for (Region &nested : current.getRegions())
      simplifyRegion(isolatedKnownValues, nested);
  }
}
void CSE::simplifyRegion(ScopedMapTy &knownValues, Region &region) {
// If the region is empty there is nothing to do.
if (region.empty())
return;
bool hasSSADominance = domInfo->hasSSADominance(&region);
// If the region only contains one block, then simplify it directly.
if (region.hasOneBlock()) {
ScopedMapTy::ScopeTy scope(knownValues);
simplifyBlock(knownValues, &region.front(), hasSSADominance);
return;
}
[MLIR] Add RegionKindInterface Some dialects have semantics which is not well represented by common SSA structures with dominance constraints. This patch allows operations to declare the 'kind' of their contained regions. Currently, two kinds are allowed: "SSACFG" and "Graph". The only difference between them at the moment is that SSACFG regions are required to have dominance, while Graph regions are not required to have dominance. The intention is that this Interface would be generated by ODS for existing operations, although this has not yet been implemented. Presumably, if someone were interested in code generation, we might also have a "CFG" dialect, which defines control flow, but does not require SSA. The new behavior is mostly identical to the previous behavior, since registered operations without a RegionKindInterface are assumed to contain SSACFG regions. However, the behavior has changed for unregistered operations. Previously, these were checked for dominance, however the new behavior allows dominance violations, in order to allow the processing of unregistered dialects with Graph regions. One implication of this is that regions in unregistered operations with more than one op are no longer CSE'd (since it requires dominance info). I've also reorganized the LangRef documentation to remove assertions about "sequential execution", "SSA Values", and "Dominance". Instead, the core IR is simply "ordered" (i.e. totally ordered) and consists of "Values". I've also clarified some things about how control flow passes between blocks in an SSACFG region. Control Flow must enter a region at the entry block and follow terminator operation successors or be returned to the containing op. Graph regions do not define a notion of control flow. see discussion here: https://llvm.discourse.group/t/rfc-allowing-dialects-to-relax-the-ssa-dominance-condition/833/53 Differential Revision: https://reviews.llvm.org/D80358
2020-05-16 01:33:13 +08:00
// If the region does not have dominanceInfo, then skip it.
// TODO: Regions without SSA dominance should define a different
// traversal order which is appropriate and can be used here.
if (!hasSSADominance)
[MLIR] Add RegionKindInterface Some dialects have semantics which is not well represented by common SSA structures with dominance constraints. This patch allows operations to declare the 'kind' of their contained regions. Currently, two kinds are allowed: "SSACFG" and "Graph". The only difference between them at the moment is that SSACFG regions are required to have dominance, while Graph regions are not required to have dominance. The intention is that this Interface would be generated by ODS for existing operations, although this has not yet been implemented. Presumably, if someone were interested in code generation, we might also have a "CFG" dialect, which defines control flow, but does not require SSA. The new behavior is mostly identical to the previous behavior, since registered operations without a RegionKindInterface are assumed to contain SSACFG regions. However, the behavior has changed for unregistered operations. Previously, these were checked for dominance, however the new behavior allows dominance violations, in order to allow the processing of unregistered dialects with Graph regions. One implication of this is that regions in unregistered operations with more than one op are no longer CSE'd (since it requires dominance info). I've also reorganized the LangRef documentation to remove assertions about "sequential execution", "SSA Values", and "Dominance". Instead, the core IR is simply "ordered" (i.e. totally ordered) and consists of "Values". I've also clarified some things about how control flow passes between blocks in an SSACFG region. Control Flow must enter a region at the entry block and follow terminator operation successors or be returned to the containing op. Graph regions do not define a notion of control flow. see discussion here: https://llvm.discourse.group/t/rfc-allowing-dialects-to-relax-the-ssa-dominance-condition/833/53 Differential Revision: https://reviews.llvm.org/D80358
2020-05-16 01:33:13 +08:00
return;
// Note, deque is being used here because there was significant performance
// gains over vector when the container becomes very large due to the
// specific access patterns. If/when these performance issues are no
// longer a problem we can change this to vector. For more information see
// the llvm mailing list discussion on this:
// http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
std::deque<std::unique_ptr<CFGStackNode>> stack;
// Process the nodes of the dom tree for this region.
stack.emplace_back(std::make_unique<CFGStackNode>(
knownValues, domInfo->getRootNode(&region)));
while (!stack.empty()) {
auto &currentNode = stack.back();
// Check to see if we need to process this node.
if (!currentNode->processed) {
currentNode->processed = true;
simplifyBlock(knownValues, currentNode->node->getBlock(),
hasSSADominance);
}
// Otherwise, check to see if we need to process a child node.
if (currentNode->childIterator != currentNode->node->end()) {
auto *childNode = *(currentNode->childIterator++);
stack.emplace_back(
std::make_unique<CFGStackNode>(knownValues, childNode));
} else {
// Finally, if the node and all of its children have been processed
// then we delete the node.
stack.pop_back();
}
}
}
/// Entry point of the pass: simplifies every region of the root operation and
/// then erases the operations that were marked as dead along the way.
void CSE::runOnOperation() {
  /// A scoped hash table of defining operations within a region.
  ScopedMapTy knownValues;

  domInfo = &getAnalysis<DominanceInfo>();
  Operation *rootOp = getOperation();

  for (auto &region : rootOp->getRegions())
    simplifyRegion(knownValues, region);

  // The dominance information is only consulted by the traversal above. Drop
  // the cached pointer here so that it is reset on every exit path, including
  // the early return below.
  domInfo = nullptr;

  // If no operations were erased, then we mark all analyses as preserved.
  if (opsToErase.empty()) {
    markAllAnalysesPreserved();
    return;
  }

  /// Erase any operations that were marked as dead during simplification.
  for (auto *op : opsToErase)
    op->erase();
  opsToErase.clear();

  // We currently don't remove region operations, so mark dominance as
  // preserved.
  markAnalysesPreserved<DominanceInfo, PostDominanceInfo>();
}
/// Creates a new instance of the CSE pass.
std::unique_ptr<Pass> mlir::createCSEPass() {
  std::unique_ptr<Pass> pass = std::make_unique<CSE>();
  return pass;
}