2019-09-06 03:23:45 +08:00
|
|
|
//===- Inliner.cpp - Pass to inline function calls ------------------------===//
|
|
|
|
//
|
2020-01-26 11:58:30 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
2019-12-24 01:35:36 +08:00
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2019-09-06 03:23:45 +08:00
|
|
|
//
|
2019-12-24 01:35:36 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2019-10-04 14:04:56 +08:00
|
|
|
//
|
|
|
|
// This file implements a basic inlining algorithm that operates bottom up over
|
|
|
|
// the Strongly Connect Components(SCCs) of the CallGraph. This enables a more
|
|
|
|
// incremental propagation of inlining decisions from the leafs to the roots of
|
|
|
|
// the callgraph.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
2019-09-06 03:23:45 +08:00
|
|
|
|
2020-04-08 04:58:12 +08:00
|
|
|
#include "PassDetail.h"
|
2019-10-04 14:04:56 +08:00
|
|
|
#include "mlir/Analysis/CallGraph.h"
|
2019-10-11 03:12:39 +08:00
|
|
|
#include "mlir/IR/PatternMatch.h"
|
2020-05-14 01:27:19 +08:00
|
|
|
#include "mlir/Interfaces/SideEffectInterfaces.h"
|
2019-09-06 03:23:45 +08:00
|
|
|
#include "mlir/Transforms/InliningUtils.h"
|
|
|
|
#include "mlir/Transforms/Passes.h"
|
2019-10-04 14:04:56 +08:00
|
|
|
#include "llvm/ADT/SCCIterator.h"
|
2019-12-19 04:33:02 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2019-10-11 03:12:39 +08:00
|
|
|
#include "llvm/Support/Parallel.h"
|
2019-09-06 03:23:45 +08:00
|
|
|
|
2019-12-19 04:33:02 +08:00
|
|
|
#define DEBUG_TYPE "inlining"
|
|
|
|
|
2019-09-06 03:23:45 +08:00
|
|
|
using namespace mlir;
|
|
|
|
|
2020-03-19 04:10:13 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Symbol Use Tracking
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// Walk all of the used symbol callgraph nodes referenced with the given op.
|
|
|
|
/// Walk all of the used symbol callgraph nodes referenced with the given op.
/// `resolvedRefs` acts as a cross-call cache from symbol reference attribute
/// to its resolved callgraph node (nullptr when the reference does not resolve
/// to a callable), so repeated references are only looked up once. `callback`
/// is invoked with each resolved node and the operation that uses it.
static void walkReferencedSymbolNodes(
    Operation *op, CallGraph &cg,
    DenseMap<Attribute, CallGraphNode *> &resolvedRefs,
    function_ref<void(CallGraphNode *, Operation *)> callback) {
  auto symbolUses = SymbolTable::getSymbolUses(op);
  assert(symbolUses && "expected uses to be valid");

  // Symbol lookups are performed relative to the parent of `op`.
  Operation *symbolTableOp = op->getParentOp();
  for (const SymbolTable::SymbolUse &use : *symbolUses) {
    // Insert a cache entry; `refIt.second` is true only on first insertion.
    auto refIt = resolvedRefs.insert({use.getSymbolRef(), nullptr});
    CallGraphNode *&node = refIt.first->second;

    // If this is the first instance of this reference, try to resolve a
    // callgraph node for it.
    if (refIt.second) {
      auto *symbolOp = SymbolTable::lookupNearestSymbolFrom(symbolTableOp,
                                                            use.getSymbolRef());
      auto callableOp = dyn_cast_or_null<CallableOpInterface>(symbolOp);
      if (!callableOp)
        continue;
      node = cg.lookupNode(callableOp.getCallableRegion());
    }
    // Cached nullptr entries (non-callable symbols) are skipped here.
    if (node)
      callback(node, use.getUser());
  }
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// CGUseList
|
|
|
|
|
|
|
|
namespace {
/// This struct tracks the uses of callgraph nodes that can be dropped when
/// use_empty. It directly tracks and manages a use-list for all of the
/// call-graph nodes. This is necessary because many callgraph nodes are
/// referenced by SymbolRefAttr, which has no mechanism akin to the SSA `Use`
/// class.
struct CGUseList {
  /// This struct tracks the uses of callgraph nodes within a specific
  /// operation.
  struct CGUser {
    /// Any nodes referenced in the top-level attribute list of this user. We
    /// use a set here because the number of references does not matter.
    DenseSet<CallGraphNode *> topLevelUses;

    /// Uses of nodes referenced by nested operations, with an explicit count
    /// per referenced node.
    DenseMap<CallGraphNode *, int> innerUses;
  };

  /// Build the initial use lists by walking the symbol tables under `op`.
  CGUseList(Operation *op, CallGraph &cg);

  /// Drop uses of nodes referred to by the given call operation that resides
  /// within 'userNode'.
  void dropCallUses(CallGraphNode *userNode, Operation *callOp, CallGraph &cg);

  /// Remove the given node (and, recursively, its children) from the use list.
  void eraseNode(CallGraphNode *node);

  /// Returns true if the given callgraph node has no uses and can be pruned.
  bool isDead(CallGraphNode *node) const;

  /// Returns true if the given callgraph node has a single use and can be
  /// discarded.
  bool hasOneUseAndDiscardable(CallGraphNode *node) const;

  /// Recompute the uses held by the given callgraph node.
  void recomputeUses(CallGraphNode *node, CallGraph &cg);

  /// Merge the uses of 'lhs' with the uses of the 'rhs' after inlining a copy
  /// of 'lhs' into 'rhs'.
  void mergeUsesAfterInlining(CallGraphNode *lhs, CallGraphNode *rhs);

private:
  /// Decrement the uses of discardable nodes referenced by the given user.
  void decrementDiscardableUses(CGUser &uses);

  /// A mapping between a discardable callgraph node (that is a symbol) and the
  /// number of uses for this node.
  DenseMap<CallGraphNode *, int> discardableSymNodeUses;

  /// A mapping between a callgraph node and the symbol callgraph nodes that it
  /// uses.
  DenseMap<CallGraphNode *, CGUser> nodeUses;
};
} // end anonymous namespace
|
|
|
|
|
|
|
|
CGUseList::CGUseList(Operation *op, CallGraph &cg) {
  // A set of callgraph nodes that are always known to be live during inlining.
  DenseMap<Attribute, CallGraphNode *> alwaysLiveNodes;

  // Walk each of the symbol tables looking for discardable callgraph nodes.
  auto walkFn = [&](Operation *symbolTableOp, bool allUsesVisible) {
    for (Operation &op : symbolTableOp->getRegion(0).getOps()) {
      // If this is a callgraph operation, check to see if it is discardable.
      if (auto callable = dyn_cast<CallableOpInterface>(&op)) {
        if (auto *node = cg.lookupNode(callable.getCallableRegion())) {
          // A callable is discardable only when it is a symbol whose symbol
          // uses are all visible (or it is private) and the symbol op itself
          // allows discarding on use_empty.
          SymbolOpInterface symbol = dyn_cast<SymbolOpInterface>(&op);
          if (symbol && (allUsesVisible || symbol.isPrivate()) &&
              symbol.canDiscardOnUseEmpty()) {
            discardableSymNodeUses.try_emplace(node, 0);
          }
          continue;
        }
      }
      // Otherwise, check for any referenced nodes. These will be always-live.
      walkReferencedSymbolNodes(&op, cg, alwaysLiveNodes,
                                [](CallGraphNode *, Operation *) {});
    }
  };
  SymbolTable::walkSymbolTables(op, /*allSymUsesVisible=*/!op->getBlock(),
                                walkFn);

  // Drop the use information for any discardable nodes that are always live.
  for (auto &it : alwaysLiveNodes)
    discardableSymNodeUses.erase(it.second);

  // Compute the uses for each of the callable nodes in the graph.
  for (CallGraphNode *node : cg)
    recomputeUses(node, cg);
}
|
|
|
|
|
|
|
|
void CGUseList::dropCallUses(CallGraphNode *userNode, Operation *callOp,
                             CallGraph &cg) {
  // The inner-use counters tracked for the node containing the call.
  auto &innerRefCounts = nodeUses[userNode].innerUses;

  // Walk every callgraph node referenced from the call, decrementing both the
  // per-user inner-use count and the global discardable-use count. References
  // the user does not track are ignored.
  DenseMap<Attribute, CallGraphNode *> refCache;
  walkReferencedSymbolNodes(
      callOp, cg, refCache, [&](CallGraphNode *refNode, Operation *) {
        auto countIt = innerRefCounts.find(refNode);
        if (countIt == innerRefCounts.end())
          return;
        --countIt->second;
        --discardableSymNodeUses[refNode];
      });
}
|
|
|
|
|
|
|
|
void CGUseList::eraseNode(CallGraphNode *node) {
  // Drop all child nodes first; a node's children are erased with it.
  for (auto &edge : *node)
    if (edge.isChild())
      eraseNode(edge.getTarget());

  // Drop the uses held by this node and erase it.
  auto useIt = nodeUses.find(node);
  assert(useIt != nodeUses.end() && "expected node to be valid");
  decrementDiscardableUses(useIt->getSecond());
  nodeUses.erase(useIt);
  discardableSymNodeUses.erase(node);
}
|
|
|
|
|
|
|
|
bool CGUseList::isDead(CallGraphNode *node) const {
  // If the parent operation isn't a symbol, simply check normal SSA deadness:
  // no memory effects and no remaining SSA uses.
  Operation *nodeOp = node->getCallableRegion()->getParentOp();
  if (!isa<SymbolOpInterface>(nodeOp))
    return MemoryEffectOpInterface::hasNoEffect(nodeOp) && nodeOp->use_empty();

  // Otherwise, check the number of symbol uses. Only nodes tracked as
  // discardable can be considered dead.
  auto symbolIt = discardableSymNodeUses.find(node);
  return symbolIt != discardableSymNodeUses.end() && symbolIt->second == 0;
}
|
|
|
|
|
|
|
|
bool CGUseList::hasOneUseAndDiscardable(CallGraphNode *node) const {
  // If this isn't a symbol node, check for side-effects and SSA use count;
  // this mirrors isDead() but requires exactly one use.
  Operation *nodeOp = node->getCallableRegion()->getParentOp();
  if (!isa<SymbolOpInterface>(nodeOp))
    return MemoryEffectOpInterface::hasNoEffect(nodeOp) && nodeOp->hasOneUse();

  // Otherwise, check the number of symbol uses. Only nodes tracked as
  // discardable qualify.
  auto symbolIt = discardableSymNodeUses.find(node);
  return symbolIt != discardableSymNodeUses.end() && symbolIt->second == 1;
}
|
|
|
|
|
|
|
|
void CGUseList::recomputeUses(CallGraphNode *node, CallGraph &cg) {
  Operation *parentOp = node->getCallableRegion()->getParentOp();
  CGUser &uses = nodeUses[node];
  // Undo the contribution of the stale use counts before recounting.
  decrementDiscardableUses(uses);

  // Collect the new discardable uses within this node.
  uses = CGUser();
  DenseMap<Attribute, CallGraphNode *> resolvedRefs;
  auto walkFn = [&](CallGraphNode *refNode, Operation *user) {
    // Only track references to nodes registered as discardable.
    auto discardSymIt = discardableSymNodeUses.find(refNode);
    if (discardSymIt == discardableSymNodeUses.end())
      return;

    // References from nested operations are counted per occurrence; top-level
    // references (from `parentOp` itself) are deduplicated via the set, and a
    // duplicate top-level reference does not bump the global count again.
    if (user != parentOp)
      ++uses.innerUses[refNode];
    else if (!uses.topLevelUses.insert(refNode).second)
      return;
    ++discardSymIt->second;
  };
  walkReferencedSymbolNodes(parentOp, cg, resolvedRefs, walkFn);
}
|
|
|
|
|
|
|
|
/// Merge the uses of 'lhs' with the uses of 'rhs' after inlining a copy of
/// 'lhs' into 'rhs': every inner reference held by 'lhs' now additionally
/// appears within 'rhs', so both the per-node and the global discardable use
/// counts are incremented accordingly.
void CGUseList::mergeUsesAfterInlining(CallGraphNode *lhs, CallGraphNode *rhs) {
  // Ensure both entries exist before taking references into the map. Obtaining
  // both references via operator[] in one statement is unsafe: if the second
  // lookup inserts a missing entry, the DenseMap may rehash and invalidate the
  // first reference.
  nodeUses.try_emplace(lhs);
  nodeUses.try_emplace(rhs);
  CGUser &lhsUses = nodeUses.find(lhs)->second;
  CGUser &rhsUses = nodeUses.find(rhs)->second;

  for (auto &useIt : lhsUses.innerUses) {
    rhsUses.innerUses[useIt.first] += useIt.second;
    discardableSymNodeUses[useIt.first] += useIt.second;
  }
}
|
|
|
|
|
|
|
|
void CGUseList::decrementDiscardableUses(CGUser &uses) {
  // Top-level references each count as exactly one use.
  for (CallGraphNode *referenced : uses.topLevelUses)
    discardableSymNodeUses[referenced] -= 1;
  // Inner references carry an explicit multiplicity.
  for (auto &countedUse : uses.innerUses)
    discardableSymNodeUses[countedUse.first] -= countedUse.second;
}
|
|
|
|
|
2019-10-04 14:04:56 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// CallGraph traversal
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// Run a given transformation over the SCCs of the callgraph in a bottom up
|
|
|
|
/// traversal.
|
|
|
|
static void runTransformOnCGSCCs(
|
|
|
|
const CallGraph &cg,
|
2020-03-19 04:10:13 +08:00
|
|
|
function_ref<void(MutableArrayRef<CallGraphNode *>)> sccTransformer) {
|
2019-10-11 03:12:39 +08:00
|
|
|
std::vector<CallGraphNode *> currentSCCVec;
|
|
|
|
auto cgi = llvm::scc_begin(&cg);
|
|
|
|
while (!cgi.isAtEnd()) {
|
|
|
|
// Copy the current SCC and increment so that the transformer can modify the
|
|
|
|
// SCC without invalidating our iterator.
|
|
|
|
currentSCCVec = *cgi;
|
|
|
|
++cgi;
|
|
|
|
sccTransformer(currentSCCVec);
|
|
|
|
}
|
2019-10-04 14:04:56 +08:00
|
|
|
}
|
|
|
|
|
2019-09-06 03:23:45 +08:00
|
|
|
namespace {
/// This struct represents a resolved call to a given callgraph node. Given that
/// the call does not actually contain a direct reference to the
/// Region(CallGraphNode) that it is dispatching to, we need to resolve them
/// explicitly.
struct ResolvedCall {
  ResolvedCall(CallOpInterface call, CallGraphNode *sourceNode,
               CallGraphNode *targetNode)
      : call(call), sourceNode(sourceNode), targetNode(targetNode) {}

  /// The call operation being resolved.
  CallOpInterface call;

  /// The node containing the call, and the node the call dispatches to.
  CallGraphNode *sourceNode, *targetNode;
};
} // end anonymous namespace
|
|
|
|
|
|
|
|
/// Collect all of the callable operations within the given range of blocks. If
|
|
|
|
/// `traverseNestedCGNodes` is true, this will also collect call operations
|
|
|
|
/// inside of nested callgraph nodes.
|
2019-12-19 01:28:48 +08:00
|
|
|
static void collectCallOps(iterator_range<Region::iterator> blocks,
                           CallGraphNode *sourceNode, CallGraph &cg,
                           SmallVectorImpl<ResolvedCall> &calls,
                           bool traverseNestedCGNodes) {
  // Worklist of (block, owning callgraph node) pairs still to be scanned.
  SmallVector<std::pair<Block *, CallGraphNode *>, 8> worklist;
  auto addToWorklist = [&](CallGraphNode *node,
                           iterator_range<Region::iterator> blocks) {
    for (Block &block : blocks)
      worklist.emplace_back(&block, node);
  };

  addToWorklist(sourceNode, blocks);
  while (!worklist.empty()) {
    Block *block;
    // Note: `sourceNode` is reused as the cursor for the node owning `block`.
    std::tie(block, sourceNode) = worklist.pop_back_val();

    for (Operation &op : *block) {
      if (auto call = dyn_cast<CallOpInterface>(op)) {
        // TODO(riverriddle) Support inlining nested call references.
        // Nested (non-flat) symbol references are skipped for now.
        CallInterfaceCallable callable = call.getCallableForCallee();
        if (SymbolRefAttr symRef = callable.dyn_cast<SymbolRefAttr>()) {
          if (!symRef.isa<FlatSymbolRefAttr>())
            continue;
        }

        // Only record calls whose target resolves to a node with a body.
        CallGraphNode *targetNode = cg.resolveCallable(call);
        if (!targetNode->isExternal())
          calls.emplace_back(call, sourceNode, targetNode);
        continue;
      }

      // If this is not a call, traverse the nested regions. If
      // `traverseNestedCGNodes` is false, then don't traverse nested call graph
      // regions.
      for (auto &nestedRegion : op.getRegions()) {
        CallGraphNode *nestedNode = cg.lookupNode(&nestedRegion);
        if (traverseNestedCGNodes || !nestedNode)
          addToWorklist(nestedNode ? nestedNode : sourceNode, nestedRegion);
      }
    }
  }
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Inliner
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
/// This class provides a specialization of the main inlining interface.
struct Inliner : public InlinerInterface {
  Inliner(MLIRContext *context, CallGraph &cg)
      : InlinerInterface(context), cg(cg) {}

  /// Process a set of blocks that have been inlined. This callback is invoked
  /// *before* inlined terminator operations have been processed.
  void
  processInlinedBlocks(iterator_range<Region::iterator> inlinedBlocks) final {
    // Find the closest callgraph node from the first block by walking up the
    // region tree until a region with a node is found.
    CallGraphNode *node;
    Region *region = inlinedBlocks.begin()->getParent();
    while (!(node = cg.lookupNode(region))) {
      region = region->getParentRegion();
      assert(region && "expected valid parent node");
    }

    // Queue any calls that now appear in the inlined blocks for consideration.
    collectCallOps(inlinedBlocks, node, cg, calls,
                   /*traverseNestedCGNodes=*/true);
  }

  /// The current set of call instructions to consider for inlining.
  SmallVector<ResolvedCall, 8> calls;

  /// The callgraph being operated on.
  CallGraph &cg;
};
} // namespace
|
|
|
|
|
|
|
|
/// Returns true if the given call should be inlined.
|
|
|
|
/// Returns true if the given call should be inlined.
static bool shouldInline(ResolvedCall &resolvedCall) {
  // Inlining terminator calls is currently unsupported.
  Operation *callOp = resolvedCall.call.getOperation();
  if (callOp->isKnownTerminator())
    return false;

  // Refuse to inline when the target region encloses the call itself; this
  // guards against recursive inlining.
  Region *targetRegion = resolvedCall.targetNode->getCallableRegion();
  if (targetRegion->isAncestor(resolvedCall.call.getParentRegion()))
    return false;

  // No objections: inline it.
  return true;
}
|
|
|
|
|
2020-03-19 04:10:13 +08:00
|
|
|
/// Delete the given node and remove it from the current scc and the callgraph.
|
|
|
|
static void deleteNode(CallGraphNode *node, CGUseList &useList, CallGraph &cg,
                       MutableArrayRef<CallGraphNode *> currentSCC) {
  // Erase the parent operation and remove it from the various lists.
  node->getCallableRegion()->getParentOp()->erase();
  cg.eraseNode(node);
  // NOTE(review): `useList` is unused in this body. The in-place inlining path
  // calls CGUseList::eraseNode itself before reaching here; verify the
  // dead-node path in inlineCallsInSCC does not leave a stale use-list entry.

  // Replace this node in the currentSCC with the external node so later
  // iteration over the SCC skips it.
  auto it = llvm::find(currentSCC, node);
  if (it != currentSCC.end())
    *it = cg.getExternalNode();
}
|
|
|
|
|
2019-10-11 03:12:39 +08:00
|
|
|
/// Attempt to inline calls within the given scc. This function returns
|
|
|
|
/// success if any calls were inlined, failure otherwise.
|
2020-03-19 04:10:13 +08:00
|
|
|
static LogicalResult
inlineCallsInSCC(Inliner &inliner, CGUseList &useList,
                 MutableArrayRef<CallGraphNode *> currentSCC) {
  CallGraph &cg = inliner.cg;
  auto &calls = inliner.calls;

  // Collect all of the direct calls within the nodes of the current SCC. We
  // don't traverse nested callgraph nodes, because they are handled separately
  // likely within a different SCC.
  for (CallGraphNode *node : currentSCC) {
    if (node->isExternal())
      continue;

    // If this node is dead, just delete it now.
    if (useList.isDead(node))
      deleteNode(node, useList, cg, currentSCC);
    else
      collectCallOps(*node->getCallableRegion(), node, cg, calls,
                     /*traverseNestedCGNodes=*/false);
  }
  if (calls.empty())
    return failure();

  // A set of dead nodes to remove after inlining.
  SmallVector<CallGraphNode *, 1> deadNodes;

  // Try to inline each of the call operations. Don't cache the end iterator
  // here as more calls may be added during inlining (by copy: `calls` may
  // also reallocate, so take the ResolvedCall by value).
  bool inlinedAnyCalls = false;
  for (unsigned i = 0; i != calls.size(); ++i) {
    ResolvedCall it = calls[i];
    bool doInline = shouldInline(it);
    CallOpInterface call = it.call;
    LLVM_DEBUG({
      if (doInline)
        llvm::dbgs() << "* Inlining call: " << call << "\n";
      else
        llvm::dbgs() << "* Not inlining call: " << call << "\n";
    });
    if (!doInline)
      continue;
    Region *targetRegion = it.targetNode->getCallableRegion();

    // If this is the last call to the target node and the node is discardable,
    // then inline it in-place and delete the node if successful.
    bool inlineInPlace = useList.hasOneUseAndDiscardable(it.targetNode);

    LogicalResult inlineResult = inlineCall(
        inliner, call, cast<CallableOpInterface>(targetRegion->getParentOp()),
        targetRegion, /*shouldCloneInlinedRegion=*/!inlineInPlace);
    if (failed(inlineResult)) {
      LLVM_DEBUG(llvm::dbgs() << "** Failed to inline\n");
      continue;
    }
    inlinedAnyCalls = true;

    // If the inlining was successful, merge the new uses into the source node,
    useList.dropCallUses(it.sourceNode, call.getOperation(), cg);
    useList.mergeUsesAfterInlining(it.targetNode, it.sourceNode);

    // then erase the call.
    call.erase();

    // If we inlined in place, mark the node for deletion.
    if (inlineInPlace) {
      useList.eraseNode(it.targetNode);
      deadNodes.push_back(it.targetNode);
    }
  }

  // Erase the nodes that were fully consumed by in-place inlining.
  for (CallGraphNode *node : deadNodes)
    deleteNode(node, useList, cg, currentSCC);
  calls.clear();
  return success(inlinedAnyCalls);
}
|
|
|
|
|
|
|
|
/// Canonicalize the nodes within the given SCC with the given set of
|
|
|
|
/// canonicalization patterns.
|
2020-03-19 04:10:13 +08:00
|
|
|
static void canonicalizeSCC(CallGraph &cg, CGUseList &useList,
                            MutableArrayRef<CallGraphNode *> currentSCC,
                            MLIRContext *context,
                            const OwningRewritePatternList &canonPatterns) {
  // Collect the sets of nodes to canonicalize.
  SmallVector<CallGraphNode *, 4> nodesToCanonicalize;
  for (auto *node : currentSCC) {
    // Don't canonicalize the external node, it has no valid callable region.
    if (node->isExternal())
      continue;

    // Don't canonicalize nodes with children. Nodes with children
    // require special handling as we may remove the node during
    // canonicalization. In the future, we should be able to handle this
    // case with proper node deletion tracking.
    if (node->hasChildren())
      continue;

    // We also won't apply canonicalizations for nodes that are not
    // isolated. This avoids potentially mutating the regions of nodes defined
    // above, this is also a stipulation of the 'applyPatternsAndFoldGreedily'
    // driver.
    auto *region = node->getCallableRegion();
    if (!region->getParentOp()->isKnownIsolatedFromAbove())
      continue;
    nodesToCanonicalize.push_back(node);
  }
  if (nodesToCanonicalize.empty())
    return;

  // Canonicalize each of the nodes within the SCC in parallel.
  // NOTE: This is simple now, because we don't enable canonicalizing nodes
  // within children. When we remove this restriction, this logic will need to
  // be reworked.
  if (context->isMultithreadingEnabled()) {
    ParallelDiagnosticHandler canonicalizationHandler(context);
    llvm::parallelForEachN(
        /*Begin=*/0, /*End=*/nodesToCanonicalize.size(), [&](size_t index) {
          // Set the order for this thread so that diagnostics will be properly
          // ordered.
          canonicalizationHandler.setOrderIDForThread(index);

          // Apply the canonicalization patterns to this region.
          auto *node = nodesToCanonicalize[index];
          applyPatternsAndFoldGreedily(*node->getCallableRegion(),
                                       canonPatterns);

          // Make sure to reset the order ID for the diagnostic handler, as this
          // thread may be used in a different context.
          canonicalizationHandler.eraseOrderIDForThread();
        });
  } else {
    // Single-threaded fallback: no diagnostic ordering needed.
    for (CallGraphNode *node : nodesToCanonicalize)
      applyPatternsAndFoldGreedily(*node->getCallableRegion(), canonPatterns);
  }

  // Recompute the uses held by each of the nodes, since canonicalization may
  // have added or removed symbol references.
  for (CallGraphNode *node : nodesToCanonicalize)
    useList.recomputeUses(node, cg);
}
|
|
|
|
|
2020-04-09 03:57:02 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// InlinerPass
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
namespace {
/// A pass that runs the bottom-up SCC inlining algorithm over the callgraph of
/// the operation it is scheduled on.
struct InlinerPass : public InlinerBase<InlinerPass> {
  void runOnOperation() override;

  /// Attempt to inline calls within the given scc, and run canonicalizations
  /// with the given patterns, until a fixed point is reached. This allows for
  /// the inlining of newly devirtualized calls.
  void inlineSCC(Inliner &inliner, CGUseList &useList,
                 MutableArrayRef<CallGraphNode *> currentSCC,
                 MLIRContext *context,
                 const OwningRewritePatternList &canonPatterns);
};
} // end anonymous namespace
|
|
|
|
|
|
|
|
void InlinerPass::runOnOperation() {
  CallGraph &cg = getAnalysis<CallGraph>();
  auto *context = &getContext();

  // The inliner should only be run on operations that define a symbol table,
  // as the callgraph will need to resolve references.
  Operation *op = getOperation();
  if (!op->hasTrait<OpTrait::SymbolTable>()) {
    op->emitOpError() << " was scheduled to run under the inliner, but does "
                         "not define a symbol table";
    return signalPassFailure();
  }

  // Collect a set of canonicalization patterns to use when simplifying
  // callable regions within an SCC.
  OwningRewritePatternList canonPatterns;
  for (auto *op : context->getRegisteredOperations())
    op->getCanonicalizationPatterns(canonPatterns, context);

  // Run the inline transform in post-order over the SCCs in the callgraph.
  Inliner inliner(context, cg);
  CGUseList useList(getOperation(), cg);
  runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) {
    inlineSCC(inliner, useList, scc, context, canonPatterns);
  });
}
|
|
|
|
|
|
|
|
void InlinerPass::inlineSCC(Inliner &inliner, CGUseList &useList,
                            MutableArrayRef<CallGraphNode *> currentSCC,
                            MLIRContext *context,
                            const OwningRewritePatternList &canonPatterns) {
  // Alternate between inlining and simplification until a fixed point is
  // reached, or the iteration budget runs out. Canonicalizing between rounds
  // may devirtualize new calls and improves the cost model.
  for (unsigned iteration = 0;;) {
    // Stop as soon as a round performs no inlining.
    if (failed(inlineCallsInSCC(inliner, useList, currentSCC)))
      break;

    // Bail out early when simplification is disabled or the maximum number of
    // iterations has been reached.
    if (disableCanonicalization || ++iteration >= maxInliningIterations)
      break;
    canonicalizeSCC(inliner.cg, useList, currentSCC, context, canonPatterns);
  }
}
|
|
|
|
|
2019-10-17 03:08:55 +08:00
|
|
|
/// Create an instance of the basic bottom-up inliner pass.
std::unique_ptr<Pass> mlir::createInlinerPass() {
  return std::make_unique<InlinerPass>();
}
|