llvm-project/llvm/lib/Target/WebAssembly/Relooper.cpp

986 lines
36 KiB
C++

//===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
///
/// \file
/// \brief This implements the Relooper algorithm. This implementation includes
/// optimizations added since the original academic paper [1] was published.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object
/// oriented programming systems languages and applications companion
/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
/// http://doi.acm.org/10.1145/2048147.2048224
///
//===-------------------------------------------------------------------===//
#include "Relooper.h"
#include "WebAssembly.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <cstdlib>
#include <functional>
#include <list>
#include <stack>
#include <string>
#define DEBUG_TYPE "relooper"
using namespace llvm;
using namespace Relooper;
// Command-line tunables for the relooper.
//
// Divisor applied to the summed code size of all live blocks; the quotient is
// the growth budget SplitDeadEnds compares against before duplicating a block.
// NOTE(review): the value is used directly as a divisor, so passing 0 would
// divide by zero - confirm whether the option should be validated.
static cl::opt<int> RelooperSplittingFactor(
"relooper-splitting-factor",
cl::desc(
"How much to discount code size when deciding whether to split a node"),
cl::init(5));
// Once a MultipleShape has at least this many handled entries, MakeMultiple
// marks it to be implemented with a switch (UseSwitch).
static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
"relooper-multiple-switch-threshold",
cl::desc(
"How many entries to allow in a multiple before we use a switch"),
cl::init(10));
// Depth bound checked by RemoveUnneededFlows before it applies the
// break-to-nested rewrite.
static cl::opt<unsigned> RelooperNestingLimit(
"relooper-nesting-limit",
cl::desc(
"How much nesting is acceptable"),
cl::init(20));
namespace {
///
/// Implements the relooper algorithm for a function's blocks.
///
/// Implementation details: The Relooper instance has
/// ownership of the blocks and shapes, and frees them when done.
///
struct RelooperAlgorithm {
  /// Every block handed to AddBlock; deleted by the destructor.
  std::deque<Block *> Blocks;
  /// Every shape created during Calculate; deleted by the destructor.
  std::deque<Shape *> Shapes;
  /// Result of Calculate: the first shape of the structured output.
  Shape *Root;
  /// When set, Calculate skips dead-end splitting.
  bool MinSize;
  /// Next id handed out by AddBlock when the caller does not supply one.
  int BlockIdCounter;
  /// Next id handed out to a newly created shape.
  int ShapeIdCounter;

  RelooperAlgorithm();
  ~RelooperAlgorithm();

  // The destructor deletes every pointer in Blocks and Shapes, so a copy
  // would delete them a second time. Forbid copying.
  RelooperAlgorithm(const RelooperAlgorithm &) = delete;
  RelooperAlgorithm &operator=(const RelooperAlgorithm &) = delete;

  /// Registers \p New (taking ownership) and assigns its id: \p Id if it is
  /// not -1, otherwise the next value of BlockIdCounter.
  void AddBlock(Block *New, int Id = -1);
  // Calculates the shapes
  void Calculate(Block *Entry);
  // Sets us to try to minimize size
  void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
};

/// Function pass that builds relooper blocks mirroring a function's basic
/// blocks and runs RelooperAlgorithm over them. It reports that it preserves
/// all analyses and never modifies the function.
struct RelooperAnalysis final : public FunctionPass {
  static char ID;
  RelooperAnalysis() : FunctionPass(ID) {}
  const char *getPassName() const override { return "relooper"; }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }
  bool runOnFunction(Function &F) override;
};
}
// RelooperAnalysis

/// Storage for the pass's static ID member.
char RelooperAnalysis::ID = 0;

/// Returns a newly allocated RelooperAnalysis pass.
FunctionPass *llvm::createWebAssemblyRelooper() {
  FunctionPass *Pass = new RelooperAnalysis();
  return Pass;
}
/// Builds one relooper Block per basic block of \p F, wires up the branches
/// between them from the CFG successors, and runs the relooper calculation
/// starting at the function's entry block.
///
/// \returns false always: nothing in \p F is modified.
bool RelooperAnalysis::runOnFunction(Function &F) {
  DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
  RelooperAlgorithm R;
  // FIXME: remove duplication between relooper's and LLVM's BBs.
  std::map<const BasicBlock *, Block *> BB2B;
  std::map<const Block *, const BasicBlock *> B2BB;
  for (const BasicBlock &BB : F) {
    // FIXME: getName is wrong here, Code is meant to represent amount of code.
    // FIXME: use BranchVarInit for switch.
    // The temporary string lives until the end of the statement; the Block
    // constructor duplicates it.
    Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
    R.AddBlock(B);
    assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
    assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
    BB2B[&BB] = B;
    B2BB[B] = &BB;
  }
  for (Block *B : R.Blocks) {
    const BasicBlock *BB = B2BB[B];
    for (const BasicBlock *Successor : successors(BB)) {
      Block *Target = BB2B[Successor];
      // A terminator may name the same successor more than once (for example
      // several switch cases sharing one destination). AddBranchTo allows only
      // one branch per target, so add each distinct target once.
      if (B->BranchesOut.count(Target))
        continue;
      // FIXME: add branch's Condition and Code below.
      B->AddBranchTo(Target, /*Condition=*/nullptr, /*Code=*/nullptr);
    }
  }
  R.Calculate(BB2B[&F.getEntryBlock()]);
  return false; // Analysis passes don't modify anything.
}
// Helpers
/// Maps a block to a set of blocks, keeping the order in which keys were added.
using BlockBlockSetMap = MapVector<Block *, BlockSet>;
/// Simple FIFO work list of blocks.
using BlockList = std::list<Block *>;
/// Returns true when \p Haystack reports a non-zero count() for \p Needle.
/// Works with any container that provides count().
template <class ContainerT, class KeyT>
static bool contains(const ContainerT &Haystack, const KeyT &Needle) {
  return Haystack.count(Needle) != 0;
}
// Branch
/// Creates a labeled branch with no ancestor. Both strings are optional; when
/// given they are duplicated, so the caller keeps ownership of its arguments.
Branch::Branch(const char *ConditionInit, const char *CodeInit)
    : Ancestor(nullptr), Labeled(true) {
  // FIXME: move from char* to LLVM data structures
  Condition = nullptr;
  if (ConditionInit)
    Condition = strdup(ConditionInit);

  Code = nullptr;
  if (CodeInit)
    Code = strdup(CodeInit);
}
/// Releases the strings duplicated by the constructor (free accepts null).
Branch::~Branch() {
  // FIXME: move from char* to LLVM data structures
  char *OwnedCondition = const_cast<char *>(Condition);
  char *OwnedCode = const_cast<char *>(Code);
  free(OwnedCondition);
  free(OwnedCode);
}
// Block
/// Creates a block with no parent shape and an unassigned id (-1).
///
/// \param CodeInit      text for the block's code; duplicated. A null pointer
///                      is treated as the empty string so that later
///                      strlen(Code) calls stay well defined.
/// \param BranchVarInit optional branch variable; duplicated when non-null.
Block::Block(const char *CodeInit, const char *BranchVarInit)
    : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
  // FIXME: move from char* to LLVM data structures
  // strdup on a null pointer is undefined; the other string parameters in
  // this file are null-checked, so do the same here.
  Code = strdup(CodeInit ? CodeInit : "");
  BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
}
/// Releases the strings duplicated by the constructor (free accepts null).
Block::~Block() {
  // FIXME: move from char* to LLVM data structures
  char *OwnedCode = const_cast<char *>(Code);
  char *OwnedBranchVar = const_cast<char *>(BranchVar);
  free(OwnedCode);
  free(OwnedBranchVar);
}
/// Records an outgoing branch from this block to \p Target. Only one branch
/// per target is allowed (asserted). \p Condition and \p Code are passed on to
/// the Branch constructor.
void Block::AddBranchTo(Block *Target, const char *Condition,
                        const char *Code) {
  assert(BranchesOut.count(Target) == 0 &&
         "cannot add more than one branch to the same target");
  std::unique_ptr<Branch> NewBranch = make_unique<Branch>(Condition, Code);
  BranchesOut[Target].swap(NewBranch);
}
// Relooper
/// Starts with no root shape and size minimization off. Block ids start at 1
/// because block ID 0 is reserved for clearings; shape ids start at 0.
RelooperAlgorithm::RelooperAlgorithm()
    : Root(nullptr),
      MinSize(false),
      BlockIdCounter(1),
      ShapeIdCounter(0) {}
/// Deletes every block and then every shape this instance has registered.
RelooperAlgorithm::~RelooperAlgorithm() {
  for (Block *OwnedBlock : Blocks)
    delete OwnedBlock;
  for (Shape *OwnedShape : Shapes)
    delete OwnedShape;
}
/// Appends \p New to Blocks. An \p Id of -1 requests a fresh id from
/// BlockIdCounter; any other value is stored as given.
void RelooperAlgorithm::AddBlock(Block *New, int Id) {
  if (Id == -1)
    New->Id = BlockIdCounter++;
  else
    New->Id = Id;
  Blocks.push_back(New);
}
/// Common base for the helper structs used inside Calculate: it just carries
/// a pointer back to the RelooperAlgorithm being computed.
struct RelooperRecursor {
  RelooperAlgorithm *Parent;

  RelooperRecursor(RelooperAlgorithm *ParentInit)
      : Parent(ParentInit) {}
};
void RelooperAlgorithm::Calculate(Block *Entry) {
// Scan and optimize the input
struct PreOptimizer : public RelooperRecursor {
PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
BlockSet Live;
void FindLive(Block *Root) {
BlockList ToInvestigate;
ToInvestigate.push_back(Root);
while (!ToInvestigate.empty()) {
Block *Curr = ToInvestigate.front();
ToInvestigate.pop_front();
if (contains(Live, Curr))
continue;
Live.insert(Curr);
for (const auto &iter : Curr->BranchesOut)
ToInvestigate.push_back(iter.first);
}
}
// If a block has multiple entries but no exits, and it is small enough, it
// is useful to split it. A common example is a C++ function where
// everything ends up at a final exit block and does some RAII cleanup.
// Without splitting, we will be forced to introduce labelled loops to
// allow reaching the final block
void SplitDeadEnds() {
unsigned TotalCodeSize = 0;
for (const auto &Curr : Live) {
TotalCodeSize += strlen(Curr->Code);
}
BlockSet Splits;
BlockSet Removed;
for (const auto &Original : Live) {
if (Original->BranchesIn.size() <= 1 ||
!Original->BranchesOut.empty())
continue; // only dead ends, for now
if (contains(Original->BranchesOut, Original))
continue; // cannot split a looping node
if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
TotalCodeSize / RelooperSplittingFactor)
continue; // if splitting increases raw code size by a significant
// amount, abort
// Split the node (for simplicity, we replace all the blocks, even
// though we could have reused the original)
DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n");
for (const auto &Prior : Original->BranchesIn) {
Block *Split = new Block(Original->Code, Original->BranchVar);
Parent->AddBlock(Split, Original->Id);
Split->BranchesIn.insert(Prior);
std::unique_ptr<Branch> Details;
Details.swap(Prior->BranchesOut[Original]);
Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
Details->Code);
for (const auto &iter : Original->BranchesOut) {
Block *Post = iter.first;
Branch *Details = iter.second.get();
Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
Details->Code);
Post->BranchesIn.insert(Split);
}
Splits.insert(Split);
Removed.insert(Original);
}
for (const auto &iter : Original->BranchesOut) {
Block *Post = iter.first;
Post->BranchesIn.remove(Original);
}
}
for (const auto &iter : Splits)
Live.insert(iter);
for (const auto &iter : Removed)
Live.remove(iter);
}
};
// Find the blocks reachable from the entry.
PreOptimizer Pre(this);
Pre.FindLive(Entry);
// Add incoming branches from live blocks, ignoring dead code: for every live
// block, register it as a predecessor of each of its branch targets.
for (Block *Candidate : Blocks) {
  if (contains(Pre.Live, Candidate)) {
    for (const auto &Out : Candidate->BranchesOut)
      Out.first->BranchesIn.insert(Candidate);
  }
}
// Splitting duplicates code, so it is skipped when minimizing size.
if (!MinSize) {
  Pre.SplitDeadEnds();
}
// Recursively process the graph
struct Analyzer : public RelooperRecursor {
Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
// Registers a freshly created shape with the owning Relooper: gives it the
// next shape id and appends it to the Shapes list.
void Notice(Shape *New) {
  const int AssignedId = Parent->ShapeIdCounter++;
  New->Id = AssignedId;
  Parent->Shapes.push_back(New);
}
// Adds the targets of Source's outgoing branches to Entries. When LimitTo is
// given, targets that are not members of it are left out.
void GetBlocksOut(Block *Source, BlockSet &Entries,
                  BlockSet *LimitTo = nullptr) {
  for (const auto &Out : Source->BranchesOut) {
    Block *Target = Out.first;
    if (LimitTo && !contains(*LimitTo, Target))
      continue;
    Entries.insert(Target);
  }
}
// Converts/processes all branchings to a specific target.
//
// Every branch into Target that originates from a block in From is tagged
// with the given flow Type and Ancestor shape, and is moved from the
// BranchesIn/BranchesOut bookkeeping to the Processed* bookkeeping.
void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
               BlockSet &From) {
  DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type
               << "\n");
  // Decide which predecessors to process before touching BranchesIn: the loop
  // below removes elements from that set, which would invalidate an iterator
  // into it.
  BlockList Priors;
  for (Block *Prior : Target->BranchesIn)
    if (contains(From, Prior))
      Priors.push_back(Prior);

  for (Block *Prior : Priors) {
    // NOTE(review): the swap leaves the Target key in Prior->BranchesOut with
    // a null Branch. Code that iterates BranchesOut afterwards still sees
    // that key - confirm whether the entry should be erased here (callers
    // that iterate BranchesOut while calling Solipsize would need care).
    std::unique_ptr<Branch> PriorOut;
    PriorOut.swap(Prior->BranchesOut[Target]);
    PriorOut->Ancestor = Ancestor;
    PriorOut->Type = Type;
    if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
      Multiple->Breaks++; // We are breaking out of this Multiple, so need a
                          // loop
    Target->BranchesIn.remove(Prior);
    Target->ProcessedBranchesIn.insert(Prior);
    Prior->ProcessedBranchesOut[Target].swap(PriorOut);
  }
}
// Wraps a single block in a SimpleShape. When other blocks remain, Inner is
// removed from Blocks, its targets that are still in Blocks become the next
// entries, and the branches from Inner to them are marked Direct.
Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
  DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n");
  SimpleShape *Simple = new SimpleShape;
  Notice(Simple);
  Simple->Inner = Inner;
  Inner->Parent = Simple;

  // Inner is the only block left: nothing follows it.
  if (Blocks.size() <= 1)
    return Simple;

  Blocks.remove(Inner);
  GetBlocksOut(Inner, NextEntries, &Blocks);
  BlockSet OnlyInner;
  OnlyInner.insert(Inner);
  for (Block *Following : NextEntries)
    Solipsize(Following, Branch::Direct, Simple, OnlyInner);
  return Simple;
}
// Builds a LoopShape around the given entries. The loop body is moved out of
// Blocks, the targets outside the body are reported through NextEntries, and
// the body itself is processed recursively.
Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
                BlockSet &NextEntries) {
  // Find the inner blocks in this loop. Proceed backwards from the entries,
  // following BranchesIn, until you reach a block already collected.
  BlockSet InnerBlocks;
  BlockSet Worklist = Entries;
  while (!Worklist.empty()) {
    Block *Candidate = *Worklist.begin();
    Worklist.remove(Candidate);
    if (contains(InnerBlocks, Candidate))
      continue;
    // This element is new, mark it as inner and remove from outer
    InnerBlocks.insert(Candidate);
    Blocks.remove(Candidate);
    // Add the elements prior to it
    for (Block *Pred : Candidate->BranchesIn)
      Worklist.insert(Pred);
  }
  assert(!InnerBlocks.empty());

  // Anything an inner block branches to that is not itself inner is where
  // control continues after the loop.
  for (Block *Member : InnerBlocks) {
    for (const auto &Out : Member->BranchesOut) {
      Block *Possible = Out.first;
      if (contains(InnerBlocks, Possible))
        continue;
      NextEntries.insert(Possible);
    }
  }

  LoopShape *Loop = new LoopShape();
  Notice(Loop);
  // Solipsize the loop, replacing with break/continue and marking branches
  // as Processed (will not affect later calculations)
  // A. Branches to the loop entries become a continue to this shape
  for (Block *LoopEntry : Entries)
    Solipsize(LoopEntry, Branch::Continue, Loop, InnerBlocks);
  // B. Branches to outside the loop (a next entry) become breaks on this
  // shape
  for (Block *Exit : NextEntries)
    Solipsize(Exit, Branch::Break, Loop, InnerBlocks);
  // Finish up
  Loop->Inner = Process(InnerBlocks, Entries, nullptr);
  return Loop;
}
// For each entry, find the independent group reachable by it. The
// independent group is the entry itself, plus all the blocks it can
// reach that cannot be directly reached by another entry. Note that we
// ignore directly reaching the entry itself by another entry.
// @param Entries - the entries to flow out from
// @param IndependentGroups - output: for each entry that ends up with a
// non-empty group, the set of blocks in that group
// @param Ignore - previous blocks that are irrelevant
void FindIndependentGroups(BlockSet &Entries,
BlockBlockSetMap &IndependentGroups,
BlockSet *Ignore = nullptr) {
typedef std::map<Block *, Block *> BlockBlockMap;
// Bookkeeping shared by the flood fill below and the invalidation routine.
struct HelperClass {
BlockBlockSetMap &IndependentGroups;
BlockBlockMap Ownership; // For each block, which entry it belongs to.
// We have reached it from there. A null owner
// means the block was reached and then
// invalidated.
HelperClass(BlockBlockSetMap &IndependentGroupsInit)
: IndependentGroups(IndependentGroupsInit) {}
// Removes New from its owner's group and clears its owner, then does the
// same for every branch target of an invalidated block that is currently
// owned (work-list driven, so no recursion).
void InvalidateWithChildren(Block *New) {
// Being in the list means you need to be invalidated
BlockList ToInvalidate;
ToInvalidate.push_back(New);
while (!ToInvalidate.empty()) {
Block *Invalidatee = ToInvalidate.front();
ToInvalidate.pop_front();
Block *Owner = Ownership[Invalidatee];
// Owner may have been invalidated, do not add to
// IndependentGroups!
if (contains(IndependentGroups, Owner))
IndependentGroups[Owner].remove(Invalidatee);
if (Ownership[Invalidatee]) { // may have been seen before and
// invalidated already
Ownership[Invalidatee] = nullptr;
for (const auto &iter : Invalidatee->BranchesOut) {
Block *Target = iter.first;
BlockBlockMap::iterator Known = Ownership.find(Target);
if (Known != Ownership.end()) {
Block *TargetOwner = Known->second;
if (TargetOwner)
ToInvalidate.push_back(Target);
}
}
}
}
}
};
HelperClass Helper(IndependentGroups);
// We flow out from each of the entries, simultaneously.
// When we reach a new block, we add it as belonging to the one we got to
// it from.
// If we reach a new block that is already marked as belonging to someone,
// it is reachable by two entries and is not valid for any of them.
// Remove it and all it can reach that have been visited.
// Being in the queue means we just added this item, and
// we need to add its children
BlockList Queue;
// Seed: every entry owns itself.
for (const auto &Entry : Entries) {
Helper.Ownership[Entry] = Entry;
IndependentGroups[Entry].insert(Entry);
Queue.push_back(Entry);
}
while (!Queue.empty()) {
Block *Curr = Queue.front();
Queue.pop_front();
Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
// map if we are in the queue
if (!Owner)
continue; // we have been invalidated meanwhile after being reached
// from two entries
// Add all children
for (const auto &iter : Curr->BranchesOut) {
Block *New = iter.first;
BlockBlockMap::iterator Known = Helper.Ownership.find(New);
if (Known == Helper.Ownership.end()) {
// New node. Add it, and put it in the queue
Helper.Ownership[New] = Owner;
IndependentGroups[Owner].insert(New);
Queue.push_back(New);
continue;
}
Block *NewOwner = Known->second;
if (!NewOwner)
continue; // We reached an invalidated node
if (NewOwner != Owner)
// Invalidate this and all reachable that we have seen - we reached
// this from two locations
Helper.InvalidateWithChildren(New);
// otherwise, we have the same owner, so do nothing
}
}
// Having processed all the interesting blocks, we remain with just one
// potential issue:
// If a->b, and a was invalidated, but then b was later reached by
// someone else, we must invalidate b. To check for this, we go over all
// elements in the independent groups, if an element has a parent which
// does *not* have the same owner, we must remove it and all its
// children.
for (const auto &iter : Entries) {
BlockSet &CurrGroup = IndependentGroups[iter];
// Collect first and invalidate afterwards, so CurrGroup is not modified
// while it is being iterated.
BlockList ToInvalidate;
for (const auto &iter : CurrGroup) {
Block *Child = iter;
for (const auto &iter : Child->BranchesIn) {
Block *Parent = iter;
if (Ignore && contains(*Ignore, Parent))
continue;
if (Helper.Ownership[Parent] != Helper.Ownership[Child])
ToInvalidate.push_back(Child);
}
}
while (!ToInvalidate.empty()) {
Block *Invalidatee = ToInvalidate.front();
ToInvalidate.pop_front();
Helper.InvalidateWithChildren(Invalidatee);
}
}
// Remove empty groups
for (const auto &iter : Entries)
if (IndependentGroups[iter].empty())
IndependentGroups.erase(iter);
}
// Builds a MultipleShape that handles each independent group in its own
// inner shape.
//
// Blocks            - remaining blocks; members of handled groups are removed
// Entries           - the current entries
// IndependentGroups - entry -> blocks owned by that entry; each group is
//                     processed recursively
// Prev              - the shape created just before this one, or null when
//                     this is the first shape of the current Process call
// NextEntries       - output: targets outside the handled groups, plus the
//                     entries that had no group (they are deferred)
Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
                    BlockBlockSetMap &IndependentGroups, Shape *Prev,
                    BlockSet &NextEntries) {
  // Prev is null for the first shape in a chain, and isa<> must not be given
  // a null pointer, so test for null first.
  bool Fused = Prev && isa<SimpleShape>(Prev);
  MultipleShape *Multiple = new MultipleShape();
  Notice(Multiple);
  BlockSet CurrEntries;
  for (auto &iter : IndependentGroups) {
    Block *CurrEntry = iter.first;
    BlockSet &CurrBlocks = iter.second;
    // Create inner block
    CurrEntries.clear();
    CurrEntries.insert(CurrEntry);
    for (const auto &CurrInner : CurrBlocks) {
      // Remove the block from the remaining blocks
      Blocks.remove(CurrInner);
      // Find new next entries and fix branches to them
      for (auto iter = CurrInner->BranchesOut.begin();
           iter != CurrInner->BranchesOut.end();) {
        Block *CurrTarget = iter->first;
        auto Next = iter;
        Next++;
        if (!contains(CurrBlocks, CurrTarget)) {
          NextEntries.insert(CurrTarget);
          Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
        }
        iter = Next; // increment carefully because Solipsize can remove us
      }
    }
    Multiple->InnerMap[CurrEntry->Id] =
        Process(CurrBlocks, CurrEntries, nullptr);
    // If we are not fused, then our entries will actually be checked
    if (!Fused)
      CurrEntry->IsCheckedMultipleEntry = true;
  }
  // Add entries not handled as next entries, they are deferred
  for (const auto &Entry : Entries)
    if (!contains(IndependentGroups, Entry))
      NextEntries.insert(Entry);
  // The multiple has been created, we can decide how to implement it
  if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
    Multiple->UseSwitch = true;
    Multiple->Breaks++; // switch captures breaks
  }
  return Multiple;
}
// Main function.
// Process a set of blocks with specified entries, returns a shape
// The Make* functions receive a NextEntries. If they fill it with data,
// those are the entries for the ->Next block on them, and the blocks
// are what remains in Blocks (which Make* modify). In this way
// we avoid recursing on Next (imagine a long chain of Simples, if we
// recursed we could blow the stack).
//
// Blocks         - the blocks still to be structured (consumed as we go)
// InitialEntries - the entries into Blocks
// Prev           - shape that precedes the result, or null
// Returns the first shape created, or null if InitialEntries is empty.
Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
  BlockSet *Entries = &InitialEntries;
  // Two buffers used alternately: one holds the current entries while the
  // other collects the next ones.
  BlockSet TempEntries[2];
  int CurrTempIndex = 0;
  BlockSet *NextEntries = nullptr;
  Shape *Ret = nullptr;
  // Links a newly created shape after the previous one and advances to the
  // entries it produced.
  auto Make = [&](Shape *Temp) {
    if (Prev)
      Prev->Next = Temp;
    if (!Ret)
      Ret = Temp;
    Prev = Temp;
    Entries = NextEntries;
  };
  while (true) {
    CurrTempIndex = 1 - CurrTempIndex;
    NextEntries = &TempEntries[CurrTempIndex];
    NextEntries->clear();
    if (Entries->empty())
      return Ret;
    if (Entries->size() == 1) {
      Block *Curr = *(Entries->begin());
      if (Curr->BranchesIn.empty()) {
        // One entry, no looping ==> Simple
        Make(MakeSimple(Blocks, Curr, *NextEntries));
      } else {
        // One entry, looping ==> Loop
        Make(MakeLoop(Blocks, *Entries, *NextEntries));
      }
      if (NextEntries->empty())
        return Ret;
      continue;
    }
    // More than one entry, try to eliminate through a Multiple groups of
    // independent blocks from an entry/ies. It is important to remove
    // through multiples as opposed to looping since the former is more
    // performant.
    BlockBlockSetMap IndependentGroups;
    FindIndependentGroups(*Entries, IndependentGroups);
    if (!IndependentGroups.empty()) {
      // We can handle a group in a multiple if its entry cannot be reached
      // by another group.
      // Note that it might be reachable by itself - a loop. But that is
      // fine, we will create a loop inside the multiple block (which
      // is the performant order to do it).
      for (auto iter = IndependentGroups.begin();
           iter != IndependentGroups.end();) {
        Block *Entry = iter->first;
        BlockSet &Group = iter->second;
        bool ReachedFromOutside = false;
        for (Block *Origin : Entry->BranchesIn) {
          if (!contains(Group, Origin)) {
            // Reached from outside the group, so we cannot handle this
            ReachedFromOutside = true;
            break;
          }
        }
        // Erasing may invalidate other iterators into the map, so continue
        // from the iterator that erase returns instead of one saved earlier.
        if (ReachedFromOutside)
          iter = IndependentGroups.erase(iter);
        else
          ++iter;
      }
      // As an optimization, if we have 2 independent groups, and one is a
      // small dead end, we can handle only that dead end.
      // The other then becomes a Next - without nesting in the code and
      // recursion in the analysis.
      // TODO: if the larger is the only dead end, handle that too
      // TODO: handle >2 groups
      // TODO: handle not just dead ends, but also that do not branch to the
      // NextEntries. However, must be careful there since we create a
      // Next, and that Next can prevent eliminating a break (since we no
      // longer naturally reach the same place), which may necessitate a
      // one-time loop, which makes the unnesting pointless.
      if (IndependentGroups.size() == 2) {
        // Find the smaller one
        auto iter = IndependentGroups.begin();
        Block *SmallEntry = iter->first;
        auto SmallSize = iter->second.size();
        iter++;
        Block *LargeEntry = iter->first;
        auto LargeSize = iter->second.size();
        if (SmallSize != LargeSize) { // ignore the case where they are
                                      // identical - keep things symmetrical
                                      // there
          if (SmallSize > LargeSize) {
            Block *Temp = SmallEntry;
            SmallEntry = LargeEntry;
            LargeEntry = Temp; // Note: we did not flip the Sizes too, they
                               // are now invalid. TODO: use the smaller
                               // size as a limit?
          }
          // Check if dead end
          bool DeadEnd = true;
          BlockSet &SmallGroup = IndependentGroups[SmallEntry];
          for (const auto &Curr : SmallGroup) {
            for (const auto &iter : Curr->BranchesOut) {
              Block *Target = iter.first;
              if (!contains(SmallGroup, Target)) {
                DeadEnd = false;
                break;
              }
            }
            if (!DeadEnd)
              break;
          }
          if (DeadEnd)
            IndependentGroups.erase(LargeEntry);
        }
      }
      if (!IndependentGroups.empty()) {
        // Some groups removable ==> Multiple
        Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
                          *NextEntries));
        if (NextEntries->empty())
          return Ret;
        continue;
      }
      // Every group was rejected above: nothing has been created for these
      // entries yet, so fall through and handle them as a loop rather than
      // returning with blocks left unprocessed.
    }
    // No independent groups, must be loopable ==> Loop
    Make(MakeLoop(Blocks, *Entries, *NextEntries));
    if (NextEntries->empty())
      return Ret;
  }
}
};
// Main
BlockSet AllBlocks;
for (const auto &Curr : Pre.Live) {
AllBlocks.insert(Curr);
}
BlockSet Entries;
Entries.insert(Entry);
Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
assert(Root);
///
/// Relooper post-optimizer
///
struct PostOptimizer {
RelooperAlgorithm *Parent;
std::stack<Shape *> LoopStack;
PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
// Dispatches on the dynamic kind of a shape, invoking exactly one of the
// three callbacks with the shape cast to its concrete type.
//
// var      - the shape to inspect; may be null, in which case nothing is
//            called (callers pass fields such as Next or Natural that are
//            null at the end of a chain)
// simple   - invoked for a SimpleShape
// multiple - invoked for a MultipleShape
// loop     - invoked for a LoopShape
void ShapeSwitch(Shape* var,
                 std::function<void (SimpleShape*)> simple,
                 std::function<void (MultipleShape*)> multiple,
                 std::function<void (LoopShape*)> loop) {
  if (!var)
    return;
  switch (var->getKind()) {
    case Shape::SK_Simple: {
      simple(cast<SimpleShape>(var));
      break;
    }
    case Shape::SK_Multiple: {
      multiple(cast<MultipleShape>(var));
      break;
    }
    case Shape::SK_Loop: {
      loop(cast<LoopShape>(var));
      break;
    }
    default: llvm_unreachable("invalid shape");
  }
}
// Find the blocks that natural control flow can get us directly to, or
// through a multiple that we ignore.
//
// S   - shape to start from; may be null (no natural successor), in which
//       case Out is left untouched
// Out - receives the blocks found
void FollowNaturalFlow(Shape *S, BlockSet &Out) {
  // Callers pass values such as a default-null Natural or a Multiple's Next,
  // so a null shape simply contributes nothing.
  if (!S)
    return;
  ShapeSwitch(S, [&](SimpleShape* Simple) {
    Out.insert(Simple->Inner);
  }, [&](MultipleShape* Multiple) {
    // Look inside every handled branch, and at what follows the multiple.
    for (const auto &iter : Multiple->InnerMap) {
      FollowNaturalFlow(iter.second, Out);
    }
    FollowNaturalFlow(Multiple->Next, Out);
  }, [&](LoopShape* Loop) {
    FollowNaturalFlow(Loop->Inner, Out);
  });
}
// Fills in the Natural field of Root and of every shape reachable from it.
// A shape's Natural is its Next when it has one, otherwise the Otherwise
// shape handed down by the caller. Shapes inside a Multiple inherit the
// Multiple's Natural; the body of a Loop gets the loop body itself.
void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
  Shape *Following = Root->Next;
  if (Following) {
    Root->Natural = Following;
    FindNaturals(Following, Otherwise);
  } else {
    Root->Natural = Otherwise;
  }

  ShapeSwitch(
      Root,
      [](SimpleShape *) {
        // A Simple has no nested shapes to visit.
      },
      [&](MultipleShape *Multiple) {
        for (const auto &Handled : Multiple->InnerMap)
          FindNaturals(Handled.second, Root->Natural);
      },
      [&](LoopShape *Loop) {
        FindNaturals(Loop->Inner, Loop->Inner);
      });
}
// Remove unneeded breaks and continues.
// A flow operation is trivially unneeded if the shape we naturally get to
// by normal code execution is the same as the flow forces us to.
//
// Root     - first shape of the chain to process; the chain is followed
//            through Next iteratively, nested shapes recursively
// Natural  - the shape reached by falling off the end of this chain (null by
//            default); the blocks it leads to are gathered once up front
// LastLoop - the innermost enclosing loop whose breaks are still usable, or
//            null
// Depth    - current nesting depth, compared against RelooperNestingLimit
void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
LoopShape *LastLoop = nullptr,
unsigned Depth = 0) {
BlockSet NaturalBlocks;
FollowNaturalFlow(Natural, NaturalBlocks);
Shape *Next = Root;
while (Next) {
Root = Next;
Next = nullptr;
ShapeSwitch(
Root,
[&](SimpleShape* Simple) {
if (Simple->Inner->BranchVar)
LastLoop =
nullptr; // a switch clears out the loop (TODO: only for
// breaks, not continue)
if (Simple->Next) {
if (!Simple->Inner->BranchVar &&
Simple->Inner->ProcessedBranchesOut.size() == 2 &&
Depth < RelooperNestingLimit) {
// If there is a next block, we already know at Simple
// creation time to make direct branches, and we can do
// nothing more in general. But, we try to optimize the
// case of a break and a direct: This would normally be
// if (break?) { break; } ..
// but if we make sure to nest the else, we can save the
// break,
// if (!break?) { .. }
// This is also better because the more canonical nested
// form is easier to further optimize later. The
// downside is more nesting, which adds to size in builds with
// whitespace.
// Note that we avoid switches, as it complicates control flow
// and is not relevant for the common case we optimize here.
bool Found = false;
bool Abort = false;
// First pass: the rewrite applies only if there is a break, every
// break targets a naturally reached block, and every other branch
// is Direct.
for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
Block *Target = iter.first;
Branch *Details = iter.second.get();
if (Details->Type == Branch::Break) {
Found = true;
if (!contains(NaturalBlocks, Target))
Abort = true;
} else if (Details->Type != Branch::Direct)
Abort = true;
}
// Second pass: breaks become Direct and the Direct branch becomes
// Nested.
if (Found && !Abort) {
for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
Branch *Details = iter.second.get();
if (Details->Type == Branch::Break) {
Details->Type = Branch::Direct;
if (MultipleShape *Multiple =
dyn_cast<MultipleShape>(Details->Ancestor))
Multiple->Breaks--;
} else {
assert(Details->Type == Branch::Direct);
Details->Type = Branch::Nested;
}
}
}
Depth++; // this optimization increases depth, for us and all
// our next chain (i.e., until this call returns)
}
Next = Simple->Next;
} else {
// If there is no next then Natural is where we will
// go to by doing nothing, so we can potentially optimize some
// branches to direct.
for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
Block *Target = iter.first;
Branch *Details = iter.second.get();
if (Details->Type != Branch::Direct &&
contains(NaturalBlocks,
Target)) { // note: cannot handle split blocks
Details->Type = Branch::Direct;
if (MultipleShape *Multiple =
dyn_cast<MultipleShape>(Details->Ancestor))
Multiple->Breaks--;
} else if (Details->Type == Branch::Break && LastLoop &&
LastLoop->Natural == Details->Ancestor->Natural) {
// it is important to simplify breaks, as simpler breaks
// enable other optimizations
Details->Labeled = false;
if (MultipleShape *Multiple =
dyn_cast<MultipleShape>(Details->Ancestor))
Multiple->Breaks--;
}
}
}
}, [&](MultipleShape* Multiple)
{
// Each handled branch naturally flows to whatever follows the multiple.
// LastLoop is dropped for the branches when the multiple has breaks of
// its own.
for (const auto &iter : Multiple->InnerMap) {
RemoveUnneededFlows(iter.second, Multiple->Next,
Multiple->Breaks ? nullptr : LastLoop,
Depth + 1);
}
Next = Multiple->Next;
}, [&](LoopShape* Loop)
{
// Falling off the end of a loop body leads back to the body, and the
// loop becomes the innermost loop for its contents.
RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
Next = Loop->Next;
});
}
}
// After we know which loops exist, we can calculate which need to be
// labeled.
//
// Walks the chain starting at Root (iteratively along Next, recursively into
// nested shapes) while LoopStack tracks the enclosing breakable shapes. A
// break/continue whose Ancestor is not the top of the stack needs its
// Ancestor labeled; otherwise the branch itself is marked unlabeled.
void FindLabeledLoops(Shape *Root) {
  Shape *Next = Root;
  while (Next) {
    Root = Next;
    Next = nullptr;
    ShapeSwitch(
        Root,
        [&](SimpleShape *Simple) {
          // Root->Next is null at the end of a chain, so the null-tolerant
          // cast is required here.
          MultipleShape *Fused = dyn_cast_or_null<MultipleShape>(Root->Next);
          // If we are fusing a Multiple with a loop into this Simple, then
          // visit it now
          if (Fused && Fused->Breaks)
            LoopStack.push(Fused);
          if (Simple->Inner->BranchVar)
            LoopStack.push(nullptr); // a switch means breaks are now useless,
                                     // push a dummy
          if (Fused) {
            if (Fused->UseSwitch)
              LoopStack.push(nullptr); // a switch means breaks are now
                                       // useless, push a dummy
            for (const auto &iter : Fused->InnerMap) {
              FindLabeledLoops(iter.second);
            }
          }
          for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
            Branch *Details = iter.second.get();
            if (Details->Type == Branch::Break ||
                Details->Type == Branch::Continue) {
              assert(!LoopStack.empty());
              if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
                if (MultipleShape *Multiple =
                        dyn_cast<MultipleShape>(Details->Ancestor)) {
                  Multiple->Labeled = true;
                } else {
                  LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
                  Loop->Labeled = true;
                }
              } else {
                Details->Labeled = false;
              }
            }
          }
          // Undo the pushes above, in reverse order. This must happen exactly
          // once per Simple, regardless of how many processed branches it
          // has, so it lives after the branch loop rather than inside it.
          if (Fused && Fused->UseSwitch)
            LoopStack.pop();
          if (Simple->Inner->BranchVar)
            LoopStack.pop();
          if (Fused && Fused->Breaks)
            LoopStack.pop();
          // A fused Multiple was handled here, so continue after it.
          if (Fused)
            Next = Fused->Next;
          else
            Next = Root->Next;
        },
        [&](MultipleShape *Multiple) {
          if (Multiple->Breaks)
            LoopStack.push(Multiple);
          for (const auto &iter : Multiple->InnerMap)
            FindLabeledLoops(iter.second);
          if (Multiple->Breaks)
            LoopStack.pop();
          Next = Root->Next;
        },
        [&](LoopShape *Loop) {
          LoopStack.push(Loop);
          FindLabeledLoops(Loop->Inner);
          LoopStack.pop();
          Next = Root->Next;
        });
  }
}
// Runs the three post-optimization passes over the shape tree, in order:
// FindNaturals fills in the Natural fields, RemoveUnneededFlows rewrites
// branch types and break counts, and FindLabeledLoops decides which shapes
// and branches keep labels.
void Process(Shape *Root) {
  FindNaturals(Root);
  RemoveUnneededFlows(Root);
  FindLabeledLoops(Root);
}
};
PostOptimizer(this).Process(Root);
}