Teach llvm-flo how to reorder basic blocks with a heuristic

Summary:
This patch introduces a first approach to reordering basic blocks based on
profiling data that gives us the execution frequency of each edge. Our strategy
is to lay out basic blocks in an order that maximizes the total weight (hotness)
of the branches that can be deleted. A branch can be deleted when its source
block (src) comes right before its destination block (dst) in the new layout
order. This can be reduced to the traveling salesman problem (TSP). This patch
uses a greedy heuristic to solve it: we start with a graph with no edges and
progressively add edges, choosing the hottest edges first, building a layout
order that attempts to put BBs connected by hot edges together.

(cherry picked from FBD2544076)
This commit is contained in:
Rafael Auler 2015-10-13 12:18:54 -07:00 committed by Maksim Panchenko
parent 9b58b2e64b
commit 34f7085503
3 changed files with 182 additions and 6 deletions

View File

@ -20,6 +20,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
#include <queue>
#include <string>
#include "BinaryBasicBlock.h"
@ -387,6 +388,11 @@ bool BinaryFunction::buildCFG() {
}
}
// Set the basic block layout to the original order
for (auto &BB : BasicBlocks) {
BasicBlocksLayout.emplace_back(&BB);
}
// Intermediate dump.
DEBUG(print(dbgs(), /* PrintInstructions = */ true));
@ -527,6 +533,156 @@ void BinaryFunction::inferFallThroughCounts() {
return;
}
void BinaryFunction::optimizeLayout(bool DumpLayout) {
// Bail if no profiling information
if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE) {
return;
}
if (DumpLayout) {
dbgs() << "running block layout heuristics on " << getName() << "\n";
}
// Greedy heuristic implementation for the "TSP problem", applied to BB
// layout. Try to maximize weight during a path traversing all BBs. In this
// way, we will convert the hottest branches into fall-throughs.
// Encode an edge between two basic blocks, source and destination
typedef std::pair<BinaryBasicBlock *, BinaryBasicBlock *> EdgeTy;
std::map<EdgeTy, uint64_t> Weight;
// Define a comparison function to establish SWO between edges
auto Comp = [&Weight](EdgeTy A, EdgeTy B) { return Weight[A] > Weight[B]; };
std::priority_queue<EdgeTy, std::vector<EdgeTy>, decltype(Comp)> Queue(Comp);
typedef std::vector<BinaryBasicBlock *> ClusterTy;
typedef std::map<BinaryBasicBlock *, int> BBToClusterMapTy;
std::vector<ClusterTy> Clusters;
BBToClusterMapTy BBToClusterMap;
// Populating priority queue with all edges
for (auto &BB : BasicBlocks) {
BBToClusterMap[&BB] = -1; // Mark as unmapped
auto BI = BB.BranchInfo.begin();
for (auto &I : BB.successors()) {
if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
Weight[std::make_pair(&BB, I)] = BI->Count;
Queue.push(std::make_pair(&BB, I));
++BI;
}
}
// Start a cluster with the entry point
BinaryBasicBlock *Entry = &*BasicBlocks.begin();
Clusters.emplace_back();
auto &EntryCluster = Clusters.back();
EntryCluster.push_back(Entry);
BBToClusterMap[Entry] = 0;
// Grow clusters in a greedy fashion
while (!Queue.empty()) {
auto elmt = Queue.top();
Queue.pop();
BinaryBasicBlock *BBSrc = elmt.first;
BinaryBasicBlock *BBDst = elmt.second;
int I = 0, J = 0;
// Case 1: BBSrc and BBDst are the same. Ignore this edge
if (BBSrc == BBDst)
continue;
// Case 2: Both BBSrc and BBDst are already allocated
if ((I = BBToClusterMap[BBSrc]) != -1 &&
(J = BBToClusterMap[BBDst]) != -1) {
auto &ClusterA = Clusters[I];
auto &ClusterB = Clusters[J];
if (ClusterA.back() == BBSrc && ClusterB.front() == BBDst) {
// Case 2a: BBSrc is at the end of a cluster and BBDst is at the start,
// allowing us to merge two clusters
for (auto BB : ClusterB)
BBToClusterMap[BB] = I;
ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end());
ClusterB.clear();
} else {
// Case 2b: Both BBSrc and BBDst are allocated in positions we cannot
// merge them, so we ignore this edge.
}
continue;
}
// Case 3: BBSrc is already allocated in a cluster
if ((I = BBToClusterMap[BBSrc]) != -1) {
auto &Cluster = Clusters[I];
if (Cluster.back() == BBSrc) {
// Case 3a: BBSrc is allocated at the end of this cluster. We put
// BBSrc and BBDst together.
Cluster.push_back(BBDst);
BBToClusterMap[BBDst] = I;
} else {
// Case 3b: We cannot put BBSrc and BBDst in consecutive positions,
// so we ignore this edge.
}
continue;
}
// Case 4: BBSrc is not in a cluster, but BBDst is
if ((I = BBToClusterMap[BBDst]) != -1) {
auto &Cluster = Clusters[I];
if (Cluster.front() == BBDst) {
// Case 4a: BBDst is allocated at the start of this cluster. We put
// BBSrc and BBDst together.
Cluster.insert(Cluster.begin(), BBSrc);
BBToClusterMap[BBSrc] = I;
} else {
// Case 4b: We cannot put BBSrc and BBDst in consecutive positions,
// so we ignore this edge.
}
continue;
}
// Case 5: Both BBSrc and BBDst are unallocated, so we create a new cluster
// with them
I = Clusters.size();
Clusters.emplace_back();
auto &Cluster = Clusters.back();
Cluster.push_back(BBSrc);
Cluster.push_back(BBDst);
BBToClusterMap[BBSrc] = I;
BBToClusterMap[BBDst] = I;
}
// Define final function layout based on clusters
BasicBlocksLayout.clear();
for (auto &Cluster : Clusters) {
BasicBlocksLayout.insert(BasicBlocksLayout.end(), Cluster.begin(),
Cluster.end());
}
// Finalize layout with BBs that weren't assigned to any cluster, preserving
// their relative order
for (auto &BB : BasicBlocks) {
if (BBToClusterMap[&BB] == -1)
BasicBlocksLayout.push_back(&BB);
}
if (DumpLayout) {
dbgs() << "original BB order is: ";
auto Sep = "";
for (auto &BB : BasicBlocks) {
dbgs() << Sep << BB.getName();
Sep = ",";
}
dbgs() << "\nnew order is: ";
Sep = "";
for (auto BB : BasicBlocksLayout) {
dbgs() << Sep << BB->getName();
Sep = ",";
}
dbgs() << "\n";
}
}
} // namespace flo
} // namespace llvm

View File

@ -143,9 +143,12 @@ private:
InstrMapType Instructions;
// Blocks are kept sorted in the layout order. If we need to change the
// layout, the terminating instructions need to be modified.
// layout (if BasicBlocksLayout stores a different order than BasicBlocks),
// the terminating instructions need to be modified.
using BasicBlockListType = std::vector<BinaryBasicBlock>;
using BasicBlockOrderType = std::vector<BinaryBasicBlock*>;
BasicBlockListType BasicBlocks;
BasicBlockOrderType BasicBlocksLayout;
public:
@ -153,6 +156,7 @@ public:
typedef BasicBlockListType::const_iterator const_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef BasicBlockOrderType::iterator order_iterator;
// CFG iterators.
iterator begin() { return BasicBlocks.begin(); }
@ -172,6 +176,10 @@ public:
const BinaryBasicBlock & back() const { return BasicBlocks.back(); }
BinaryBasicBlock & back() { return BasicBlocks.back(); }
/// Range over the basic blocks in layout order, i.e. over the pointers stored
/// in BasicBlocksLayout rather than over BasicBlocks itself.
inline iterator_range<order_iterator> layout() {
  order_iterator First = BasicBlocksLayout.begin();
  order_iterator Last = BasicBlocksLayout.end();
  return iterator_range<order_iterator>(First, Last);
}
BinaryFunction(StringRef Name, SymbolRef Symbol, SectionRef Section,
uint64_t Address, uint64_t Size, BinaryContext &BC) :
@ -180,7 +188,7 @@ public:
/// Perform optimal code layout based on edge frequencies making necessary
/// adjustments to instructions at the end of basic blocks.
void optimizeLayout();
void optimizeLayout(bool DumpLayout);
/// View CFG in graphviz program
void viewGraph();

View File

@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
#include "DataReader.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
@ -50,6 +49,7 @@
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include <algorithm>
#include <map>
@ -83,6 +83,11 @@ EliminateUnreachable("eliminate-unreachable",
cl::desc("eliminate unreachable code"),
cl::Optional);
// Command-line switch "-reorder-blocks". When set, OptimizeFile() calls
// optimizeLayout() on the function being processed so that its basic block
// layout is recomputed from profiling data.
static cl::opt<bool>
ReorderBlocks("reorder-blocks",
cl::desc("redo basic block layout based on profiling data"),
cl::Optional);
static cl::opt<bool>
DumpData("dump-data", cl::desc("dump parsed flo data (debugging)"),
cl::Hidden);
@ -91,6 +96,10 @@ static cl::opt<bool>
DumpFunctions("dump-functions", cl::desc("dump parsed functions (debugging)"),
cl::Hidden);
// Hidden debugging switch "-dump-layout". Its value is forwarded to
// BinaryFunction::optimizeLayout(), which then prints the original and the
// reordered basic block order to dbgs().
static cl::opt<bool>
DumpLayout("dump-layout",
           cl::desc("dump basic block layout before and after reordering "
                    "(debugging)"),
           cl::Hidden);
static StringRef ToolName;
static void report_error(StringRef Message, std::error_code EC) {
@ -456,6 +465,9 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
DEBUG(dbgs() << "*** After unreachable block elimination ***\n");
DEBUG(Function.print(dbgs(), /* PrintInstructions = */ true));
}
if (ReorderBlocks) {
BFI.second.optimizeLayout(DumpLayout);
}
}
std::error_code EC;
@ -539,9 +551,9 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
Streamer->EmitLabel(FunctionSymbol);
// Emit code.
for (const auto &BB : Function) {
Streamer->EmitLabel(BB.getLabel());
for (const auto &Instr : BB) {
for (auto BB : Function.layout()) {
Streamer->EmitLabel(BB->getLabel());
for (const auto &Instr : *BB) {
Streamer->EmitInstruction(Instr, *BC->STI);
}
}