Added an option to reverse the original basic block order.

Summary:
Modified processing of the "-reorder-blocks=" option and added an option
to reverse the original basic block order for testing purposes.

(cherry picked from FBD2829862)
This commit is contained in:
Maksim Panchenko 2016-01-13 17:19:40 -08:00
parent c9b7e3e09e
commit d9536e6092
3 changed files with 76 additions and 46 deletions

View File

@ -933,13 +933,30 @@ bool BinaryFunction::fixCFIState() {
return true;
}
void BinaryFunction::optimizeLayout(HeuristicPriority Priority, bool Split) {
// Bail if no profiling information or if empty
if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE ||
BasicBlocksLayout.empty()) {
void BinaryFunction::modifyLayout(LayoutType Type, bool Split) {
if (BasicBlocksLayout.empty() || Type == LT_NONE)
return;
if (Type == LT_REVERSE) {
BasicBlockOrderType ReverseOrder;
auto FirstBB = BasicBlocksLayout.front();
ReverseOrder.push_back(FirstBB);
for(auto RBBI = BasicBlocksLayout.rbegin(); *RBBI != FirstBB; ++RBBI)
ReverseOrder.push_back(*RBBI);
BasicBlocksLayout.swap(ReverseOrder);
if (Split)
splitFunction();
fixBranches();
return;
}
// Cannot do optimal layout without profile.
if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
return;
// Work on optimal solution if problem is small enough
if (BasicBlocksLayout.size() <= FUNC_SIZE_THRESHOLD)
return solveOptimalLayout(Split);
@ -1062,14 +1079,14 @@ void BinaryFunction::optimizeLayout(HeuristicPriority Priority, bool Split) {
AvgFreq[I] = Freq;
}
switch(Priority) {
case HP_NONE: {
switch(Type) {
case LT_OPTIMIZE: {
for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
if (!Clusters[I].empty())
Order.push_back(I);
break;
}
case HP_BRANCH_PREDICTOR: {
case LT_OPTIMIZE_BRANCH: {
// Do a topological sort for clusters, prioritizing frequently-executed BBs
// during the traversal.
std::stack<uint32_t> Stack;
@ -1137,7 +1154,7 @@ void BinaryFunction::optimizeLayout(HeuristicPriority Priority, bool Split) {
});
break;
}
case HP_CACHE_UTILIZATION: {
case LT_OPTIMIZE_CACHE: {
// Order clusters based on average instruction execution frequency
for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
if (!Clusters[I].empty())
@ -1151,6 +1168,8 @@ void BinaryFunction::optimizeLayout(HeuristicPriority Priority, bool Split) {
break;
}
default:
llvm_unreachable("unexpected layout type");
}
BasicBlocksLayout.clear();
@ -1349,6 +1368,10 @@ void BinaryFunction::fixBranches() {
// Case 3a: If the taken branch goes to the next block in the new layout,
// invert this conditional branch logic so we can make this a fallthrough.
if (TBB == FT && !HotColdBorder) {
if (OldFT == nullptr) {
errs() << "FLO-ERROR: malfromed CFG for function " << getName()
<< " in basic block " << BB->getName() << '\n';
}
assert(OldFT != nullptr && "malformed CFG");
if (!MIA->reverseBranchCondition(*CondBranch, OldFT, BC.Ctx.get()))
llvm_unreachable("Target does not support reversing branches");

View File

@ -53,17 +53,24 @@ public:
Assembled, /// Function has been assembled in memory
};
// Choose which strategy should the block layout heuristic prioritize when
// facing conflicting goals.
enum HeuristicPriority : char {
HP_NONE = 0,
// HP_BRANCH_PREDICTOR is an implementation of what is suggested in Pettis'
// paper (PLDI '90) about block reordering, trying to minimize branch
// mispredictions.
HP_BRANCH_PREDICTOR,
// HP_CACHE_UTILIZATION piggybacks on the idea from Ispike paper (CGO '04)
// that suggests putting frequently executed chains first in the layout.
HP_CACHE_UTILIZATION,
/// Selects how the basic blocks of a function are laid out. The optimizing
/// variants differ in which goal the block layout heuristic prioritizes when
/// facing conflicting goals.
enum LayoutType : char {
/// LT_NONE - do not change layout of basic blocks (no reordering).
LT_NONE = 0,
/// LT_REVERSE - reverse the order of basic blocks, meant for testing
/// purposes. The first basic block is left intact and the rest are
/// put in the reverse order.
LT_REVERSE,
/// LT_OPTIMIZE - optimize layout of basic blocks based on profile.
LT_OPTIMIZE,
/// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis'
/// paper (PLDI '90) about block reordering, trying to minimize branch
/// mispredictions.
LT_OPTIMIZE_BRANCH,
/// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04)
/// that suggests putting frequently executed chains first in the layout.
LT_OPTIMIZE_CACHE,
};
static constexpr uint64_t COUNT_NO_PROFILE =
@ -311,9 +318,9 @@ public:
FunctionNumber(++Count)
{}
/// Perform optimal code layout based on edge frequencies making necessary
/// adjustments to instructions at the end of basic blocks.
void optimizeLayout(HeuristicPriority Priority, bool Split);
/// Modify code layout making necessary adjustments to instructions at the
/// end of basic blocks.
void modifyLayout(LayoutType Type, bool Split);
/// Dynamic programming implementation for the TSP, applied to BB layout. Find
/// the optimal way to maximize weight during a path traversing all BBs. In

View File

@ -89,11 +89,29 @@ SplitFunctions("split-functions",
cl::desc("split functions into hot and cold distinct regions"),
cl::Optional);
static cl::opt<std::string> ReorderBlocks(
static cl::opt<BinaryFunction::LayoutType> ReorderBlocks(
"reorder-blocks",
cl::desc("redo basic block layout based on profiling data with a specific "
"priority (none, branch-predictor or cache)"),
cl::value_desc("priority"), cl::init("disable"));
cl::desc("change layout of basic blocks in a function"),
cl::init(BinaryFunction::LT_NONE),
cl::values(clEnumValN(BinaryFunction::LT_NONE,
"none",
"do not reorder basic blocks"),
clEnumValN(BinaryFunction::LT_REVERSE,
"reverse",
"layout blocks in reverse order"),
clEnumValN(BinaryFunction::LT_OPTIMIZE,
"normal",
"perform optimal layout based on profile"),
clEnumValN(BinaryFunction::LT_OPTIMIZE_BRANCH,
"branch-predictor",
"perform optimal layout prioritizing branch "
"predictions"),
clEnumValN(BinaryFunction::LT_OPTIMIZE_CACHE,
"cache",
"perform optimal layout prioritizing I-cache "
"behavior"),
clEnumValEnd));
static cl::opt<bool> AlignBlocks("align-blocks",
cl::desc("try to align BBs inserting nops"),
@ -665,15 +683,6 @@ void RewriteInstance::runOptimizationPasses() {
//
// FIXME: use real optimization passes.
bool NagUser = true;
if (opts::ReorderBlocks != "" &&
opts::ReorderBlocks != "disable" &&
opts::ReorderBlocks != "none" &&
opts::ReorderBlocks != "branch-predictor" &&
opts::ReorderBlocks != "cache") {
errs() << "FLO: Unrecognized block reordering priority \""
<< opts::ReorderBlocks << "\".\n";
exit(1);
}
for (auto &BFI : BinaryFunctions) {
auto &Function = BFI.second;
@ -725,18 +734,9 @@ void RewriteInstance::runOptimizationPasses() {
Function.print(errs(), "after unreachable code elimination");
}
if (opts::ReorderBlocks != "disable") {
if (opts::ReorderBlocks != BinaryFunction::LT_NONE) {
bool ShouldSplit = ToSplit.find(BFI.first) != ToSplit.end();
if (opts::ReorderBlocks == "branch-predictor") {
BFI.second.optimizeLayout(BinaryFunction::HP_BRANCH_PREDICTOR,
ShouldSplit);
} else if (opts::ReorderBlocks == "cache") {
BFI.second.optimizeLayout(BinaryFunction::HP_CACHE_UTILIZATION,
ShouldSplit);
} else {
BFI.second.optimizeLayout(BinaryFunction::HP_NONE, ShouldSplit);
}
BFI.second.modifyLayout(opts::ReorderBlocks, ShouldSplit);
if (opts::PrintAll || opts::PrintReordered)
Function.print(errs(), "after reordering blocks");
}