forked from OSchip/llvm-project
Added an option to reverse the original basic block order.
Summary: Modified the processing of the "-reorder-blocks=" option and added an option to reverse the original basic block order for testing purposes. (cherry picked from FBD2829862)
This commit is contained in:
parent
c9b7e3e09e
commit
d9536e6092
|
@ -933,13 +933,30 @@ bool BinaryFunction::fixCFIState() {
|
|||
return true;
|
||||
}
|
||||
|
||||
void BinaryFunction::optimizeLayout(HeuristicPriority Priority, bool Split) {
|
||||
// Bail if no profiling information or if empty
|
||||
if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE ||
|
||||
BasicBlocksLayout.empty()) {
|
||||
void BinaryFunction::modifyLayout(LayoutType Type, bool Split) {
|
||||
if (BasicBlocksLayout.empty() || Type == LT_NONE)
|
||||
return;
|
||||
|
||||
if (Type == LT_REVERSE) {
|
||||
BasicBlockOrderType ReverseOrder;
|
||||
auto FirstBB = BasicBlocksLayout.front();
|
||||
ReverseOrder.push_back(FirstBB);
|
||||
for(auto RBBI = BasicBlocksLayout.rbegin(); *RBBI != FirstBB; ++RBBI)
|
||||
ReverseOrder.push_back(*RBBI);
|
||||
BasicBlocksLayout.swap(ReverseOrder);
|
||||
|
||||
if (Split)
|
||||
splitFunction();
|
||||
|
||||
fixBranches();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Cannot do optimal layout without profile.
|
||||
if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
|
||||
return;
|
||||
|
||||
// Work on optimal solution if problem is small enough
|
||||
if (BasicBlocksLayout.size() <= FUNC_SIZE_THRESHOLD)
|
||||
return solveOptimalLayout(Split);
|
||||
|
@ -1062,14 +1079,14 @@ void BinaryFunction::optimizeLayout(HeuristicPriority Priority, bool Split) {
|
|||
AvgFreq[I] = Freq;
|
||||
}
|
||||
|
||||
switch(Priority) {
|
||||
case HP_NONE: {
|
||||
switch(Type) {
|
||||
case LT_OPTIMIZE: {
|
||||
for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
|
||||
if (!Clusters[I].empty())
|
||||
Order.push_back(I);
|
||||
break;
|
||||
}
|
||||
case HP_BRANCH_PREDICTOR: {
|
||||
case LT_OPTIMIZE_BRANCH: {
|
||||
// Do a topological sort for clusters, prioritizing frequently-executed BBs
|
||||
// during the traversal.
|
||||
std::stack<uint32_t> Stack;
|
||||
|
@ -1137,7 +1154,7 @@ void BinaryFunction::optimizeLayout(HeuristicPriority Priority, bool Split) {
|
|||
});
|
||||
break;
|
||||
}
|
||||
case HP_CACHE_UTILIZATION: {
|
||||
case LT_OPTIMIZE_CACHE: {
|
||||
// Order clusters based on average instruction execution frequency
|
||||
for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
|
||||
if (!Clusters[I].empty())
|
||||
|
@ -1151,6 +1168,8 @@ void BinaryFunction::optimizeLayout(HeuristicPriority Priority, bool Split) {
|
|||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("unexpected layout type");
|
||||
}
|
||||
|
||||
BasicBlocksLayout.clear();
|
||||
|
@ -1349,6 +1368,10 @@ void BinaryFunction::fixBranches() {
|
|||
// Case 3a: If the taken branch goes to the next block in the new layout,
|
||||
// invert this conditional branch logic so we can make this a fallthrough.
|
||||
if (TBB == FT && !HotColdBorder) {
|
||||
if (OldFT == nullptr) {
|
||||
errs() << "FLO-ERROR: malfromed CFG for function " << getName()
|
||||
<< " in basic block " << BB->getName() << '\n';
|
||||
}
|
||||
assert(OldFT != nullptr && "malformed CFG");
|
||||
if (!MIA->reverseBranchCondition(*CondBranch, OldFT, BC.Ctx.get()))
|
||||
llvm_unreachable("Target does not support reversing branches");
|
||||
|
|
|
@ -53,17 +53,24 @@ public:
|
|||
Assembled, /// Function has been assembled in memory
|
||||
};
|
||||
|
||||
// Choose which strategy should the block layout heuristic prioritize when
|
||||
// facing conflicting goals.
|
||||
enum HeuristicPriority : char {
|
||||
HP_NONE = 0,
|
||||
// HP_BRANCH_PREDICTOR is an implementation of what is suggested in Pettis'
|
||||
// paper (PLDI '90) about block reordering, trying to minimize branch
|
||||
// mispredictions.
|
||||
HP_BRANCH_PREDICTOR,
|
||||
// HP_CACHE_UTILIZATION pigbacks on the idea from Ispike paper (CGO '04)
|
||||
// that suggests putting frequently executed chains first in the layout.
|
||||
HP_CACHE_UTILIZATION,
|
||||
/// Selects how modifyLayout() rearranges the basic blocks of a function:
|
||||
/// keep the current order, reverse it, or apply a profile-based layout.
|
||||
enum LayoutType : char {
|
||||
/// LT_NONE - do not change layout of basic blocks
|
||||
LT_NONE = 0, /// no reordering
|
||||
/// LT_REVERSE - reverse the order of basic blocks, meant for testing
|
||||
/// purposes. The first basic block is left intact and the rest are
|
||||
/// put in the reverse order. Does not require profile data.
|
||||
LT_REVERSE,
|
||||
/// LT_OPTIMIZE - optimize layout of basic blocks based on profile.
|
||||
LT_OPTIMIZE,
|
||||
/// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis'
|
||||
/// paper (PLDI '90) about block reordering, trying to minimize branch
|
||||
/// mispredictions.
|
||||
LT_OPTIMIZE_BRANCH,
|
||||
/// LT_OPTIMIZE_CACHE piggybacks on the idea from the Ispike paper (CGO '04)
|
||||
/// that suggests putting frequently executed chains first in the layout.
|
||||
LT_OPTIMIZE_CACHE,
|
||||
};
|
||||
|
||||
static constexpr uint64_t COUNT_NO_PROFILE =
|
||||
|
@ -311,9 +318,9 @@ public:
|
|||
FunctionNumber(++Count)
|
||||
{}
|
||||
|
||||
/// Perform optimal code layout based on edge frequencies making necessary
|
||||
/// adjustments to instructions at the end of basic blocks.
|
||||
void optimizeLayout(HeuristicPriority Priority, bool Split);
|
||||
/// Modify code layout according to \p Type, making necessary adjustments to
|
||||
/// instructions at the end of basic blocks.
/// Does nothing when the function has no basic blocks or \p Type is LT_NONE.
/// LT_REVERSE works without profile data; the remaining layout types return
/// early when the function has no profile.
/// \p Split requests splitting the function as part of the re-layout.
|
||||
void modifyLayout(LayoutType Type, bool Split);
|
||||
|
||||
/// Dynamic programming implementation for the TSP, applied to BB layout. Find
|
||||
/// the optimal way to maximize weight during a path traversing all BBs. In
|
||||
|
|
|
@ -89,11 +89,29 @@ SplitFunctions("split-functions",
|
|||
cl::desc("split functions into hot and cold distinct regions"),
|
||||
cl::Optional);
|
||||
|
||||
static cl::opt<std::string> ReorderBlocks(
|
||||
static cl::opt<BinaryFunction::LayoutType> ReorderBlocks(
|
||||
"reorder-blocks",
|
||||
cl::desc("redo basic block layout based on profiling data with a specific "
|
||||
"priority (none, branch-predictor or cache)"),
|
||||
cl::value_desc("priority"), cl::init("disable"));
|
||||
cl::desc("change layout of basic blocks in a function"),
|
||||
cl::init(BinaryFunction::LT_NONE),
|
||||
cl::values(clEnumValN(BinaryFunction::LT_NONE,
|
||||
"none",
|
||||
"do not reorder basic blocks"),
|
||||
clEnumValN(BinaryFunction::LT_REVERSE,
|
||||
"reverse",
|
||||
"layout blocks in reverse order"),
|
||||
clEnumValN(BinaryFunction::LT_OPTIMIZE,
|
||||
"normal",
|
||||
"perform optimal layout based on profile"),
|
||||
clEnumValN(BinaryFunction::LT_OPTIMIZE_BRANCH,
|
||||
"branch-predictor",
|
||||
"perform optimal layout prioritizing branch "
|
||||
"predictions"),
|
||||
clEnumValN(BinaryFunction::LT_OPTIMIZE_CACHE,
|
||||
"cache",
|
||||
"perform optimal layout prioritizing I-cache "
|
||||
"behavior"),
|
||||
clEnumValEnd));
|
||||
|
||||
|
||||
static cl::opt<bool> AlignBlocks("align-blocks",
|
||||
cl::desc("try to align BBs inserting nops"),
|
||||
|
@ -665,15 +683,6 @@ void RewriteInstance::runOptimizationPasses() {
|
|||
//
|
||||
// FIXME: use real optimization passes.
|
||||
bool NagUser = true;
|
||||
if (opts::ReorderBlocks != "" &&
|
||||
opts::ReorderBlocks != "disable" &&
|
||||
opts::ReorderBlocks != "none" &&
|
||||
opts::ReorderBlocks != "branch-predictor" &&
|
||||
opts::ReorderBlocks != "cache") {
|
||||
errs() << "FLO: Unrecognized block reordering priority \""
|
||||
<< opts::ReorderBlocks << "\".\n";
|
||||
exit(1);
|
||||
}
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
auto &Function = BFI.second;
|
||||
|
||||
|
@ -725,18 +734,9 @@ void RewriteInstance::runOptimizationPasses() {
|
|||
Function.print(errs(), "after unreachable code elimination");
|
||||
}
|
||||
|
||||
if (opts::ReorderBlocks != "disable") {
|
||||
if (opts::ReorderBlocks != BinaryFunction::LT_NONE) {
|
||||
bool ShouldSplit = ToSplit.find(BFI.first) != ToSplit.end();
|
||||
|
||||
if (opts::ReorderBlocks == "branch-predictor") {
|
||||
BFI.second.optimizeLayout(BinaryFunction::HP_BRANCH_PREDICTOR,
|
||||
ShouldSplit);
|
||||
} else if (opts::ReorderBlocks == "cache") {
|
||||
BFI.second.optimizeLayout(BinaryFunction::HP_CACHE_UTILIZATION,
|
||||
ShouldSplit);
|
||||
} else {
|
||||
BFI.second.optimizeLayout(BinaryFunction::HP_NONE, ShouldSplit);
|
||||
}
|
||||
BFI.second.modifyLayout(opts::ReorderBlocks, ShouldSplit);
|
||||
if (opts::PrintAll || opts::PrintReordered)
|
||||
Function.print(errs(), "after reordering blocks");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue