forked from OSchip/llvm-project
[BOLT] Fix profile after ICP
Summary: After optimizing a target of a jump table, ICP was not updating edge counts corresponding to that target. As a result the edge could be left hot and negatively influence the code layout. (cherry picked from FBD9524396)
This commit is contained in:
parent
2511b09985
commit
708a550084
|
@ -365,14 +365,24 @@ public:
|
|||
return getSuccessor();
|
||||
}
|
||||
|
||||
const BinaryBranchInfo &getBranchInfo(bool Condition) const {
|
||||
/// Return branch info corresponding to a taken branch.
|
||||
const BinaryBranchInfo &getTakenBranchInfo() const {
|
||||
assert(BranchInfo.size() == 2 &&
|
||||
"could only be called for blocks with 2 successors");
|
||||
return BranchInfo[Condition == true ? 0 : 1];
|
||||
return BranchInfo[0];
|
||||
};
|
||||
|
||||
/// Return branch info corresponding to a fall-through branch.
|
||||
const BinaryBranchInfo &getFallthroughBranchInfo() const {
|
||||
assert(BranchInfo.size() == 2 &&
|
||||
"could only be called for blocks with 2 successors");
|
||||
return BranchInfo[1];
|
||||
};
|
||||
|
||||
/// Return branch info corresponding to an edge going to \p Succ basic block.
|
||||
BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ);
|
||||
|
||||
/// Set branch information for the outgoing edge to block \p Succ.
|
||||
void setSuccessorBranchInfo(const BinaryBasicBlock &Succ,
|
||||
uint64_t Count,
|
||||
uint64_t MispredictedCount) {
|
||||
|
|
|
@ -3962,11 +3962,11 @@ DynoStats BinaryFunction::getDynoStats() const {
|
|||
}
|
||||
|
||||
// Conditional branch that could be followed by an unconditional branch.
|
||||
uint64_t TakenCount = BB->getBranchInfo(true).Count;
|
||||
auto TakenCount = BB->getTakenBranchInfo().Count;
|
||||
if (TakenCount == COUNT_NO_PROFILE)
|
||||
TakenCount = 0;
|
||||
|
||||
uint64_t NonTakenCount = BB->getBranchInfo(false).Count;
|
||||
auto NonTakenCount = BB->getFallthroughBranchInfo().Count;
|
||||
if (NonTakenCount == COUNT_NO_PROFILE)
|
||||
NonTakenCount = 0;
|
||||
|
||||
|
|
|
@ -904,7 +904,7 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
|
|||
// Record this block so that we don't try to optimize it twice.
|
||||
BeenOptimized.insert(PredBB);
|
||||
|
||||
bool BranchForStats;
|
||||
uint64_t Count = 0;
|
||||
if (CondSucc != BB) {
|
||||
// Patch the new target address into the conditional branch.
|
||||
MIB->reverseBranchCondition(*CondBranch, CalleeSymbol, BC.Ctx.get());
|
||||
|
@ -913,13 +913,12 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
|
|||
// branch to the old target. This has to be done manually since
|
||||
// fixupBranches is not called after SCTC.
|
||||
NeedsUncondBranch.emplace_back(std::make_pair(PredBB, CondSucc));
|
||||
BranchForStats = false;
|
||||
Count = PredBB->getFallthroughBranchInfo().Count;
|
||||
} else {
|
||||
// Change destination of the conditional branch.
|
||||
MIB->replaceBranchTarget(*CondBranch, CalleeSymbol, BC.Ctx.get());
|
||||
BranchForStats = true;
|
||||
Count = PredBB->getTakenBranchInfo().Count;
|
||||
}
|
||||
const auto Count = PredBB->getBranchInfo(BranchForStats).Count;
|
||||
const uint64_t CTCTakenFreq =
|
||||
Count == BinaryBasicBlock::COUNT_NO_PROFILE ? 0 : Count;
|
||||
|
||||
|
|
|
@ -740,92 +740,67 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG(
|
|||
|
||||
// Scale indirect call counts to the execution count of the original
|
||||
// basic block containing the indirect call.
|
||||
auto TotalCount = IndCallBlock->getKnownExecutionCount();
|
||||
uint64_t TotalIndirectBranches = 0;
|
||||
uint64_t TotalIndirectMispreds = 0;
|
||||
for (const auto &BI : Targets) {
|
||||
TotalIndirectBranches += BI.Branches;
|
||||
TotalIndirectMispreds += BI.Mispreds;
|
||||
for (const auto &Target : Targets) {
|
||||
TotalIndirectBranches += Target.Branches;
|
||||
}
|
||||
|
||||
uint64_t TotalCount = 0;
|
||||
uint64_t TotalMispreds = 0;
|
||||
|
||||
if (Function.hasValidProfile()) {
|
||||
TotalCount = IndCallBlock->getExecutionCount();
|
||||
TotalMispreds =
|
||||
TotalCount * ((double)TotalIndirectMispreds / TotalIndirectBranches);
|
||||
assert(TotalCount != BinaryBasicBlock::COUNT_NO_PROFILE);
|
||||
}
|
||||
|
||||
// New BinaryBranchInfo scaled to the execution count of the original BB.
|
||||
std::vector<BinaryBranchInfo> BBI;
|
||||
for (auto Itr = Targets.begin(); Itr != Targets.end(); ++Itr) {
|
||||
const auto BranchPct = (double)Itr->Branches / TotalIndirectBranches;
|
||||
const auto MispredPct =
|
||||
(double)Itr->Mispreds / std::max(TotalIndirectMispreds, 1ul);
|
||||
if (Itr->JTIndex.empty()) {
|
||||
BBI.push_back(BinaryBranchInfo{uint64_t(TotalCount * BranchPct),
|
||||
uint64_t(TotalMispreds * MispredPct)});
|
||||
continue;
|
||||
}
|
||||
for (size_t I = 0, E = Itr->JTIndex.size(); I != E; ++I) {
|
||||
BBI.push_back(
|
||||
BinaryBranchInfo{uint64_t(TotalCount * (BranchPct / E)),
|
||||
uint64_t(TotalMispreds * (MispredPct / E))});
|
||||
std::vector<BinaryBranchInfo> ScaledBBI;
|
||||
for (const auto &Target : Targets) {
|
||||
const auto NumEntries = std::max(1UL, Target.JTIndex.size());
|
||||
for (size_t I = 0; I < NumEntries; ++I) {
|
||||
BBI.push_back(BinaryBranchInfo{Target.Branches / NumEntries,
|
||||
Target.Mispreds / NumEntries});
|
||||
ScaledBBI.push_back(BinaryBranchInfo{
|
||||
uint64_t(TotalCount * Target.Branches /
|
||||
(NumEntries * TotalIndirectBranches)),
|
||||
uint64_t(TotalCount * Target.Mispreds /
|
||||
(NumEntries * TotalIndirectBranches))});
|
||||
}
|
||||
}
|
||||
|
||||
auto BI = BBI.begin();
|
||||
auto updateCurrentBranchInfo = [&]{
|
||||
assert(BI < BBI.end());
|
||||
TotalCount -= BI->Count;
|
||||
TotalMispreds -= BI->MispredictedCount;
|
||||
++BI;
|
||||
};
|
||||
|
||||
if (IsJumpTable) {
|
||||
IndCallBlock->moveAllSuccessorsTo(NewBBs.back().get());
|
||||
auto *NewIndCallBlock = NewBBs.back().get();
|
||||
IndCallBlock->moveAllSuccessorsTo(NewIndCallBlock);
|
||||
|
||||
std::vector<MCSymbol*> SymTargets;
|
||||
for (size_t I = 0; I < Targets.size(); ++I) {
|
||||
assert(Targets[I].To.Sym);
|
||||
if (Targets[I].JTIndex.empty())
|
||||
SymTargets.push_back(Targets[I].To.Sym);
|
||||
else {
|
||||
for (size_t Idx = 0, E = Targets[I].JTIndex.size(); Idx != E; ++Idx) {
|
||||
SymTargets.push_back(Targets[I].To.Sym);
|
||||
}
|
||||
for (const auto &Target : Targets) {
|
||||
const auto NumEntries = std::max(1UL, Target.JTIndex.size());
|
||||
for (size_t I = 0; I < NumEntries; ++I) {
|
||||
SymTargets.push_back(Target.To.Sym);
|
||||
}
|
||||
}
|
||||
assert(SymTargets.size() > NewBBs.size() - 1 &&
|
||||
"There must be a target symbol associated with each new BB.");
|
||||
|
||||
// Fix up successors and execution counts.
|
||||
updateCurrentBranchInfo();
|
||||
auto *Succ = Function.getBasicBlockForLabel(SymTargets[0]);
|
||||
assert(Succ && "each jump target must be a legal BB label");
|
||||
IndCallBlock->addSuccessor(Succ, BBI[0]); // cond branch
|
||||
IndCallBlock->addSuccessor(NewBBs[0].get(), TotalCount); // fallthru branch
|
||||
for (uint64_t I = 0; I < NewBBs.size(); ++I) {
|
||||
BinaryBasicBlock *SourceBB = I ? NewBBs[I - 1].get() : IndCallBlock;
|
||||
SourceBB->setExecutionCount(TotalCount);
|
||||
|
||||
for (size_t I = 0; I < NewBBs.size() - 1; ++I) {
|
||||
assert(TotalCount <= IndCallBlock->getExecutionCount() ||
|
||||
TotalCount <= uint64_t(TotalIndirectBranches));
|
||||
uint64_t ExecCount = BBI[I+1].Count;
|
||||
updateCurrentBranchInfo();
|
||||
auto *Succ = Function.getBasicBlockForLabel(SymTargets[I+1]);
|
||||
assert(Succ && "each jump target must be a legal BB label");
|
||||
NewBBs[I]->addSuccessor(Succ, BBI[I+1]);
|
||||
NewBBs[I]->addSuccessor(NewBBs[I+1].get(), TotalCount); // fallthru
|
||||
ExecCount += TotalCount;
|
||||
NewBBs[I]->setCanOutline(IndCallBlock->canOutline());
|
||||
NewBBs[I]->setIsCold(IndCallBlock->isCold());
|
||||
NewBBs[I]->setExecutionCount(ExecCount);
|
||||
auto *TargetBB = Function.getBasicBlockForLabel(SymTargets[I]);
|
||||
SourceBB->addSuccessor(TargetBB, ScaledBBI[I]); // taken
|
||||
|
||||
TotalCount -= ScaledBBI[I].Count;
|
||||
SourceBB->addSuccessor(NewBBs[I].get(), TotalCount); // fall-through
|
||||
|
||||
// Update branch info for the indirect jump.
|
||||
auto &BranchInfo = NewIndCallBlock->getBranchInfo(*TargetBB);
|
||||
BranchInfo.Count -= BBI[I].Count;
|
||||
BranchInfo.MispredictedCount -= BBI[I].MispredictedCount;
|
||||
}
|
||||
} else {
|
||||
assert(NewBBs.size() >= 2);
|
||||
assert(NewBBs.size() % 2 == 1 || IndCallBlock->succ_empty());
|
||||
assert(NewBBs.size() % 2 == 1 || IsTailCall);
|
||||
|
||||
auto ScaledBI = ScaledBBI.begin();
|
||||
auto updateCurrentBranchInfo = [&]{
|
||||
assert(ScaledBI != ScaledBBI.end());
|
||||
TotalCount -= ScaledBI->Count;
|
||||
++ScaledBI;
|
||||
};
|
||||
|
||||
if (!IsTailCall) {
|
||||
MergeBlock = NewBBs.back().get();
|
||||
IndCallBlock->moveAllSuccessorsTo(MergeBlock);
|
||||
|
@ -833,25 +808,23 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG(
|
|||
|
||||
// Fix up successors and execution counts.
|
||||
updateCurrentBranchInfo();
|
||||
IndCallBlock->addSuccessor(NewBBs[1].get(), TotalCount); // cond branch
|
||||
IndCallBlock->addSuccessor(NewBBs[0].get(), BBI[0]); // uncond branch
|
||||
IndCallBlock->addSuccessor(NewBBs[1].get(), TotalCount);
|
||||
IndCallBlock->addSuccessor(NewBBs[0].get(), ScaledBBI[0]);
|
||||
|
||||
const size_t Adj = IsTailCall ? 1 : 2;
|
||||
for (size_t I = 0; I < NewBBs.size() - Adj; ++I) {
|
||||
assert(TotalCount <= IndCallBlock->getExecutionCount() ||
|
||||
TotalCount <= uint64_t(TotalIndirectBranches));
|
||||
uint64_t ExecCount = BBI[(I+1)/2].Count;
|
||||
NewBBs[I]->setCanOutline(IndCallBlock->canOutline());
|
||||
NewBBs[I]->setIsCold(IndCallBlock->isCold());
|
||||
auto ExecCount = ScaledBBI[(I+1)/2].Count;
|
||||
if (I % 2 == 0) {
|
||||
if (MergeBlock) {
|
||||
NewBBs[I]->addSuccessor(MergeBlock, BBI[(I+1)/2].Count); // uncond
|
||||
NewBBs[I]->addSuccessor(MergeBlock, ScaledBBI[(I+1)/2].Count);
|
||||
}
|
||||
} else {
|
||||
assert(I + 2 < NewBBs.size());
|
||||
updateCurrentBranchInfo();
|
||||
NewBBs[I]->addSuccessor(NewBBs[I+2].get(), TotalCount); // uncond branch
|
||||
NewBBs[I]->addSuccessor(NewBBs[I+1].get(), BBI[(I+1)/2]); // cond. branch
|
||||
NewBBs[I]->addSuccessor(NewBBs[I+2].get(), TotalCount);
|
||||
NewBBs[I]->addSuccessor(NewBBs[I+1].get(), ScaledBBI[(I+1)/2]);
|
||||
ExecCount += TotalCount;
|
||||
}
|
||||
NewBBs[I]->setExecutionCount(ExecCount);
|
||||
|
@ -860,8 +833,6 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG(
|
|||
if (MergeBlock) {
|
||||
// Arrange for the MergeBlock to be the fallthrough for the first
|
||||
// promoted call block.
|
||||
MergeBlock->setCanOutline(IndCallBlock->canOutline());
|
||||
MergeBlock->setIsCold(IndCallBlock->isCold());
|
||||
std::unique_ptr<BinaryBasicBlock> MBPtr;
|
||||
std::swap(MBPtr, NewBBs.back());
|
||||
NewBBs.pop_back();
|
||||
|
@ -871,13 +842,10 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG(
|
|||
}
|
||||
}
|
||||
|
||||
// cold call block
|
||||
// TODO: should be able to outline/cold this block.
|
||||
// Update the execution count.
|
||||
NewBBs.back()->setExecutionCount(TotalCount);
|
||||
NewBBs.back()->setCanOutline(IndCallBlock->canOutline());
|
||||
NewBBs.back()->setIsCold(IndCallBlock->isCold());
|
||||
|
||||
// update BB and BB layout.
|
||||
// Update BB and BB layout.
|
||||
Function.insertBasicBlocks(IndCallBlock, std::move(NewBBs));
|
||||
assert(Function.validateCFG());
|
||||
|
||||
|
|
Loading…
Reference in New Issue