forked from OSchip/llvm-project
Fix dynostats for conditional tail calls
Summary: Don't treat conditional tail calls as branches for dynostats. Count taken conditional tail calls as calls. Change SCTC to report dynamic numbers after it is done. (cherry picked from FBD5203708)
This commit is contained in:
parent
2baa4c7a2c
commit
583790ee22
|
@ -2083,7 +2083,7 @@ void BinaryFunction::inferFallThroughCounts() {
|
|||
|
||||
auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames());
|
||||
|
||||
// Compute preliminary execution time for each basic block
|
||||
// Compute preliminary execution count for each basic block
|
||||
for (auto CurBB : BasicBlocks) {
|
||||
CurBB->ExecutionCount = 0;
|
||||
}
|
||||
|
@ -4118,9 +4118,17 @@ DynoStats BinaryFunction::getDynoStats() const {
|
|||
}
|
||||
if (!BC.MIA->isCall(Instr))
|
||||
continue;
|
||||
Stats[DynoStats::FUNCTION_CALLS] += BBExecutionCount;
|
||||
uint64_t CallFreq = BBExecutionCount;
|
||||
if (BC.MIA->isCTC(Instr)) {
|
||||
CallFreq = 0;
|
||||
if (auto FreqOrErr =
|
||||
BC.MIA->tryGetAnnotationAs<uint64_t>(Instr, "CTCTakenFreq")) {
|
||||
CallFreq = *FreqOrErr;
|
||||
}
|
||||
}
|
||||
Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
|
||||
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
|
||||
Stats[DynoStats::INDIRECT_CALLS] += BBExecutionCount;
|
||||
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
|
||||
} else if (const auto *CallSymbol = BC.MIA->getTargetSymbol(Instr)) {
|
||||
if (BC.getFunctionForSymbol(CallSymbol))
|
||||
continue;
|
||||
|
@ -4133,7 +4141,7 @@ DynoStats BinaryFunction::getDynoStats() const {
|
|||
StringRef SectionName;
|
||||
Section->getName(SectionName);
|
||||
if (SectionName == ".plt") {
|
||||
Stats[DynoStats::PLT_CALLS] += BBExecutionCount;
|
||||
Stats[DynoStats::PLT_CALLS] += CallFreq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4175,36 +4183,23 @@ DynoStats BinaryFunction::getDynoStats() const {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Conditional branch that could be followed by an unconditional branch.
|
||||
uint64_t TakenCount;
|
||||
uint64_t NonTakenCount;
|
||||
bool IsForwardBranch;
|
||||
if (BB->succ_size() == 2) {
|
||||
TakenCount = BB->getBranchInfo(true).Count;
|
||||
NonTakenCount = BB->getBranchInfo(false).Count;
|
||||
IsForwardBranch = isForwardBranch(BB, BB->getConditionalSuccessor(true));
|
||||
} else {
|
||||
// SCTC breaks the CFG invariant so we have to make some affordances
|
||||
// here if we want dyno stats after running it.
|
||||
TakenCount = BB->branch_info_begin()->Count;
|
||||
if (TakenCount != COUNT_NO_PROFILE)
|
||||
NonTakenCount = BBExecutionCount - TakenCount;
|
||||
else
|
||||
NonTakenCount = 0;
|
||||
|
||||
// If succ_size == 0 then we are branching to a function
|
||||
// rather than a BB label.
|
||||
IsForwardBranch = BB->succ_size() == 0
|
||||
? isForwardCall(BC.MIA->getTargetSymbol(*CondBranch))
|
||||
: isForwardBranch(BB, BB->getFallthrough());
|
||||
// CTCs
|
||||
if (BC.MIA->isCTC(*CondBranch)) {
|
||||
if (BB->branch_info_begin() != BB->branch_info_end())
|
||||
Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Conditional branch that could be followed by an unconditional branch.
|
||||
uint64_t TakenCount = BB->getBranchInfo(true).Count;
|
||||
if (TakenCount == COUNT_NO_PROFILE)
|
||||
TakenCount = 0;
|
||||
|
||||
uint64_t NonTakenCount = BB->getBranchInfo(false).Count;
|
||||
if (NonTakenCount == COUNT_NO_PROFILE)
|
||||
NonTakenCount = 0;
|
||||
|
||||
if (IsForwardBranch) {
|
||||
if (isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
|
||||
Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
|
||||
Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
|
||||
} else {
|
||||
|
|
|
@ -516,6 +516,8 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
|
|||
auto &MIA = BC.MIA;
|
||||
uint64_t NumLocalCTCCandidates = 0;
|
||||
uint64_t NumLocalCTCs = 0;
|
||||
uint64_t LocalCTCTakenCount = 0;
|
||||
uint64_t LocalCTCExecCount = 0;
|
||||
std::vector<std::tuple<BinaryBasicBlock *, BinaryBasicBlock *, const BinaryBasicBlock *>>
|
||||
NeedsUncondBranch;
|
||||
|
||||
|
@ -587,14 +589,29 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
|
|||
// Change destination of the conditional branch.
|
||||
MIA->replaceBranchTarget(*CondBranch, CalleeSymbol, BC.Ctx.get());
|
||||
}
|
||||
const uint64_t CTCTakenFreq = PredBB->getBranchInfo(true).Count ==
|
||||
BinaryBasicBlock::COUNT_NO_PROFILE
|
||||
? 0
|
||||
: PredBB->getBranchInfo(true).Count;
|
||||
// Annotate it, so "isCall" returns true for this jcc
|
||||
MIA->addAnnotation(BC.Ctx.get(), *CondBranch, "IsCTC", true);
|
||||
// Add info about the conditional tail call frequency, otherwise this
|
||||
// info will be lost when we delete the associated BranchInfo entry
|
||||
BC.MIA->addAnnotation(BC.Ctx.get(), *CondBranch, "CTCTakenFreq",
|
||||
CTCTakenFreq);
|
||||
|
||||
// Remove the unused successor which may be eliminated later
|
||||
// if there are no other users.
|
||||
PredBB->removeSuccessor(BB);
|
||||
// Update BB execution count
|
||||
if (BB->getKnownExecutionCount() > 0) {
|
||||
assert(CTCTakenFreq <= BB->getKnownExecutionCount());
|
||||
BB->setExecutionCount(BB->getExecutionCount() - CTCTakenFreq);
|
||||
}
|
||||
|
||||
++NumLocalCTCs;
|
||||
LocalCTCTakenCount += CTCTakenFreq;
|
||||
LocalCTCExecCount += PredBB->getKnownExecutionCount();
|
||||
}
|
||||
|
||||
// Remove the block from CFG if all predecessors were removed.
|
||||
|
@ -643,11 +660,16 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
|
|||
}
|
||||
|
||||
DEBUG(dbgs() << "BOLT: created " << NumLocalCTCs
|
||||
<< " conditional tail calls from a total of " << NumLocalCTCCandidates
|
||||
<< " candidates in function " << BF << "\n";);
|
||||
<< " conditional tail calls from a total of "
|
||||
<< NumLocalCTCCandidates << " candidates in function " << BF
|
||||
<< ". CTCs execution count for this function is "
|
||||
<< LocalCTCExecCount << " and CTC taken count is "
|
||||
<< LocalCTCTakenCount << "\n";);
|
||||
|
||||
NumTailCallsPatched += NumLocalCTCs;
|
||||
NumCandidateTailCalls += NumLocalCTCCandidates;
|
||||
CTCExecCount += LocalCTCExecCount;
|
||||
CTCTakenCount += LocalCTCTakenCount;
|
||||
|
||||
return NumLocalCTCs > 0;
|
||||
}
|
||||
|
@ -672,10 +694,13 @@ void SimplifyConditionalTailCalls::runOnFunctions(
|
|||
outs() << "BOLT-INFO: SCTC: patched " << NumTailCallsPatched
|
||||
<< " tail calls (" << NumOrigForwardBranches << " forward)"
|
||||
<< " tail calls (" << NumOrigBackwardBranches << " backward)"
|
||||
<< " from a total of " << NumCandidateTailCalls
|
||||
<< " while removing " << NumDoubleJumps << " double jumps"
|
||||
<< " from a total of " << NumCandidateTailCalls << " while removing "
|
||||
<< NumDoubleJumps << " double jumps"
|
||||
<< " and removing " << DeletedBlocks << " basic blocks"
|
||||
<< " totalling " << DeletedBytes << " bytes of code.\n";
|
||||
<< " totalling " << DeletedBytes
|
||||
<< " bytes of code. CTCs total execution count is " << CTCExecCount
|
||||
<< " and the number of times CTCs are taken is " << CTCTakenCount
|
||||
<< ".\n";
|
||||
}
|
||||
|
||||
void Peepholes::shortenInstructions(BinaryContext &BC,
|
||||
|
|
|
@ -198,6 +198,8 @@ class FinalizeFunctions : public BinaryFunctionPass {
|
|||
class SimplifyConditionalTailCalls : public BinaryFunctionPass {
|
||||
uint64_t NumCandidateTailCalls{0};
|
||||
uint64_t NumTailCallsPatched{0};
|
||||
uint64_t CTCExecCount{0};
|
||||
uint64_t CTCTakenCount{0};
|
||||
uint64_t NumOrigForwardBranches{0};
|
||||
uint64_t NumOrigBackwardBranches{0};
|
||||
uint64_t NumDoubleJumps{0};
|
||||
|
|
Loading…
Reference in New Issue