[BOLT] Add option to print profile bias stats

Summary:
Profile bias may happen depending on the hardware counter used
to trigger LBR sampling, on the hardware implementation and as an
intrinsic characteristic of relying on LBRs. Since we infer fall-through
execution and these non-taken branches take zero hardware resources to
be represented, an LBR-based profile likely overrepresents paths with
fall-throughs and underrepresents paths with many taken branches. This patch
adds an option to print statistics about profile bias so we can better
understand these biases.

The goal is to analyze differences in the sum of the frequency of all
incoming edges in a basic block versus the sum of all outgoing. In an
ideally sampled profile, these differences should be close to zero. With
this option, the user gets the mean of these differences in flow as a
percentage of the input flow. For example, if this number is 15%, it
means, on average, a block observed 15% more or less flow going out of
it in comparison with the flow going in. We also print the standard
deviation so we can have an idea of how spread apart the different
measurements of flow differences are. If variance is low, it means the
average bias is happening across all blocks, which is compatible with
using LBRs. If the variance is high, it means some blocks in the profile
have a much higher bias than others, which is compatible with using a
biased event such as cycles to sample LBRs because it overrepresents
paths that end in an expensive instruction.

(cherry picked from FBD15790517)
This commit is contained in:
Rafael Auler 2019-06-10 17:26:48 -07:00 committed by Maksim Panchenko
parent 1ec091e6f5
commit bda13b7dd8
3 changed files with 122 additions and 0 deletions

View File

@ -200,6 +200,13 @@ PrintUCE("print-uce",
cl::Hidden,
cl::cat(BoltOptCategory));
// Opt-in flag (off by default). When set, the pass manager registers the
// PrintProfileStats pass, which reports profile quality/bias statistics.
static cl::opt<bool>
PrintProfileStats("print-profile-stats",
cl::desc("print profile quality/bias analysis"),
cl::ZeroOrMore,
cl::init(false),
cl::cat(BoltCategory));
static cl::opt<bool>
SimplifyConditionalTailCalls("simplify-conditional-tail-calls",
cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
@ -369,6 +376,9 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
// Run this pass first to use stats for the original functions.
Manager.registerPass(llvm::make_unique<PrintProgramStats>(NeverPrint));
if (opts::PrintProfileStats)
Manager.registerPass(llvm::make_unique<PrintProfileStats>(NeverPrint));
Manager.registerPass(llvm::make_unique<ValidateInternalCalls>(NeverPrint));
Manager.registerPass(llvm::make_unique<StripRepRet>(NeverPrint),

View File

@ -1252,6 +1252,99 @@ void AssignSections::runOnFunctions(BinaryContext &BC) {
}
}
// Report how far the profile is from satisfying flow conservation.
//
// For every non-empty simple function, the counts of the CFG edges entering
// and leaving each basic block are summed. For each block that qualifies (see
// computeImbalance below) the relative imbalance |outgoing - incoming| /
// incoming is computed. The mean and the standard deviation of that value over
// all qualifying blocks in the binary are printed as percentages. With
// opts::Verbosity >= 1 the function with the highest per-function mean
// imbalance is also named.
void PrintProfileStats::runOnFunctions(BinaryContext &BC) {
  // For each function CFG, IncomingMap holds the sum of the frequency of the
  // incoming edges of each BB, and OutgoingMap the sum of the frequency of its
  // outgoing edges. Both are kept for the second pass over the functions.
  using FlowMapTy = std::unordered_map<const BinaryBasicBlock *, uint64_t>;
  std::unordered_map<const BinaryFunction *, FlowMapTy> TotalIncomingMaps;
  std::unordered_map<const BinaryFunction *, FlowMapTy> TotalOutgoingMaps;

  // Single definition of "block of interest" and of its imbalance, shared by
  // both passes so the mean and the deviation always cover the same blocks.
  // Returns false for blocks that are not scored: low frequency blocks
  // (incoming flow below 100) and blocks without outgoing flow, which is meant
  // to leave out entry and exit blocks. Uses find() so that querying a block
  // that has no recorded flow does not insert a new map entry.
  auto computeImbalance = [](const FlowMapTy &IncomingMap,
                             const FlowMapTy &OutgoingMap,
                             const BinaryBasicBlock &BB, double &Imbalance) {
    const auto InIt = IncomingMap.find(&BB);
    const auto OutIt = OutgoingMap.find(&BB);
    const uint64_t Incoming = InIt == IncomingMap.end() ? 0 : InIt->second;
    const uint64_t Outgoing = OutIt == OutgoingMap.end() ? 0 : OutIt->second;
    if (Incoming < 100 || Outgoing == 0)
      return false;
    const double Difference = static_cast<double>(Outgoing) - Incoming;
    Imbalance = fabs(Difference / Incoming);
    return true;
  };

  double FlowImbalanceMean = 0.0;
  size_t NumBlocksConsidered = 0;
  double WorstBias = 0.0;
  const BinaryFunction *WorstBiasFunc = nullptr;

  // First pass: build the flow maps and compute the mean.
  for (const auto &BFI : BC.getBinaryFunctions()) {
    const BinaryFunction &Function = BFI.second;
    if (Function.empty() || !Function.isSimple())
      continue;

    FlowMapTy &IncomingMap = TotalIncomingMaps[&Function];
    FlowMapTy &OutgoingMap = TotalOutgoingMaps[&Function];
    for (const auto &BB : Function) {
      auto TotalOutgoing = 0ULL;
      // Branch info is walked in lockstep with the successor list.
      auto SuccBIIter = BB.branch_info_begin();
      for (auto Succ : BB.successors()) {
        const auto Count = SuccBIIter->Count;
        ++SuccBIIter;
        // Edges without profile data contribute no flow.
        if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0)
          continue;
        TotalOutgoing += Count;
        IncomingMap[Succ] += Count;
      }
      OutgoingMap[&BB] = TotalOutgoing;
    }

    size_t NumBlocks = 0;
    double Mean = 0.0;
    for (const auto &BB : Function) {
      double Imbalance = 0.0;
      if (!computeImbalance(IncomingMap, OutgoingMap, BB, Imbalance))
        continue;
      ++NumBlocks;
      Mean += Imbalance;
    }
    if (!NumBlocks)
      continue;

    FlowImbalanceMean += Mean;
    NumBlocksConsidered += NumBlocks;

    // Track the function whose blocks are, on average, the most imbalanced.
    const double FuncMean = Mean / NumBlocks;
    if (FuncMean > WorstBias) {
      WorstBias = FuncMean;
      WorstBiasFunc = &Function;
    }
  }
  if (NumBlocksConsidered > 0)
    FlowImbalanceMean /= NumBlocksConsidered;

  // Second pass: compute the standard deviation around the global mean. The
  // set of scored blocks is the same as in the first pass, so the block count
  // gathered there is reused.
  double SumSquaredDeviation = 0.0;
  for (const auto &BFI : BC.getBinaryFunctions()) {
    const BinaryFunction &Function = BFI.second;
    if (Function.empty() || !Function.isSimple())
      continue;

    const FlowMapTy &IncomingMap = TotalIncomingMaps[&Function];
    const FlowMapTy &OutgoingMap = TotalOutgoingMaps[&Function];
    for (const auto &BB : Function) {
      double Imbalance = 0.0;
      if (!computeImbalance(IncomingMap, OutgoingMap, BB, Imbalance))
        continue;
      const double Deviation = Imbalance - FlowImbalanceMean;
      SumSquaredDeviation += Deviation * Deviation;
    }
  }
  double FlowImbalanceStdDev = 0.0;
  if (NumBlocksConsidered)
    FlowImbalanceStdDev = sqrt(SumSquaredDeviation / NumBlocksConsidered);

  // Report to user
  outs() << format("BOLT-INFO: Profile bias score: %.4lf%% StDev: %.4lf%%\n",
                   (100.0 * FlowImbalanceMean), (100.0 * FlowImbalanceStdDev));
  if (WorstBiasFunc && opts::Verbosity >= 1) {
    outs() << "Worst average bias observed in " << WorstBiasFunc->getPrintName()
           << "\n";
    DEBUG(WorstBiasFunc->dump());
  }
}
void
PrintProgramStats::runOnFunctions(BinaryContext &BC) {
uint64_t NumSimpleFunctions{0};

View File

@ -347,6 +347,25 @@ class AssignSections : public BinaryFunctionPass {
void runOnFunctions(BinaryContext &BC) override;
};
/// Profile quality diagnostic. Sums, for every basic block of each CFG, the
/// frequency of its incoming edges and of its outgoing edges, and reports to
/// the user the mean and the standard deviation of the absolute difference
/// between the two, relative to the incoming flow. A large imbalance points to
/// a bad quality profile. Blocks with an incoming frequency lower than 100 or
/// with no outgoing flow are not scored, which is meant to leave out
/// prologues and epilogues.
class PrintProfileStats : public BinaryFunctionPass {
public:
  explicit PrintProfileStats(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  /// Identifier string of this pass.
  const char *getName() const override { return "profile-stats"; }

  /// Computes the flow imbalance statistics over \p BC and prints them.
  void runOnFunctions(BinaryContext &BC) override;

  /// Always answers false, whatever the function.
  /// NOTE(review): presumably this keeps the pass manager from dumping
  /// functions after this pass -- confirm against BinaryFunctionPass.
  bool shouldPrint(const BinaryFunction &) const override { return false; }
};
/// Prints a list of the top 100 functions sorted by a set of
/// dyno stats categories.
class PrintProgramStats : public BinaryFunctionPass {