From 540489de6816486f98221952d7a54f0293c68d1e Mon Sep 17 00:00:00 2001
From: weihe
Date: Sat, 8 Aug 2020 17:49:33 -0700
Subject: [PATCH] [llvm-profdata] Implement llvm-profdata overlap for sample profiles

Implemented the `llvm-profdata overlap` feature for sample profiles. It reports
weighted //similarity// and unweighted //overlap// metrics at program and
function level for two input profiles. Similarity metrics are symmetric with
regard to the order of the two input profiles. By default, the tool only
reports the program-level summary. Users can look into function-level details
via the additional options `--function`, `--similarity-cutoff`, and
`--value-cutoff`.

The similarity metrics are designed as follows:

  * Program-level summary
    * Whole program profile similarity is an aggregate over function-level
      similarity `FS`: `PS = sum(FS(A) * avg_weight(A))` for all functions `A`.
    * Whole program sample overlap: `PSO = common_samples / total_samples`.
    * Function overlap: `FO = #common_function / #total_function`.
    * Hot-function overlap: `HFO = #common_hot_function / #total_hot_function`.
    * Hot-block overlap: `HBO = #common_hot_block / #total_hot_block`.
  * Function-level details
    * Function-level similarity is an aggregate over line/block-level
      similarities `BS` of all sample lines/blocks in the function, weighted by
      the closeness of the function's weights in two profiles:
      `FS = sum(BS(i)) * (1 - weight_distance(A))`.
    * Function-level sample overlap: `FSO = common_samples / total_samples` for
      samples in the function.

Reviewed By: wenlei, hoyFB, wmi Differential Revision: https://reviews.llvm.org/D83852 --- .../Inputs/sample-overlap-0.proftext | 18 + .../Inputs/sample-overlap-1.proftext | 18 + .../Inputs/sample-overlap-2.proftext | 18 + .../Inputs/sample-overlap-3.proftext | 18 + .../Inputs/sample-overlap-4.proftext | 18 + .../Inputs/sample-overlap-5.proftext | 18 + .../tools/llvm-profdata/sample-overlap.test | 118 +++ llvm/tools/llvm-profdata/llvm-profdata.cpp | 971 +++++++++++++++++- 8 files changed, 1181 insertions(+), 16 deletions(-) create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-overlap-0.proftext create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-overlap-1.proftext create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-overlap-2.proftext create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-overlap-3.proftext create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-overlap-4.proftext create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-overlap-5.proftext create mode 100644 llvm/test/tools/llvm-profdata/sample-overlap.test diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-0.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-0.proftext new file mode 100644 index 000000000000..33fce647b439 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-0.proftext @@ -0,0 +1,18 @@ +_Z3bari:20301:1437 + 1: 1437 +_Z3fooi:7711:610 + 1: 610 +main:184019:0 + 4: 534 + 4.2: 534 + 5: 1075 + 5.1: 1075 + 6: 2080 + 7: 534 + 9: 2064 _Z3bari:1471 _Z3fooi:631 + 10: inline1:1000 + 1: 1000 + 10: inline2:2000 + 1: 2000 +_Z3bazi:20301:1000 + 1: 1000 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-1.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-1.proftext new file mode 100644 index 000000000000..3f3f4c1f6ee4 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-1.proftext @@ -0,0 +1,18 @@ +_Z3bari:203010:14370 + 1: 14370 +_Z3fooi:77110:6100 + 1: 6100 
+main:1840190:0 + 4: 5340 + 4.2: 5340 + 5: 10750 + 5.1: 10750 + 6: 20800 + 7: 5340 + 9: 20640 _Z3bari:14710 _Z3fooi:6310 + 10: inline1:10000 + 1: 10000 + 10: inline2:20000 + 1: 20000 +_Z3bazi:203010:10000 + 1: 10000 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-2.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-2.proftext new file mode 100644 index 000000000000..e3420120f17e --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-2.proftext @@ -0,0 +1,18 @@ +_Z3bari:20301:1437 + 1: 1437 +_Z3fooi:7711:610 + 1: 610 +main:18401:0 + 4: 53 + 4.2: 53 + 5: 107 + 5.1: 107 + 6: 208 + 7: 53 + 9: 206 _Z3bari:1471 _Z3fooi:631 + 10: inline1:100 + 1: 100 + 10: inline2:200 + 1: 200 +_Z3bazi:20301:1000 + 1: 1000 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-3.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-3.proftext new file mode 100644 index 000000000000..b7015110cf16 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-3.proftext @@ -0,0 +1,18 @@ +_Z3bari:20301:1437 + 1: 1437 +_Z3fooi2:7711:610 + 1: 610 +main2:184019:0 + 4: 534 + 4.2: 534 + 5: 1075 + 5.1: 1075 + 6: 2080 + 7: 534 + 9: 2064 _Z3bari:1471 _Z3fooi:631 + 10: inline1:1000 + 1: 1000 + 10: inline2:2000 + 1: 2000 +_Z3bazi:20301:1000 + 1: 100 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-4.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-4.proftext new file mode 100644 index 000000000000..5421d56334d2 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-4.proftext @@ -0,0 +1,18 @@ +_Z3bari:20301:1437 + 2: 1437 +_Z3fooi:7711:610 + 2: 610 +main:184019:0 + 5: 534 + 5.2: 534 + 6: 1075 + 6.1: 1075 + 7: 208 + 8: 534 + 10: 206 _Z3bari:1471 _Z3fooi:631 + 11: inline1:1000 + 1: 1000 + 11: inline2:2000 + 1: 2000 +_Z3bazi:20301:1000 + 2: 1000 diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-5.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-5.proftext 
new file mode 100644 index 000000000000..1bcd3ce4dcbe --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-overlap-5.proftext @@ -0,0 +1,18 @@ +_Z3bari:0:0 + 1: 0 +_Z3fooi:0:0 + 1: 0 +main:0:0 + 4: 0 + 4.2: 0 + 5: 0 + 5.1: 0 + 6: 0 + 7: 0 + 9: 0 + 10: inline1:0 + 1: 0 + 10: inline2:0 + 1: 0 +_Z3bazi:0:0 + 1: 0 diff --git a/llvm/test/tools/llvm-profdata/sample-overlap.test b/llvm/test/tools/llvm-profdata/sample-overlap.test new file mode 100644 index 000000000000..02609d8f98b0 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/sample-overlap.test @@ -0,0 +1,118 @@ +; RUN: llvm-profdata overlap --sample %S/Inputs/sample-overlap-0.proftext %S/Inputs/sample-overlap-0.proftext | FileCheck %s --check-prefix=OVERLAP0 --match-full-lines --strict-whitespace +; OVERLAP0:Program level: +; OVERLAP0: Whole program profile similarity: 100.000% +; OVERLAP0: Whole program sample overlap: 100.000% +; OVERLAP0: percentage of samples unique in base profile: 0.000% +; OVERLAP0: percentage of samples unique in test profile: 0.000% +; OVERLAP0: total samples in base profile: 13943 +; OVERLAP0: total samples in test profile: 13943 +; OVERLAP0: Function overlap: 100.000% +; OVERLAP0: overlap functions: 4 +; OVERLAP0: functions unique in base profile: 0 +; OVERLAP0: functions unique in test profile: 0 +; OVERLAP0: Hot-function overlap: 100.000% +; OVERLAP0: overlap hot functions: 4 +; OVERLAP0: hot functions unique in base profile: 0 +; OVERLAP0: hot functions unique in test profile: 0 +; OVERLAP0: Hot-block overlap: 100.000% +; OVERLAP0: overlap hot blocks: 12 +; OVERLAP0: hot blocks unique in base profile: 0 +; OVERLAP0: hot blocks unique in test profile: 0 + +; RUN: llvm-profdata overlap --sample %S/Inputs/sample-overlap-0.proftext %S/Inputs/sample-overlap-1.proftext | FileCheck %s --check-prefix=OVERLAP1 --match-full-lines --strict-whitespace +; OVERLAP1:Program level: +; OVERLAP1: Whole program profile similarity: 100.000% +; OVERLAP1: Whole program sample overlap: 10.000% 
+; OVERLAP1: percentage of samples unique in base profile: 0.000% +; OVERLAP1: percentage of samples unique in test profile: 0.000% +; OVERLAP1: total samples in base profile: 13943 +; OVERLAP1: total samples in test profile: 139430 +; OVERLAP1: Function overlap: 100.000% +; OVERLAP1: overlap functions: 4 +; OVERLAP1: functions unique in base profile: 0 +; OVERLAP1: functions unique in test profile: 0 +; OVERLAP1: Hot-function overlap: 100.000% +; OVERLAP1: overlap hot functions: 4 +; OVERLAP1: hot functions unique in base profile: 0 +; OVERLAP1: hot functions unique in test profile: 0 +; OVERLAP1: Hot-block overlap: 100.000% +; OVERLAP1: overlap hot blocks: 12 +; OVERLAP1: hot blocks unique in base profile: 0 +; OVERLAP1: hot blocks unique in test profile: 0 + +; RUN: llvm-profdata overlap --sample --similarity-cutoff=800000 %S/Inputs/sample-overlap-0.proftext %S/Inputs/sample-overlap-2.proftext | FileCheck %s --check-prefix=OVERLAP2 --match-full-lines --strict-whitespace +; OVERLAP2:Program level: +; OVERLAP2: Whole program profile similarity: 63.720% +; OVERLAP2: Whole program sample overlap: 29.649% +; OVERLAP2: percentage of samples unique in base profile: 0.000% +; OVERLAP2: percentage of samples unique in test profile: 0.000% +; OVERLAP2: total samples in base profile: 13943 +; OVERLAP2: total samples in test profile: 4134 +; OVERLAP2: Function overlap: 100.000% +; OVERLAP2: overlap functions: 4 +; OVERLAP2: functions unique in base profile: 0 +; OVERLAP2: functions unique in test profile: 0 +; OVERLAP2: Hot-function overlap: 100.000% +; OVERLAP2: overlap hot functions: 4 +; OVERLAP2: hot functions unique in base profile: 0 +; OVERLAP2: hot functions unique in test profile: 0 +; OVERLAP2: Hot-block overlap: 100.000% +; OVERLAP2: overlap hot blocks: 12 +; OVERLAP2: hot blocks unique in base profile: 0 +; OVERLAP2: hot blocks unique in test profile: 0 +; OVERLAP2:Function-level details: +; OVERLAP2:Base weight Test weight Similarity Overlap Base unique Test 
unique Base samples Test samples Function name +; OVERLAP2:78.15% 26.29% 48.09% 9.98% 0.00% 0.00% 10896 1087 main +; OVERLAP2:10.31% 34.76% 75.55% 100.00% 0.00% 0.00% 1437 1437 _Z3bari + +; RUN: llvm-profdata overlap --sample --value-cutoff=1000 %S/Inputs/sample-overlap-0.proftext %S/Inputs/sample-overlap-3.proftext | FileCheck %s --check-prefix=OVERLAP3 --match-full-lines --strict-whitespace +; OVERLAP3:Program level: +; OVERLAP3: Whole program profile similarity: 14.301% +; OVERLAP3: Whole program sample overlap: 6.040% +; OVERLAP3: percentage of samples unique in base profile: 82.522% +; OVERLAP3: percentage of samples unique in test profile: 88.216% +; OVERLAP3: total samples in base profile: 13943 +; OVERLAP3: total samples in test profile: 13043 +; OVERLAP3: Function overlap: 33.333% +; OVERLAP3: overlap functions: 2 +; OVERLAP3: functions unique in base profile: 2 +; OVERLAP3: functions unique in test profile: 2 +; OVERLAP3: Hot-function overlap: 16.667% +; OVERLAP3: overlap hot functions: 1 +; OVERLAP3: hot functions unique in base profile: 3 +; OVERLAP3: hot functions unique in test profile: 2 +; OVERLAP3: Hot-block overlap: 4.545% +; OVERLAP3: overlap hot blocks: 1 +; OVERLAP3: hot blocks unique in base profile: 11 +; OVERLAP3: hot blocks unique in test profile: 10 +; OVERLAP3:Function-level details: +; OVERLAP3:Base weight Test weight Similarity Overlap Base unique Test unique Base samples Test samples Function name +; OVERLAP3:10.31% 11.02% 99.29% 100.00% 0.00% 0.00% 1437 1437 _Z3bari +; OVERLAP3:0.00% 83.54% 0.00% 0.00% 0.00% 100.00% 0 10896 main2 + +; RUN: llvm-profdata overlap --sample --function=main %S/Inputs/sample-overlap-0.proftext %S/Inputs/sample-overlap-4.proftext | FileCheck %s --check-prefix=OVERLAP4 --match-full-lines --strict-whitespace +; OVERLAP4:Program level: +; OVERLAP4: Whole program profile similarity: 17.302% +; OVERLAP4: Whole program sample overlap: 8.134% +; OVERLAP4: percentage of samples unique in base profile: 73.542% +; 
OVERLAP4: percentage of samples unique in test profile: 82.209% +; OVERLAP4: total samples in base profile: 13943 +; OVERLAP4: total samples in test profile: 10213 +; OVERLAP4: Function overlap: 100.000% +; OVERLAP4: overlap functions: 4 +; OVERLAP4: functions unique in base profile: 0 +; OVERLAP4: functions unique in test profile: 0 +; OVERLAP4: Hot-function overlap: 100.000% +; OVERLAP4: overlap hot functions: 4 +; OVERLAP4: hot functions unique in base profile: 0 +; OVERLAP4: hot functions unique in test profile: 0 +; OVERLAP4: Hot-block overlap: 14.286% +; OVERLAP4: overlap hot blocks: 3 +; OVERLAP4: hot blocks unique in base profile: 9 +; OVERLAP4: hot blocks unique in test profile: 9 +; OVERLAP4:Function-level details: +; OVERLAP4:Base weight Test weight Similarity Overlap Base unique Test unique Base samples Test samples Function name +; OVERLAP4:78.15% 70.17% 23.33% 11.18% 66.14% 74.64% 10896 7166 main + +; RUN: llvm-profdata overlap --sample %S/Inputs/sample-overlap-0.proftext %S/Inputs/sample-overlap-5.proftext | FileCheck %s --check-prefix=OVERLAP5 --match-full-lines --strict-whitespace +; OVERLAP5:Sum of sample counts for profile {{.*}}/Inputs/sample-overlap-5.proftext is 0. 
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 771aec89720e..670af164290e 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -945,6 +945,930 @@ static void overlapInstrProfile(const std::string &BaseFilename,
   Overlap.dump(OS);
 }
 
+namespace {
+struct SampleOverlapStats {
+  StringRef BaseName;
+  StringRef TestName;
+  // Number of overlap units
+  uint64_t OverlapCount;
+  // Total samples of overlap units
+  uint64_t OverlapSample;
+  // Number of and total samples of units present only in base or test
+  // profile
+  uint64_t BaseUniqueCount;
+  uint64_t BaseUniqueSample;
+  uint64_t TestUniqueCount;
+  uint64_t TestUniqueSample;
+  // Number of units and total samples in base or test profile
+  uint64_t BaseCount;
+  uint64_t BaseSample;
+  uint64_t TestCount;
+  uint64_t TestSample;
+  // Number of and total samples of units present in at least one profile
+  uint64_t UnionCount;
+  uint64_t UnionSample;
+  // Weighted similarity
+  double Similarity;
+  // For SampleOverlapStats instances representing functions, weights of the
+  // function in base and test profiles
+  double BaseWeight;
+  double TestWeight;
+
+  SampleOverlapStats()
+      : OverlapCount(0), OverlapSample(0), BaseUniqueCount(0),
+        BaseUniqueSample(0), TestUniqueCount(0), TestUniqueSample(0),
+        BaseCount(0), BaseSample(0), TestCount(0), TestSample(0), UnionCount(0),
+        UnionSample(0), Similarity(0.0), BaseWeight(0.0), TestWeight(0.0) {}
+};
+} // end anonymous namespace
+
+namespace {
+struct FuncSampleStats {
+  uint64_t SampleSum;
+  uint64_t MaxSample;
+  uint64_t HotBlockCount;
+  FuncSampleStats() : SampleSum(0), MaxSample(0), HotBlockCount(0) {}
+  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
+                  uint64_t HotBlockCount)
+      : SampleSum(SampleSum), MaxSample(MaxSample),
+        HotBlockCount(HotBlockCount) {}
+};
+} // end anonymous namespace
+
+namespace {
+enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };
+
+// Class for updating merging steps for two sorted maps. The class should be
+// instantiated with a map iterator type.
+template <class T> class MatchStep {
+public:
+  MatchStep() = delete;
+
+  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
+      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
+        SecondEnd(SecondEnd), Status(MS_None) {}
+
+  bool areBothFinished() const {
+    return (FirstIter == FirstEnd && SecondIter == SecondEnd);
+  }
+
+  bool isFirstFinished() const { return FirstIter == FirstEnd; }
+
+  bool isSecondFinished() const { return SecondIter == SecondEnd; }
+
+  /// Advance one step based on the previous match status unless the previous
+  /// status is MS_None. Then update Status based on the comparison between two
+  /// container iterators at the current step. If the previous status is
+  /// MS_None, it means two iterators are at the beginning and no comparison has
+  /// been made, so we simply update Status without advancing the iterators.
+  void updateOneStep();
+
+  T getFirstIter() const { return FirstIter; }
+
+  T getSecondIter() const { return SecondIter; }
+
+  MatchStatus getMatchStatus() const { return Status; }
+
+private:
+  // Current iterator and end iterator of the first container.
+  T FirstIter;
+  T FirstEnd;
+  // Current iterator and end iterator of the second container.
+  T SecondIter;
+  T SecondEnd;
+  // Match status of the current step.
+  MatchStatus Status;
+};
+} // end anonymous namespace
+
+template <class T> void MatchStep<T>::updateOneStep() {
+  switch (Status) {
+  case MS_Match:
+    ++FirstIter;
+    ++SecondIter;
+    break;
+  case MS_FirstUnique:
+    ++FirstIter;
+    break;
+  case MS_SecondUnique:
+    ++SecondIter;
+    break;
+  case MS_None:
+    break;
+  }
+
+  // Update Status according to iterators at the current step.
+  if (areBothFinished())
+    return;
+  if (FirstIter != FirstEnd &&
+      (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
+    Status = MS_FirstUnique;
+  else if (SecondIter != SecondEnd &&
+           (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
+    Status = MS_SecondUnique;
+  else
+    Status = MS_Match;
+}
+
+// Return the sum of line/block samples, the max line/block sample, and the
+// number of line/block samples above the given threshold in a function
+// including its inlinees.
+static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
+                               FuncSampleStats &FuncStats,
+                               uint64_t HotThreshold) {
+  for (const auto &L : Func.getBodySamples()) {
+    uint64_t Sample = L.second.getSamples();
+    FuncStats.SampleSum += Sample;
+    FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
+    if (Sample >= HotThreshold)
+      ++FuncStats.HotBlockCount;
+  }
+
+  for (const auto &C : Func.getCallsiteSamples()) {
+    for (const auto &F : C.second)
+      getFuncSampleStats(F.second, FuncStats, HotThreshold);
+  }
+}
+
+/// Predicate that determines if a function is hot with a given threshold. We
+/// keep it separate from its callsites for possible extension in the future.
+static bool isFunctionHot(const FuncSampleStats &FuncStats,
+                          uint64_t HotThreshold) {
+  // We intentionally compare the maximum sample count in a function with the
+  // HotThreshold to get an approximate determination on hot functions.
+  return (FuncStats.MaxSample >= HotThreshold);
+}
+
+namespace {
+class SampleOverlapAggregator {
+public:
+  SampleOverlapAggregator(const std::string &BaseFilename,
+                          const std::string &TestFilename,
+                          double LowSimilarityThreshold, double Epsilon,
+                          const OverlapFuncFilters &FuncFilter)
+      : BaseFilename(BaseFilename), TestFilename(TestFilename),
+        LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
+        FuncFilter(FuncFilter) {}
+
+  /// Detect 0-sample input profile and report to output stream. This interface
+  /// should be called after loadProfiles().
+  bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
+
+  /// Write out function-level similarity statistics for functions specified by
+  /// options --function, --value-cutoff, and --similarity-cutoff.
+  void dumpFuncSimilarity(raw_fd_ostream &OS) const;
+
+  /// Write out program-level similarity and overlap statistics.
+  void dumpProgramSummary(raw_fd_ostream &OS) const;
+
+  /// Write out hot-function and hot-block statistics for base_profile,
+  /// test_profile, and their overlap. For both cases, the overlap HO is
+  /// calculated as follows:
+  ///   Given the number of functions (or blocks) that are hot in both profiles
+  ///   HCommon and the number of functions (or blocks) that are hot in at
+  ///   least one profile HUnion, HO = HCommon / HUnion.
+  void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
+
+  /// This function tries matching functions in base and test profiles. For each
+  /// pair of matched functions, it aggregates the function-level
+  /// similarity into a profile-level similarity. It also dumps function-level
+  /// similarity information of functions specified by --function,
+  /// --value-cutoff, and --similarity-cutoff options. The program-level
+  /// similarity PS is computed as follows:
+  ///   Given function-level similarity FS(A) for all functions A, the
+  ///   weight of function A in base profile WB(A), and the weight of function
+  ///   A in test profile WT(A), compute PS(base_profile, test_profile) =
+  ///   sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
+  ///   meaning no-overlap.
+  void computeSampleProfileOverlap(raw_fd_ostream &OS);
+
+  /// Initialize ProfOverlap with the sum of samples in base and test
+  /// profiles. This function also computes and keeps the sum of samples and
+  /// max sample counts of each function in BaseStats and TestStats for later
+  /// use to avoid re-computations.
+  void initializeSampleProfileOverlap();
+
+  /// Load profiles specified by BaseFilename and TestFilename.
+  std::error_code loadProfiles();
+
+private:
+  SampleOverlapStats ProfOverlap;
+  SampleOverlapStats HotFuncOverlap;
+  SampleOverlapStats HotBlockOverlap;
+  std::string BaseFilename;
+  std::string TestFilename;
+  std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
+  std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
+  // BaseStats and TestStats hold FuncSampleStats for each function, with
+  // function name as the key.
+  StringMap<FuncSampleStats> BaseStats;
+  StringMap<FuncSampleStats> TestStats;
+  // Low similarity threshold in floating point number
+  double LowSimilarityThreshold;
+  // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
+  // for tracking hot blocks.
+  uint64_t BaseHotThreshold;
+  uint64_t TestHotThreshold;
+  // A small threshold used to round the results of floating point accumulations
+  // to resolve imprecision.
+  const double Epsilon;
+  std::multimap<double, SampleOverlapStats, std::greater<double>>
+      FuncSimilarityDump;
+  // FuncFilter carries specifications in options --value-cutoff and
+  // --function.
+  OverlapFuncFilters FuncFilter;
+  // Column offsets for printing the function-level details table.
+  static const unsigned int TestWeightCol = 15;
+  static const unsigned int SimilarityCol = 30;
+  static const unsigned int OverlapCol = 43;
+  static const unsigned int BaseUniqueCol = 53;
+  static const unsigned int TestUniqueCol = 67;
+  static const unsigned int BaseSampleCol = 81;
+  static const unsigned int TestSampleCol = 96;
+  static const unsigned int FuncNameCol = 111;
+
+  /// Return a similarity of two line/block sample counters in the same
+  /// function in base and test profiles. The line/block-similarity BS(i) is
+  /// computed as follows:
+  ///   For an offset i, given the sample count at i in base profile BB(i),
+  ///   the sample count at i in test profile BT(i), the sum of sample counts
+  ///   in this function in base profile SB, and the sum of sample counts in
+  ///   this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
+  ///   BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
+  double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
+                                const SampleOverlapStats &FuncOverlap) const;
+
+  void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
+                             uint64_t HotBlockCount);
+
+  void getHotFunctions(const StringMap<FuncSampleStats> &ProfStats,
+                       StringMap<FuncSampleStats> &HotFunc,
+                       uint64_t HotThreshold) const;
+
+  void computeHotFuncOverlap();
+
+  /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
+  /// Difference for two sample units in a matched function according to the
+  /// given match status.
+  void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
+                                     uint64_t HotBlockCount,
+                                     SampleOverlapStats &FuncOverlap,
+                                     double &Difference, MatchStatus Status);
+
+  /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
+  /// Difference for unmatched callees present in only one profile in a
+  /// matched caller function.
+  void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
+                                SampleOverlapStats &FuncOverlap,
+                                double &Difference, MatchStatus Status);
+
+  /// This function updates sample overlap statistics of an overlap function in
+  /// base and test profile. It also calculates a function-internal similarity
+  /// FIS as follows:
+  ///   For offsets i that have samples in at least one profile in this
+  ///   function A, given BS(i) returned by computeBlockSimilarity(), compute
+  ///   FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
+  ///   0.0 meaning no overlap.
+  double computeSampleFunctionInternalOverlap(
+      const sampleprof::FunctionSamples &BaseFunc,
+      const sampleprof::FunctionSamples &TestFunc,
+      SampleOverlapStats &FuncOverlap);
+
+  /// Function-level similarity (FS) is a weighted value over function internal
+  /// similarity (FIS). This function computes a function's FS from its FIS by
+  /// applying the weight.
+  double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
+                                 uint64_t TestFuncSample) const;
+
+  /// The function-level similarity FS(A) for a function A is computed as
+  /// follows:
+  ///   Compute a function-internal similarity FIS(A) by
+  ///   computeSampleFunctionInternalOverlap(). Then, with the weight of
+  ///   function A in base profile WB(A), and the weight of function A in test
+  ///   profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
+  ///   ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
+  double
+  computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
+                               const sampleprof::FunctionSamples *TestFunc,
+                               SampleOverlapStats *FuncOverlap,
+                               uint64_t BaseFuncSample,
+                               uint64_t TestFuncSample);
+
+  /// Profile-level similarity (PS) is a weighted aggregate over function-level
+  /// similarities (FS). This method weights the FS value by the function
+  /// weights in the base and test profiles for the aggregation.
+  double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
+                            uint64_t TestFuncSample) const;
+};
+} // end anonymous namespace
+
+bool SampleOverlapAggregator::detectZeroSampleProfile(
+    raw_fd_ostream &OS) const {
+  bool HaveZeroSample = false;
+  if (ProfOverlap.BaseSample == 0) {
+    OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
+    HaveZeroSample = true;
+  }
+  if (ProfOverlap.TestSample == 0) {
+    OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
+    HaveZeroSample = true;
+  }
+  return HaveZeroSample;
+}
+
+double SampleOverlapAggregator::computeBlockSimilarity(
+    uint64_t BaseSample, uint64_t TestSample,
+    const SampleOverlapStats &FuncOverlap) const {
+  double BaseFrac = 0.0;
+  double TestFrac = 0.0;
+  if (FuncOverlap.BaseSample > 0)
+    BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
+  if (FuncOverlap.TestSample > 0)
+    TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
+  return 1.0 - std::fabs(BaseFrac - TestFrac);
+}
+
+void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
+                                                    uint64_t TestSample,
+                                                    uint64_t HotBlockCount) {
+  bool IsBaseHot = (BaseSample >= BaseHotThreshold);
+  bool IsTestHot = (TestSample >= TestHotThreshold);
+  if (!IsBaseHot && !IsTestHot)
+    return;
+
+  HotBlockOverlap.UnionCount += HotBlockCount;
+  if (IsBaseHot)
+    HotBlockOverlap.BaseCount += HotBlockCount;
+  if (IsTestHot)
+    HotBlockOverlap.TestCount += HotBlockCount;
+  if (IsBaseHot && IsTestHot)
+    HotBlockOverlap.OverlapCount += HotBlockCount;
+}
+
+void SampleOverlapAggregator::getHotFunctions(
+    const StringMap<FuncSampleStats> &ProfStats,
+    StringMap<FuncSampleStats> &HotFunc, uint64_t HotThreshold) const {
+  for (const auto &F : ProfStats) {
+    if (isFunctionHot(F.second, HotThreshold))
+      HotFunc.try_emplace(F.first(), F.second);
+  }
+}
+
+void SampleOverlapAggregator::computeHotFuncOverlap() {
+  StringMap<FuncSampleStats> BaseHotFunc;
+  getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
+  HotFuncOverlap.BaseCount = BaseHotFunc.size();
+
+  StringMap<FuncSampleStats> TestHotFunc;
+  getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
+  HotFuncOverlap.TestCount = TestHotFunc.size();
+  HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
+
+  for (const auto &F : BaseHotFunc) {
+    if (TestHotFunc.count(F.first()))
+      ++HotFuncOverlap.OverlapCount;
+    else
+      ++HotFuncOverlap.UnionCount;
+  }
+}
+
+void SampleOverlapAggregator::updateOverlapStatsForFunction(
+    uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
+    SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
+  assert(Status != MS_None &&
+         "Match status should be updated before updating overlap statistics");
+  if (Status == MS_FirstUnique) {
+    TestSample = 0;
+    FuncOverlap.BaseUniqueSample += BaseSample;
+  } else if (Status == MS_SecondUnique) {
+    BaseSample = 0;
+    FuncOverlap.TestUniqueSample += TestSample;
+  } else {
+    ++FuncOverlap.OverlapCount;
+  }
+
+  FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
+  FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
+  Difference +=
+      1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
+  updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
+}
+
+void SampleOverlapAggregator::updateForUnmatchedCallee(
+    const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
+    double &Difference, MatchStatus Status) {
+  assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
+         "Status must be either of the two unmatched cases");
+  FuncSampleStats FuncStats;
+  if (Status == MS_FirstUnique) {
+    getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
+    updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
+                                  FuncStats.HotBlockCount, FuncOverlap,
+                                  Difference, Status);
+  } else {
+    getFuncSampleStats(Func, FuncStats, TestHotThreshold);
+    updateOverlapStatsForFunction(0, FuncStats.SampleSum,
+                                  FuncStats.HotBlockCount, FuncOverlap,
+                                  Difference, Status);
+  }
+}
+
+double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
+    const sampleprof::FunctionSamples &BaseFunc,
+    const sampleprof::FunctionSamples &TestFunc,
+    SampleOverlapStats &FuncOverlap) {
+
+  using namespace sampleprof;
+
+  double Difference = 0;
+
+  // Accumulate Difference for regular line/block samples in the function.
+  // We match them through sort-merge join algorithm because
+  // FunctionSamples::getBodySamples() returns a map of sample counters ordered
+  // by their offsets.
+  MatchStep<BodySampleMap::const_iterator> BlockIterStep(
+      BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
+      TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
+  BlockIterStep.updateOneStep();
+  while (!BlockIterStep.areBothFinished()) {
+    uint64_t BaseSample =
+        BlockIterStep.isFirstFinished()
+            ? 0
+            : BlockIterStep.getFirstIter()->second.getSamples();
+    uint64_t TestSample =
+        BlockIterStep.isSecondFinished()
+            ? 0
+            : BlockIterStep.getSecondIter()->second.getSamples();
+    updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
+                                  Difference, BlockIterStep.getMatchStatus());
+
+    BlockIterStep.updateOneStep();
+  }
+
+  // Accumulate Difference for callsite lines in the function. We match
+  // them through sort-merge algorithm because
+  // FunctionSamples::getCallsiteSamples() returns a map of callsite records
+  // ordered by their offsets.
+  MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
+      BaseFunc.getCallsiteSamples().cbegin(),
+      BaseFunc.getCallsiteSamples().cend(),
+      TestFunc.getCallsiteSamples().cbegin(),
+      TestFunc.getCallsiteSamples().cend());
+  CallsiteIterStep.updateOneStep();
+  while (!CallsiteIterStep.areBothFinished()) {
+    MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
+    assert(CallsiteStepStatus != MS_None &&
+           "Match status should be updated before entering loop body");
+
+    if (CallsiteStepStatus != MS_Match) {
+      auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
+                          ? CallsiteIterStep.getFirstIter()
+                          : CallsiteIterStep.getSecondIter();
+      for (const auto &F : Callsite->second)
+        updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
+                                 CallsiteStepStatus);
+    } else {
+      // There may be multiple inlinees at the same offset, so we need to try
+      // matching all of them. This match is implemented through sort-merge
+      // algorithm because callsite records at the same offset are ordered by
+      // function names.
+      MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
+          CallsiteIterStep.getFirstIter()->second.cbegin(),
+          CallsiteIterStep.getFirstIter()->second.cend(),
+          CallsiteIterStep.getSecondIter()->second.cbegin(),
+          CallsiteIterStep.getSecondIter()->second.cend());
+      CalleeIterStep.updateOneStep();
+      while (!CalleeIterStep.areBothFinished()) {
+        MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
+        if (CalleeStepStatus != MS_Match) {
+          auto Callee = (CalleeStepStatus == MS_FirstUnique)
+                            ? CalleeIterStep.getFirstIter()
+                            : CalleeIterStep.getSecondIter();
+          updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
+                                   CalleeStepStatus);
+        } else {
+          // An inlined function can contain other inlinees inside, so compute
+          // the Difference recursively.
+          Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
+                                      CalleeIterStep.getFirstIter()->second,
+                                      CalleeIterStep.getSecondIter()->second,
+                                      FuncOverlap);
+        }
+        CalleeIterStep.updateOneStep();
+      }
+    }
+    CallsiteIterStep.updateOneStep();
+  }
+
+  // Difference reflects the total differences of line/block samples in this
+  // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
+  // reflect the similarity between function profiles in [0.0f to 1.0f].
+  return (2.0 - Difference) / 2;
+}
+
+double SampleOverlapAggregator::weightForFuncSimilarity(
+    double FuncInternalSimilarity, uint64_t BaseFuncSample,
+    uint64_t TestFuncSample) const {
+  // Compute the weight as the distance between the function weights in two
+  // profiles.
+  double BaseFrac = 0.0;
+  double TestFrac = 0.0;
+  assert(ProfOverlap.BaseSample > 0 &&
+         "Total samples in base profile should be greater than 0");
+  BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
+  assert(ProfOverlap.TestSample > 0 &&
+         "Total samples in test profile should be greater than 0");
+  TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
+  double WeightDistance = std::fabs(BaseFrac - TestFrac);
+
+  // Take WeightDistance into the similarity.
+  return FuncInternalSimilarity * (1 - WeightDistance);
+}
+
+double
+SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
+                                            uint64_t BaseFuncSample,
+                                            uint64_t TestFuncSample) const {
+
+  double BaseFrac = 0.0;
+  double TestFrac = 0.0;
+  assert(ProfOverlap.BaseSample > 0 &&
+         "Total samples in base profile should be greater than 0");
+  BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
+  assert(ProfOverlap.TestSample > 0 &&
+         "Total samples in test profile should be greater than 0");
+  TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
+  return FuncSimilarity * (BaseFrac + TestFrac);
+}
+
+double SampleOverlapAggregator::computeSampleFunctionOverlap(
+    const sampleprof::FunctionSamples *BaseFunc,
+    const sampleprof::FunctionSamples *TestFunc,
+    SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
+    uint64_t TestFuncSample) {
+  // Default function internal similarity before weighted, meaning two functions
+  // have no overlap.
+  const double DefaultFuncInternalSimilarity = 0;
+  double FuncSimilarity;
+  double FuncInternalSimilarity;
+
+  // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
+  // In this case, we use DefaultFuncInternalSimilarity as the function internal
+  // similarity.
+  if (!BaseFunc || !TestFunc) {
+    FuncInternalSimilarity = DefaultFuncInternalSimilarity;
+  } else {
+    assert(FuncOverlap != nullptr &&
+           "FuncOverlap should be provided in this case");
+    FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
+        *BaseFunc, *TestFunc, *FuncOverlap);
+    // Now, FuncInternalSimilarity may be a little less than 0 due to
+    // imprecision of floating point accumulations. Make it zero if the
+    // difference is below Epsilon.
+    FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
+                                 ? 0
+                                 : FuncInternalSimilarity;
+  }
+  FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
+                                           BaseFuncSample, TestFuncSample);
+  return FuncSimilarity;
+}
+
+void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
+  using namespace sampleprof;
+
+  StringMap<const FunctionSamples *> BaseFuncProf;
+  const auto &BaseProfiles = BaseReader->getProfiles();
+  for (const auto &BaseFunc : BaseProfiles) {
+    BaseFuncProf.try_emplace(BaseFunc.second.getFuncName(), &(BaseFunc.second));
+  }
+  ProfOverlap.UnionCount = BaseFuncProf.size();
+
+  const auto &TestProfiles = TestReader->getProfiles();
+  for (const auto &TestFunc : TestProfiles) {
+    SampleOverlapStats FuncOverlap;
+    FuncOverlap.TestName = TestFunc.second.getFuncName();
+    assert(TestStats.count(FuncOverlap.TestName) &&
+           "TestStats should have records for all functions in test profile "
+           "except inlinees");
+    FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
+
+    const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
+    if (Match == BaseFuncProf.end()) {
+      const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
+      ++ProfOverlap.TestUniqueCount;
+      ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
+      FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
+
+      updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
+
+      double FuncSimilarity = computeSampleFunctionOverlap(
+          nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
ProfOverlap.Similarity += + weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum); + + ++ProfOverlap.UnionCount; + ProfOverlap.UnionSample += FuncStats.SampleSum; + } else { + ++ProfOverlap.OverlapCount; + + // Two functions match with each other. Compute function-level overlap and + // aggregate them into profile-level overlap. + FuncOverlap.BaseName = Match->second->getFuncName(); + assert(BaseStats.count(FuncOverlap.BaseName) && + "BaseStats should have records for all functions in base profile " + "except inlinees"); + FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum; + + FuncOverlap.Similarity = computeSampleFunctionOverlap( + Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample, + FuncOverlap.TestSample); + ProfOverlap.Similarity += + weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample, + FuncOverlap.TestSample); + ProfOverlap.OverlapSample += FuncOverlap.OverlapSample; + ProfOverlap.UnionSample += FuncOverlap.UnionSample; + + // Accumulate the percentage of base unique and test unique samples into + // ProfOverlap. + ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample; + ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample; + + // Remove matched base functions for later reporting functions not found + // in test profile. + BaseFuncProf.erase(Match); + } + + // Print function-level similarity information if specified by options. 
+ assert(TestStats.count(FuncOverlap.TestName) && + "TestStats should have records for all functions in test profile " + "except inlinees"); + if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff || + (Match != BaseFuncProf.end() && + FuncOverlap.Similarity < LowSimilarityThreshold) || + (Match != BaseFuncProf.end() && !FuncFilter.NameFilter.empty() && + FuncOverlap.BaseName.find(FuncFilter.NameFilter) != + FuncOverlap.BaseName.npos)) { + assert(ProfOverlap.BaseSample > 0 && + "Total samples in base profile should be greater than 0"); + FuncOverlap.BaseWeight = + static_cast(FuncOverlap.BaseSample) / ProfOverlap.BaseSample; + assert(ProfOverlap.TestSample > 0 && + "Total samples in test profile should be greater than 0"); + FuncOverlap.TestWeight = + static_cast(FuncOverlap.TestSample) / ProfOverlap.TestSample; + FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap); + } + } + + // Traverse through functions in base profile but not in test profile. + for (const auto &F : BaseFuncProf) { + assert(BaseStats.count(F.second->getFuncName()) && + "BaseStats should have records for all functions in base profile " + "except inlinees"); + const FuncSampleStats &FuncStats = BaseStats[F.second->getFuncName()]; + ++ProfOverlap.BaseUniqueCount; + ProfOverlap.BaseUniqueSample += FuncStats.SampleSum; + + updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount); + + double FuncSimilarity = computeSampleFunctionOverlap( + nullptr, nullptr, nullptr, FuncStats.SampleSum, 0); + ProfOverlap.Similarity += + weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0); + + ProfOverlap.UnionSample += FuncStats.SampleSum; + } + + // Now, ProfSimilarity may be a little greater than 1 due to imprecision + // of floating point accumulations. Make it 1.0 if the difference is below + // Epsilon. + ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon) + ? 
1 + : ProfOverlap.Similarity; + + computeHotFuncOverlap(); +} + +void SampleOverlapAggregator::initializeSampleProfileOverlap() { + const auto &BaseProf = BaseReader->getProfiles(); + for (const auto &I : BaseProf) { + ++ProfOverlap.BaseCount; + FuncSampleStats FuncStats; + getFuncSampleStats(I.second, FuncStats, BaseHotThreshold); + ProfOverlap.BaseSample += FuncStats.SampleSum; + BaseStats.try_emplace(I.second.getFuncName(), FuncStats); + } + + const auto &TestProf = TestReader->getProfiles(); + for (const auto &I : TestProf) { + ++ProfOverlap.TestCount; + FuncSampleStats FuncStats; + getFuncSampleStats(I.second, FuncStats, TestHotThreshold); + ProfOverlap.TestSample += FuncStats.SampleSum; + TestStats.try_emplace(I.second.getFuncName(), FuncStats); + } + + ProfOverlap.BaseName = StringRef(BaseFilename); + ProfOverlap.TestName = StringRef(TestFilename); +} + +void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const { + using namespace sampleprof; + + if (FuncSimilarityDump.empty()) + return; + + formatted_raw_ostream FOS(OS); + FOS << "Function-level details:\n"; + FOS << "Base weight"; + FOS.PadToColumn(TestWeightCol); + FOS << "Test weight"; + FOS.PadToColumn(SimilarityCol); + FOS << "Similarity"; + FOS.PadToColumn(OverlapCol); + FOS << "Overlap"; + FOS.PadToColumn(BaseUniqueCol); + FOS << "Base unique"; + FOS.PadToColumn(TestUniqueCol); + FOS << "Test unique"; + FOS.PadToColumn(BaseSampleCol); + FOS << "Base samples"; + FOS.PadToColumn(TestSampleCol); + FOS << "Test samples"; + FOS.PadToColumn(FuncNameCol); + FOS << "Function name\n"; + for (const auto &F : FuncSimilarityDump) { + double OverlapPercent = + F.second.UnionSample > 0 + ? static_cast(F.second.OverlapSample) / F.second.UnionSample + : 0; + double BaseUniquePercent = + F.second.BaseSample > 0 + ? static_cast(F.second.BaseUniqueSample) / + F.second.BaseSample + : 0; + double TestUniquePercent = + F.second.TestSample > 0 + ? 
static_cast(F.second.TestUniqueSample) / + F.second.TestSample + : 0; + + FOS << format("%.2f%%", F.second.BaseWeight * 100); + FOS.PadToColumn(TestWeightCol); + FOS << format("%.2f%%", F.second.TestWeight * 100); + FOS.PadToColumn(SimilarityCol); + FOS << format("%.2f%%", F.second.Similarity * 100); + FOS.PadToColumn(OverlapCol); + FOS << format("%.2f%%", OverlapPercent * 100); + FOS.PadToColumn(BaseUniqueCol); + FOS << format("%.2f%%", BaseUniquePercent * 100); + FOS.PadToColumn(TestUniqueCol); + FOS << format("%.2f%%", TestUniquePercent * 100); + FOS.PadToColumn(BaseSampleCol); + FOS << F.second.BaseSample; + FOS.PadToColumn(TestSampleCol); + FOS << F.second.TestSample; + FOS.PadToColumn(FuncNameCol); + FOS << F.second.TestName << "\n"; + } +} + +void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const { + OS << "Profile overlap infomation for base_profile: " << ProfOverlap.BaseName + << " and test_profile: " << ProfOverlap.TestName << "\nProgram level:\n"; + + OS << " Whole program profile similarity: " + << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n"; + + assert(ProfOverlap.UnionSample > 0 && + "Total samples in two profile should be greater than 0"); + double OverlapPercent = + static_cast(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample; + assert(ProfOverlap.BaseSample > 0 && + "Total samples in base profile should be greater than 0"); + double BaseUniquePercent = static_cast(ProfOverlap.BaseUniqueSample) / + ProfOverlap.BaseSample; + assert(ProfOverlap.TestSample > 0 && + "Total samples in test profile should be greater than 0"); + double TestUniquePercent = static_cast(ProfOverlap.TestUniqueSample) / + ProfOverlap.TestSample; + + OS << " Whole program sample overlap: " + << format("%.3f%%", OverlapPercent * 100) << "\n"; + OS << " percentage of samples unique in base profile: " + << format("%.3f%%", BaseUniquePercent * 100) << "\n"; + OS << " percentage of samples unique in test profile: " + << format("%.3f%%", 
TestUniquePercent * 100) << "\n"; + OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n" + << " total samples in test profile: " << ProfOverlap.TestSample << "\n"; + + assert(ProfOverlap.UnionCount > 0 && + "There should be at least one function in two input profiles"); + double FuncOverlapPercent = + static_cast(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount; + OS << " Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100) + << "\n"; + OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n"; + OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount + << "\n"; + OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount + << "\n"; +} + +void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap( + raw_fd_ostream &OS) const { + assert(HotFuncOverlap.UnionCount > 0 && + "There should be at least one hot function in two input profiles"); + OS << " Hot-function overlap: " + << format("%.3f%%", static_cast(HotFuncOverlap.OverlapCount) / + HotFuncOverlap.UnionCount * 100) + << "\n"; + OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n"; + OS << " hot functions unique in base profile: " + << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n"; + OS << " hot functions unique in test profile: " + << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n"; + + assert(HotBlockOverlap.UnionCount > 0 && + "There should be at least one hot block in two input profiles"); + OS << " Hot-block overlap: " + << format("%.3f%%", static_cast(HotBlockOverlap.OverlapCount) / + HotBlockOverlap.UnionCount * 100) + << "\n"; + OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n"; + OS << " hot blocks unique in base profile: " + << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n"; + OS << " hot blocks unique in test profile: " + << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n"; +} + +std::error_code 
SampleOverlapAggregator::loadProfiles() { + using namespace sampleprof; + + LLVMContext Context; + auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context); + if (std::error_code EC = BaseReaderOrErr.getError()) + exitWithErrorCode(EC, BaseFilename); + + auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context); + if (std::error_code EC = TestReaderOrErr.getError()) + exitWithErrorCode(EC, TestFilename); + + BaseReader = std::move(BaseReaderOrErr.get()); + TestReader = std::move(TestReaderOrErr.get()); + + if (std::error_code EC = BaseReader->read()) + exitWithErrorCode(EC, BaseFilename); + if (std::error_code EC = TestReader->read()) + exitWithErrorCode(EC, TestFilename); + + // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in + // profile summary. + const uint64_t HotCutoff = 990000; + ProfileSummary &BasePS = BaseReader->getSummary(); + for (const auto &SummaryEntry : BasePS.getDetailedSummary()) { + if (SummaryEntry.Cutoff == HotCutoff) { + BaseHotThreshold = SummaryEntry.MinCount; + break; + } + } + + ProfileSummary &TestPS = TestReader->getSummary(); + for (const auto &SummaryEntry : TestPS.getDetailedSummary()) { + if (SummaryEntry.Cutoff == HotCutoff) { + TestHotThreshold = SummaryEntry.MinCount; + break; + } + } + return std::error_code(); +} + +void overlapSampleProfile(const std::string &BaseFilename, + const std::string &TestFilename, + const OverlapFuncFilters &FuncFilter, + uint64_t SimilarityCutoff, raw_fd_ostream &OS) { + using namespace sampleprof; + + // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics + // report 2--3 places after decimal point in percentage numbers. 
+ SampleOverlapAggregator OverlapAggr( + BaseFilename, TestFilename, + static_cast(SimilarityCutoff) / 1000000, 0.000005, FuncFilter); + if (std::error_code EC = OverlapAggr.loadProfiles()) + exitWithErrorCode(EC); + + OverlapAggr.initializeSampleProfileOverlap(); + if (OverlapAggr.detectZeroSampleProfile(OS)) + return; + + OverlapAggr.computeSampleProfileOverlap(OS); + + OverlapAggr.dumpProgramSummary(OS); + OverlapAggr.dumpHotFuncAndBlockOverlap(OS); + OverlapAggr.dumpFuncSimilarity(OS); +} + static int overlap_main(int argc, const char *argv[]) { cl::opt BaseFilename(cl::Positional, cl::Required, cl::desc("")); @@ -963,6 +1887,15 @@ static int overlap_main(int argc, const char *argv[]) { cl::opt FuncNameFilter( "function", cl::desc("Function level overlap information for matching functions")); + cl::opt SimilarityCutoff( + "similarity-cutoff", cl::init(0), + cl::desc( + "For sample profiles, list function names for overlapped functions " + "with similarities below the cutoff (percentage times 10000).")); + cl::opt ProfileKind( + cl::desc("Profile kind:"), cl::init(instr), + cl::values(clEnumVal(instr, "Instrumentation profile (default)"), + clEnumVal(sample, "Sample profile"))); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n"); std::error_code EC; @@ -970,9 +1903,14 @@ static int overlap_main(int argc, const char *argv[]) { if (EC) exitWithErrorCode(EC, Output); - overlapInstrProfile(BaseFilename, TestFilename, - OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS, - IsCS); + if (ProfileKind == instr) + overlapInstrProfile(BaseFilename, TestFilename, + OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS, + IsCS); + else + overlapSampleProfile(BaseFilename, TestFilename, + OverlapFuncFilters{ValueCutoff, FuncNameFilter}, + SimilarityCutoff, OS); return 0; } @@ -1267,17 +2205,21 @@ static void dumpHotFunctionList(const std::vector &ColumnTitle, uint64_t HotProfCount, uint64_t TotalProfCount, const std::string &HotFuncMetric, 
raw_fd_ostream &OS) { - assert(ColumnOffset.size() == ColumnTitle.size()); - assert(ColumnTitle.size() >= 4); - assert(TotalFuncCount > 0); + assert(ColumnOffset.size() == ColumnTitle.size() && + "ColumnOffset and ColumnTitle should have the same size"); + assert(ColumnTitle.size() >= 4 && + "ColumnTitle should have at least 4 elements"); + assert(TotalFuncCount > 0 && + "There should be at least one function in the profile"); double TotalProfPercent = 0; if (TotalProfCount > 0) - TotalProfPercent = ((double)HotProfCount) / TotalProfCount * 100; + TotalProfPercent = static_cast(HotProfCount) / TotalProfCount * 100; formatted_raw_ostream FOS(OS); FOS << HotFuncCount << " out of " << TotalFuncCount << " functions with profile (" - << format("%.2f%%", (((double)HotFuncCount) / TotalFuncCount * 100)) + << format("%.2f%%", + (static_cast(HotFuncCount) / TotalFuncCount * 100)) << ") are considered hot functions"; if (!HotFuncMetric.empty()) FOS << " (" << HotFuncMetric << ")"; @@ -1318,7 +2260,6 @@ showHotFunctionList(const StringMap &Profiles, break; } } - assert(MinCountThreshold != 0); // Traverse all functions in the profile and keep only hot functions. // The following loop also calculates the sum of total samples of all @@ -1329,18 +2270,16 @@ showHotFunctionList(const StringMap &Profiles, uint64_t ProfileTotalSample = 0; uint64_t HotFuncSample = 0; uint64_t HotFuncCount = 0; - uint64_t MaxCount = 0; + for (const auto &I : Profiles) { + FuncSampleStats FuncStats; const FunctionSamples &FuncProf = I.second; ProfileTotalSample += FuncProf.getTotalSamples(); - MaxCount = FuncProf.getMaxCountInside(); + getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold); - // MinCountThreshold is a block/line threshold computed for a given cutoff. - // We intentionally compare the maximum sample count in a function with this - // threshold to get an approximate threshold for hot functions. 
- if (MaxCount >= MinCountThreshold) { + if (isFunctionHot(FuncStats, MinCountThreshold)) { HotFunc.emplace(FuncProf.getTotalSamples(), - std::make_pair(&(I.second), MaxCount)); + std::make_pair(&(I.second), FuncStats.MaxSample)); HotFuncSample += FuncProf.getTotalSamples(); ++HotFuncCount; }