diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfbin deleted file mode 100755 index e4e698e91099..000000000000 Binary files a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfbin and /dev/null differ diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfscript deleted file mode 100644 index 3ec8f44cfef0..000000000000 --- a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfscript +++ /dev/null @@ -1,4 +0,0 @@ -PERF_RECORD_MMAP2 3019402/3019402: [0x400000(0x1000) @ 0 00:1d 265650677 1451231]: r-xp recursion-compression-noprobe.perfbin - - 4007e1 - 0x4007d6/0x4007e1/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x400795/0x4007b0/P/-/-/0 0x40079c/0x400790/P/-/-/0 0x400801/0x400770/P/-/-/0 0x400698/0x400801/P/-/-/0 0x400673/0x400696/P/-/-/0 diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfbin deleted file mode 100755 index a3dbda2f0b3e..000000000000 Binary files a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfbin and /dev/null differ diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfscript deleted file mode 100644 index 91a69e2c9dd0..000000000000 --- a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfscript +++ /dev/null @@ -1,23 +0,0 @@ -PERF_RECORD_MMAP2 3367317/3367317: [0x201000(0x1000) @ 0 00:1d 238458915 1121070]: r-xp recursion-compression-pseudoprobe.perfbin - - 2017db - 2017ba - 2017e5 - 2017ba - 2017e5 - 2017d9 - 2017ba - 2017b0 - 2017b0 - 2017b0 - 2017b0 - 2017b0 - 2017b0 - 2017b0 - 2017b0 - 2017e5 - 2017d9 - 201847 - 7fcb072a67c3 - 5541f689495641d7 - 0x2017cd/0x2017db/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017e0/0x2017a0/P/-/-/0 0x2017cd/0x2017db/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017e0/0x2017a0/P/-/-/0 0x2017cd/0x2017db/P/-/-/0 0x2017d4/0x2017c0/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test deleted file mode 100644 index 47e0a51a4261..000000000000 --- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test +++ /dev/null @@ -1,65 +0,0 @@ -; Firstly test uncompression(--compress-recursion=0) -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0 -; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t -; RUN: FileCheck %s --input-file %t - -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 2: 13 fb:11 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0 -; CHECK-UNCOMPRESS: 1: 11 -; CHECK-UNCOMPRESS: 2: 1 fa:1 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 2: 2 fb:1 -; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0 -; CHECK-UNCOMPRESS: 2: 1 -; CHECK-UNCOMPRESS: 3: 2 fa:1 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0 -; CHECK-UNCOMPRESS: 4: 1 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0 -; CHECK-UNCOMPRESS: 2: 1 fa:1 - -; CHECK: [main:1 @ foo:3 @ fa]:14:0 -; CHECK: 1: 1 -; CHECK: 2: 13 fb:11 -; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:12:0 -; CHECK: 1: 11 -; CHECK: 2: 1 fa:1 -; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:4:0 -; CHECK: 1: 1 -; CHECK: 2: 2 fb:1 -; CHECK: 4: 1 -; CHECK: [main:1 @ foo]:3:0 -; CHECK: 2: 1 -; CHECK: 3: 2 fa:1 -; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:0:0 - - -; original code: -; clang -O3 -g test.c -o a.out -#include - -int fb(int n) { - if(n > 10) return fb(n / 2); - return fa(n - 1); -} - -int fa(int n) { - if(n < 2) return n; - if(n % 2) return fb(n - 1); - return fa(n - 1); -} - -void foo() { - int s, i = 0; - while (i++ < 10000) - s += fa(i); - printf("sum is %d\n", s); -} - -int main() { - foo(); - return 0; -} diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test deleted file mode 100644 index 86afe6c632bd..000000000000 --- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test +++ /dev/null @@ -1,169 +0,0 @@ -; Firstly test uncompression(--compress-recursion=0) -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 -; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER -; RUN: FileCheck %s --input-file %t - -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 3: 1 -; CHECK-UNCOMPRESS: 4: 1 -; CHECK-UNCOMPRESS: 7: 1 fb:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 3: 1 -; CHECK-UNCOMPRESS: 4: 1 -; CHECK-UNCOMPRESS: 7: 1 fb:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 3: 1 -; CHECK-UNCOMPRESS: 5: 1 -; CHECK-UNCOMPRESS: 8: 1 fa:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 3: 1 -; CHECK-UNCOMPRESS: 6: 1 fa:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 3: 1 -; CHECK-UNCOMPRESS: 6: 1 fa:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 3: 1 -; CHECK-UNCOMPRESS: 6: 1 fa:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 2: 1 -; CHECK-UNCOMPRESS: 5: 1 fb:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 2: 1 -; CHECK-UNCOMPRESS: 5: 1 fb:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 2: 1 -; CHECK-UNCOMPRESS: 5: 1 fb:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb:6 @ fa]:2:1 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 3: 1 -; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909 -; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0 -; CHECK-UNCOMPRESS: 5: 1 fb:1 -; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 - - -; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4 -; CHECK: 1: 4 -; CHECK: 2: 3 -; CHECK: 3: 1 -; CEHCK: 5: 4 fb:4 -; CHECK: 6: 1 fa:1 -; CHECK !CFGChecksum: 72617220756 -; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:6:2 -; CHECK: 1: 2 -; CHECK: 3: 2 -; CHECK: 4: 1 -; CHECK: 7: 1 fb:1 -; CHECK: !CFGChecksum: 120515930909 -; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1 -; CHECK: 1: 1 -; CHECK: 3: 1 -; CHECK: 4: 1 -; CHECK: 7: 1 fb:1 -; CHECK: !CFGChecksum: 120515930909 -; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa]:4:1 -; CHECK: 1: 1 -; CHECK: 3: 1 -; CHECK: 5: 1 -; CHECK: 8: 1 fa:1 -; CHECK: !CFGChecksum: 120515930909 -; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1 -; CHECK: 1: 1 -; CHECK: 3: 1 -; CHECK: 6: 1 fa:1 -; CHECK: !CFGChecksum: 72617220756 -; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1 -; CHECK: 1: 1 -; CHECK: 3: 1 -; CHECK: 6: 1 fa:1 -; CHECK: !CFGChecksum: 72617220756 - - -; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Range Counter: -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 -; CHECK-UNWINDER: (7a0, 7a7): 1 -; CHECK-UNWINDER: (7a0, 7ab): 3 -; CHECK-UNWINDER: (7b2, 7b5): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 -; CHECK-UNWINDER: (7c0, 7d4): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 -; CHECK-UNWINDER: (7c0, 7cd): 1 -; CHECK-UNWINDER: (7db, 7e0): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 -; CHECK-UNWINDER: (7a0, 7a7): 1 -; CHECK-UNWINDER: (7b2, 7b5): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 -; CHECK-UNWINDER: (7c0, 7cd): 2 -; CHECK-UNWINDER: (7db, 7e0): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 -; CHECK-UNWINDER: (7a0, 7a7): 1 -; CHECK-UNWINDER: (7b2, 7b5): 1 - -; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Branch Counter: -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 -; CHECK-UNWINDER: (7a7, 7b2): 1 -; CHECK-UNWINDER: (7ab, 7a0): 4 -; CHECK-UNWINDER: (7b5, 7c0): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 -; CHECK-UNWINDER: (7d4, 7c0): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 -; CHECK-UNWINDER: (7cd, 7db): 1 -; CHECK-UNWINDER: (7e0, 7a0): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 -; CHECK-UNWINDER: (7a7, 7b2): 1 -; CHECK-UNWINDER: (7b5, 7c0): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 -; CHECK-UNWINDER: (7cd, 7db): 2 -; CHECK-UNWINDER: (7e0, 7a0): 1 -; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 -; CHECK-UNWINDER: (7a7, 7b2): 1 -; CHECK-UNWINDER: (7b5, 7c0): 1 - - -; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling -; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls -; -g test.c -o a.out - -#include - -int fb(int n) { - if(n > 10) return fb(n / 2); - return fa(n - 1); -} - -int fa(int n) { - if(n < 2) return n; - if(n % 2) return fb(n - 1); - return fa(n - 1); -} - -void foo() { - int s, i = 0; - while (i++ < 10000) - s += fa(i); - printf("sum is %d\n", s); -} - -int main() { - foo(); - return 0; -} diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index d05c665f8583..64a502be59a9 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// #include "PerfReader.h" -#include "ProfileGenerator.h" static cl::opt ShowMmapEvents("show-mmap-events", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, @@ -125,8 +124,6 @@ VirtualUnwinder::getOrCreateCounterForProbe(const ProfiledBinary *Binary, ProbeBasedKey->Probes.emplace_back(CallProbe); } } - CSProfileGenerator::compressRecursionContext( - ProbeBasedKey->Probes); ProbeBasedKey->genHashCode(); Hashable ContextId(ProbeBasedKey); auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter()); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index f769bd592f87..ce228a781538 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -22,22 +22,12 @@ static cl::opt OutputFormat( clEnumValN(SPF_GCC, "gcc", "GCC encoding (only meaningful for -sample)"))); -static cl::opt RecursionCompression( - "compress-recursion", - cl::desc("Compressing recursion by deduplicating adjacent frame " - "sequences up to the specified size. -1 means no size limit."), - cl::Hidden, - cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); - using namespace llvm; using namespace sampleprof; namespace llvm { namespace sampleprof { -// Initialize the MaxCompressionSize to -1 which means no size limit -int32_t CSProfileGenerator::MaxCompressionSize = -1; - static bool usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) { return BinarySampleCounters.size() && @@ -329,16 +319,26 @@ void CSProfileGenerator::populateInferredFunctionSamples() { } } -// Helper function to extract context prefix string stack -// Extract context stack for reusing, leaf context stack will -// be added compressed while looking up function profile -static void -extractPrefixContextStack(SmallVectorImpl &ContextStrStack, - const SmallVectorImpl &Probes, - ProfiledBinary *Binary) { +// Helper function to extract context prefix +// PrefixContextId is the context id string except for the leaf probe's +// context, the final ContextId will be: +// ContextId = PrefixContextId + LeafContextId; +// Remind that the string in ContextStrStack is in callee-caller order +// So process the string vector reversely +static std::string +extractPrefixContextId(const SmallVector &Probes, + ProfiledBinary *Binary) { + SmallVector ContextStrStack; for (const auto *P : Probes) { Binary->getInlineContextForProbe(P, ContextStrStack, true); } + std::ostringstream OContextStr; + for (auto &CxtStr : ContextStrStack) { + if (OContextStr.str().size()) + OContextStr << " @ "; + OContextStr << CxtStr; + } + return OContextStr.str(); } void PseudoProbeCSProfileGenerator::generateProfile() { @@ -350,15 +350,15 @@ void PseudoProbeCSProfileGenerator::generateProfile() { for (const auto &CI : BI.second) { const ProbeBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); - SmallVector ContextStrStack; - extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary); + std::string PrefixContextId = + extractPrefixContextId(CtxKey->Probes, Binary); // Fill in function body samples from probes, also infer caller's samples // from callee's probe - populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack, + populateBodySamplesWithProbes(CI.second.RangeCounter, PrefixContextId, Binary); // Fill in boundary samples for a call probe populateBoundarySamplesWithProbes(CI.second.BranchCounter, - ContextStrStack, Binary); + PrefixContextId, Binary); } } } @@ -403,8 +403,8 @@ void PseudoProbeCSProfileGenerator::extractProbesFromRange( } void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( - const RangeSample &RangeCounter, - SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary) { + const RangeSample &RangeCounter, StringRef PrefixContextId, + ProfiledBinary *Binary) { ProbeCounterMap ProbeCounter; // Extract the top frame probes by looking up each address among the range in // the Address2ProbeMap @@ -413,7 +413,7 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( const PseudoProbe *Probe = PI.first; uint64_t Count = PI.second; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary); + getFunctionProfileForLeafProbe(PrefixContextId, Probe, Binary); FunctionProfile.addBodySamples(Probe->Index, 0, Count); FunctionProfile.addTotalSamples(Count); @@ -446,8 +446,8 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( } void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, - SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary) { + const BranchSample &BranchCounter, StringRef PrefixContextId, + ProfiledBinary *Binary) { for (auto BI : BranchCounter) { uint64_t SourceOffset = BI.first.first; uint64_t TargetOffset = BI.first.second; @@ -457,7 +457,7 @@ void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( if (CallProbe == nullptr) continue; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary); + getFunctionProfileForLeafProbe(PrefixContextId, CallProbe, Binary); FunctionProfile.addBodySamples(CallProbe->Index, 0, Count); FunctionProfile.addTotalSamples(Count); StringRef CalleeName = FunctionSamples::getCanonicalFnName( @@ -470,26 +470,25 @@ void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( } FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( - SmallVectorImpl &ContextStrStack, + StringRef PrefixContextId, SmallVector &LeafInlinedContext, const PseudoProbeFuncDesc *LeafFuncDesc) { - assert(ContextStrStack.size() && "Profile context must have the leaf frame"); - // Compress the context string except for the leaf frame - std::string LeafFrame = ContextStrStack.back(); - ContextStrStack.pop_back(); - CSProfileGenerator::compressRecursionContext(ContextStrStack); - + assert(LeafInlinedContext.size() && + "Profile context must have the leaf frame"); std::ostringstream OContextStr; - for (uint32_t I = 0; I < ContextStrStack.size(); I++) { + OContextStr << PrefixContextId.str(); + + for (uint32_t I = 0; I < LeafInlinedContext.size() - 1; I++) { if (OContextStr.str().size()) OContextStr << " @ "; - OContextStr << ContextStrStack[I]; + OContextStr << LeafInlinedContext[I]; } // For leaf inlined context with the top frame, we should strip off the top // frame's probe id, like: // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" if (OContextStr.str().size()) OContextStr << " @ "; - OContextStr << StringRef(LeafFrame).split(":").first.str(); + StringRef LeafLoc = LeafInlinedContext.back(); + OContextStr << LeafLoc.split(":").first.str(); FunctionSamples &FunctionProile = getFunctionProfileForContext(OContextStr.str()); @@ -498,18 +497,17 @@ FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( } FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( - SmallVectorImpl &ContextStrStack, const PseudoProbe *LeafProbe, + StringRef PrefixContextId, const PseudoProbe *LeafProbe, ProfiledBinary *Binary) { - // Explicitly copy the context for appending the leaf context - SmallVector ContextStrStackCopy(ContextStrStack.begin(), - ContextStrStack.end()); - Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy); + SmallVector LeafInlinedContext; + Binary->getInlineContextForProbe(LeafProbe, LeafInlinedContext); // Note that the context from probe doesn't include leaf frame, // hence we need to retrieve and append the leaf frame. const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID); - ContextStrStackCopy.emplace_back(FuncDesc->FuncName + ":" + - Twine(LeafProbe->Index).str()); - return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc); + LeafInlinedContext.emplace_back(FuncDesc->FuncName + ":" + + Twine(LeafProbe->Index).str()); + return getFunctionProfileForLeafProbe(PrefixContextId, LeafInlinedContext, + FuncDesc); } } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h index b37011b2b762..29f528026a0c 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -50,7 +50,6 @@ protected: */ void findDisjointRanges(RangeSample &DisjointRanges, const RangeSample &Ranges); - // Used by SampleProfileWriter StringMap ProfileMap; }; @@ -92,110 +91,6 @@ public: populateInferredFunctionSamples(); } - // Remove adjacent repeated context sequences up to a given sequence length, - // -1 means no size limit. Note that repeated sequences are identified based - // on the exact call site, this is finer granularity than function recursion. - template - static void compressRecursionContext(SmallVectorImpl &Context, - int32_t CSize = MaxCompressionSize) { - uint32_t I = 1; - uint32_t HS = static_cast(Context.size() / 2); - uint32_t MaxDedupSize = - CSize == -1 ? HS : std::min(static_cast(CSize), HS); - auto BeginIter = Context.begin(); - // Use an in-place algorithm to save memory copy - // End indicates the end location of current iteration's data - uint32_t End = 0; - // Deduplicate from length 1 to the max possible size of a repeated - // sequence. - while (I <= MaxDedupSize) { - // This is a linear algorithm that deduplicates adjacent repeated - // sequences of size I. The deduplication detection runs on a sliding - // window whose size is 2*I and it keeps sliding the window to deduplicate - // the data inside. Once duplication is detected, deduplicate it by - // skipping the right half part of the window, otherwise just copy back - // the new one by appending them at the back of End pointer(for the next - // iteration). - // - // For example: - // Input: [a1, a2, b1, b2] - // (Added index to distinguish the same char, the origin is [a, a, b, - // b], the size of the dedup window is 2(I = 1) at the beginning) - // - // 1) The initial status is a dummy window[null, a1], then just copy the - // right half of the window(End = 0), then slide the window. - // Result: [a1], a2, b1, b2 (End points to the element right before ], - // after ] is the data of the previous iteration) - // - // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of - // the window i.e the duplication happen. Only slide the window. - // Result: [a1], a2, b1, b2 - // - // 3) Next window is [a2, b1], copy the right half of the window(b1 is - // new) to the End and slide the window. - // Result: [a1, b1], b1, b2 - // - // 4) Next window is [b1, b2], same to 2), skip b2. - // Result: [a1, b1], b1, b2 - // After resize, it will be [a, b] - - // Use pointers like below to do comparison inside the window - // [a b c a b c] - // | | | | | - // LeftBoundary Left Right Left+I Right+I - // A duplication found if Left < LeftBoundry. - - int32_t Right = I - 1; - End = I; - int32_t LeftBoundary = 0; - while (Right + I < Context.size()) { - // To avoids scanning a part of a sequence repeatedly, it finds out - // the common suffix of two hald in the window. The common suffix will - // serve as the common prefix of next possible pair of duplicate - // sequences. The non-common part will be ignored and never scanned - // again. - - // For example. - // Input: [a, b1], c1, b2, c2 - // I = 2 - // - // 1) For the window [a, b1, c1, b2], non-common-suffix for the right - // part is 'c1', copy it and only slide the window 1 step. - // Result: [a, b1, c1], b2, c2 - // - // 2) Next window is [b1, c1, b2, c2], so duplication happen. - // Result after resize: [a, b, c] - - int32_t Left = Right; - while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) { - // Find the longest suffix inside the window. When stops, Left points - // at the diverging point in the current sequence. - Left--; - } - - bool DuplicationFound = (Left < LeftBoundary); - // Don't need to recheck the data before Right - LeftBoundary = Right + 1; - if (DuplicationFound) { - // Duplication found, skip right half of the window. - Right += I; - } else { - // Copy the non-common-suffix part of the adjacent sequence. - std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1, - BeginIter + End); - End += Left + I - Right; - // Only slide the window by the size of non-common-suffix - Right = Left + I; - } - } - // Don't forget the remaining part that's not scanned. - std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End); - End += Context.size() - Right - 1; - I++; - Context.resize(End); - } - } - protected: // Lookup or create FunctionSamples for the context FunctionSamples &getFunctionProfileForContext(StringRef ContextId); @@ -214,11 +109,6 @@ private: const BranchSample &BranchCounters, ProfiledBinary *Binary); void populateInferredFunctionSamples(); - -public: - // Deduplicate adjacent repeated context sequences up to a given sequence - // length. -1 means no size limit. - static int32_t MaxCompressionSize; }; using ProbeCounterMap = std::unordered_map; @@ -237,23 +127,22 @@ private: ProbeCounterMap &ProbeCounter, ProfiledBinary *Binary); // Fill in function body samples from probes - void - populateBodySamplesWithProbes(const RangeSample &RangeCounter, - SmallVectorImpl &ContextStrStack, - ProfiledBinary *Binary); + void populateBodySamplesWithProbes(const RangeSample &RangeCounter, + StringRef PrefixContextId, + ProfiledBinary *Binary); // Fill in boundary samples for a call probe - void populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, - SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary); + void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, + StringRef PrefixContextId, + ProfiledBinary *Binary); // Helper function to get FunctionSamples for the leaf inlined context - FunctionSamples & - getFunctionProfileForLeafProbe(SmallVectorImpl &ContextStrStack, - const PseudoProbeFuncDesc *LeafFuncDesc); + FunctionSamples &getFunctionProfileForLeafProbe( + StringRef PrefixContextId, + SmallVector &LeafInlinedContext, + const PseudoProbeFuncDesc *LeafFuncDesc); // Helper function to get FunctionSamples for the leaf probe - FunctionSamples & - getFunctionProfileForLeafProbe(SmallVectorImpl &ContextStrStack, - const PseudoProbe *LeafProbe, - ProfiledBinary *Binary); + FunctionSamples &getFunctionProfileForLeafProbe(StringRef PrefixContextId, + const PseudoProbe *LeafProbe, + ProfiledBinary *Binary); }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 16ef04aba99e..4b31dff8cd02 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -8,7 +8,6 @@ #include "ProfiledBinary.h" #include "ErrorHandling.h" -#include "ProfileGenerator.h" #include "llvm/ADT/Triple.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Support/CommandLine.h" @@ -129,7 +128,7 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1, std::string ProfiledBinary::getExpandedContextStr(const std::list &Stack) const { std::string ContextStr; - SmallVector ContextVec; + SmallVector ContextVec; // Process from frame root to leaf for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) { uint64_t Offset = virtualAddrToOffset(*Iter); @@ -140,22 +139,21 @@ ProfiledBinary::getExpandedContextStr(const std::list &Stack) const { } assert(ContextVec.size() && "Context length should be at least 1"); - // Compress the context string except for the leaf frame - std::string LeafFrame = ContextVec.back(); - ContextVec.pop_back(); - CSProfileGenerator::compressRecursionContext(ContextVec); std::ostringstream OContextStr; for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) { if (OContextStr.str().size()) { OContextStr << " @ "; } - OContextStr << ContextVec[I]; + + if (I == ContextVec.size() - 1) { + // Only keep the function name for the leaf frame + StringRef Ref(ContextVec[I]); + OContextStr << Ref.split(":").first.str(); + } else { + OContextStr << ContextVec[I]; + } } - // Only keep the function name for the leaf frame - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << StringRef(LeafFrame).split(":").first.str(); return OContextStr.str(); } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 552037b6c319..5408116a0354 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -243,7 +243,7 @@ public: } void getInlineContextForProbe(const PseudoProbe *Probe, - SmallVectorImpl &InlineContextStack, + SmallVector &InlineContextStack, bool IncludeLeaf = false) const { return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack, IncludeLeaf); diff --git a/llvm/tools/llvm-profgen/PseudoProbe.cpp b/llvm/tools/llvm-profgen/PseudoProbe.cpp index a537d9012e6d..700984e2184a 100644 --- a/llvm/tools/llvm-profgen/PseudoProbe.cpp +++ b/llvm/tools/llvm-profgen/PseudoProbe.cpp @@ -34,7 +34,7 @@ void PseudoProbeFuncDesc::print(raw_ostream &OS) { OS << "Hash: " << FuncHash << "\n"; } -void PseudoProbe::getInlineContext(SmallVectorImpl &ContextStack, +void PseudoProbe::getInlineContext(SmallVector &ContextStack, const GUIDProbeFunctionMap &GUID2FuncMAP, bool ShowName) const { uint32_t Begin = ContextStack.size(); @@ -320,7 +320,7 @@ PseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const { } void PseudoProbeDecoder::getInlineContextForProbe( - const PseudoProbe *Probe, SmallVectorImpl &InlineContextStack, + const PseudoProbe *Probe, SmallVector &InlineContextStack, bool IncludeLeaf) const { Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true); if (!IncludeLeaf) diff --git a/llvm/tools/llvm-profgen/PseudoProbe.h b/llvm/tools/llvm-profgen/PseudoProbe.h index 207772453c97..a6647eb39c7a 100644 --- a/llvm/tools/llvm-profgen/PseudoProbe.h +++ b/llvm/tools/llvm-profgen/PseudoProbe.h @@ -138,7 +138,7 @@ struct PseudoProbe { // Get the inlined context by traversing current inline tree backwards, // each tree node has its InlineSite which is taken as the context. // \p ContextStack is populated in root to leaf order - void getInlineContext(SmallVectorImpl &ContextStack, + void getInlineContext(SmallVector &ContextStack, const GUIDProbeFunctionMap &GUID2FuncMAP, bool ShowName) const; // Helper function to get the string from context stack @@ -214,7 +214,7 @@ public: // IncludeLeaf = false, Output: [main:1, foo:2] void getInlineContextForProbe(const PseudoProbe *Probe, - SmallVectorImpl &InlineContextStack, + SmallVector &InlineContextStack, bool IncludeLeaf) const; const AddressProbesMap &getAddress2ProbesMap() const { diff --git a/llvm/unittests/tools/CMakeLists.txt b/llvm/unittests/tools/CMakeLists.txt index 7861da8c0e38..e7c7dca68d49 100644 --- a/llvm/unittests/tools/CMakeLists.txt +++ b/llvm/unittests/tools/CMakeLists.txt @@ -7,4 +7,4 @@ endif() add_subdirectory( llvm-exegesis ) -add_subdirectory(llvm-profgen) + diff --git a/llvm/unittests/tools/llvm-profgen/CMakeLists.txt b/llvm/unittests/tools/llvm-profgen/CMakeLists.txt deleted file mode 100644 index 734e4b0cd00d..000000000000 --- a/llvm/unittests/tools/llvm-profgen/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_llvm_unittest(LLVMProfgenTests - ContextCompressionTest.cpp - ) - -target_link_libraries(LLVMProfgenTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp b/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp deleted file mode 100644 index 7f0cee8878af..000000000000 --- a/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===-- ContextCompressionTest.cpp -------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "../tools/llvm-profgen/ProfileGenerator.h" -#include "gtest/gtest.h" - -using namespace llvm; -using namespace sampleprof; - -TEST(TestCompression, TestNoSizeLimit1) { - SmallVector Context = {"a", "b", "c", "a", "b", "c"}; - SmallVector Expect = {"a", "b", "c"}; - CSProfileGenerator::compressRecursionContext(Context, -1); - EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin())); -} - -TEST(TestCompression, TestNoSizeLimit2) { - SmallVector Context = {"m", "a", "a", "b", "c", "a", - "b", "c", "b", "c", "d"}; - SmallVector Expect = {"m", "a", "b", "c", "d"}; - CSProfileGenerator::compressRecursionContext(Context, -1); - EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin())); -} - -TEST(TestCompression, TestMaxDedupSize) { - SmallVector Context = {"m", "a", "a", "b", "c", "a", - "b", "c", "b", "c", "d"}; - SmallVector Expect = {"m", "a", "b", "c", - "a", "b", "c", "d"}; - CSProfileGenerator::compressRecursionContext(Context, 2); - EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin())); -}