From 04c3040f417683e7c31b3ee3381a3263106f48c5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 1 Mar 2021 13:43:23 -0800 Subject: [PATCH] [InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF `__llvm_prf_vnodes` and `__llvm_prf_names` are used by the runtime but not referenced via relocation in the translation unit. With `-z start-stop-gc` (D96914 https://sourceware.org/bugzilla/show_bug.cgi?id=27451), the linker no longer lets `__start_/__stop_` references retain them. Place `__llvm_prf_vnodes` and `__llvm_prf_names` in `llvm.used` to make them retained by the linker. This patch changes most existing `UsedVars` cases to `CompilerUsedVars` to reflect the ideal state - if the binary format properly supports section based GC (dead stripping), `llvm.compiler.used` should be sufficient. `__llvm_prf_vnodes` and `__llvm_prf_names` are switched to `UsedVars` since we want them to be unconditionally retained by both compiler and linker. Behaviors on other binary formats (COFF/Mach-O) are not affected. 
Differential Revision: https://reviews.llvm.org/D97649 --- .../Linux/Inputs/instrprof-value-merge.c | 70 +++++++++++++++ .../profile/Linux/instrprof-value-merge-lld.c | 10 +++ .../profile/Linux/instrprof-value-merge.c | 90 ++++--------------- .../Instrumentation/InstrProfiling.h | 1 + .../Instrumentation/InstrProfiling.cpp | 21 ++++- .../Instrumentation/InstrProfiling/icall.ll | 6 ++ 6 files changed, 123 insertions(+), 75 deletions(-) create mode 100644 compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c create mode 100644 compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c diff --git a/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c new file mode 100644 index 000000000000..a23bc484943e --- /dev/null +++ b/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c @@ -0,0 +1,70 @@ +#include <string.h> + +void (*f0)(); +void (*f1)(); +void (*f2)(); + +char dst[200]; +char src[200]; +volatile int n; + +__attribute__((noinline)) void foo() {} + +__attribute__((noinline)) void bar() { + f0 = foo; + f1 = foo; + f2 = foo; + n = 4; +} +int main(int argc, char *argv[]) { + int i; + bar(); + if (argc == 1) { + f0(); + for (i = 0; i < 9; i++) + f1(); + for (i = 0; i < 99; i++) + f2(); + } else { + memcpy((void *)dst, (void *)src, n); + for (i = 0; i < 6; i++) + memcpy((void *)(dst + 2), (void *)src, n + 1); + for (i = 0; i < 66; i++) + memcpy((void *)(dst + 9), (void *)src, n + 2); + } +} + +// CHECK: Counters: +// CHECK: main: +// CHECK: Hash: 0x0a9bd81e87ab6e87 +// CHECK: Counters: 6 +// CHECK: Indirect Call Site Count: 3 +// CHECK: Number of Memory Intrinsics Calls: 3 +// CHECK: Block counts: [27, 297, 12, 132, 3, 2] +// CHECK: Indirect Target Results: +// CHECK: [ 0, foo, 3 ] +// CHECK: [ 1, foo, 27 ] +// CHECK: [ 2, foo, 297 ] +// CHECK: Memory Intrinsic Size Results: +// CHECK: [ 0, 4, 2 ] +// CHECK: [ 1, 5, 12 ] +// CHECK: [ 2, 6, 132 ] +// CHECK: Instrumentation level: IR +// 
CHECK: Functions shown: 1 +// CHECK: Total functions: 3 +// CHECK: Maximum function count: 327 +// CHECK: Maximum internal block count: 297 +// CHECK: Statistics for indirect call sites profile: +// CHECK: Total number of sites: 3 +// CHECK: Total number of sites with values: 3 +// CHECK: Total number of profiled values: 3 +// CHECK: Value sites histogram: +// CHECK: NumTargets, SiteCount +// CHECK: 1, 3 +// CHECK: Statistics for memory intrinsic calls sizes profile: +// CHECK: Total number of sites: 3 +// CHECK: Total number of sites with values: 3 +// CHECK: Total number of profiled values: 3 +// CHECK: Value sites histogram: +// CHECK: NumTargets, SiteCount +// CHECK: 1, 3 diff --git a/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c b/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c new file mode 100644 index 000000000000..e0079c02b850 --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c @@ -0,0 +1,10 @@ +// REQUIRES: lld-available + +// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=lld -ffunction-sections -fdata-sections -Wl,--gc-sections -z start-stop-gc +// RUN: rm -rf %t.profdir +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c diff --git a/compiler-rt/test/profile/Linux/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/instrprof-value-merge.c index 2619a1d00336..45eed474b4ab 100644 --- a/compiler-rt/test/profile/Linux/instrprof-value-merge.c +++ b/compiler-rt/test/profile/Linux/instrprof-value-merge.c @@ -1,79 +1,27 @@ -// RUN: 
%clang_pgogen -o %t -O3 %s +// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c // RUN: rm -rf %t.profdir // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 -// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s +// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c -#include <string.h> +/// -z start-stop-gc requires binutils 2.37. +// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=bfd -ffunction-sections -fdata-sections -Wl,--gc-sections +// RUN: rm -rf %t.profdir +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c -void (*f0)(); -void (*f1)(); -void (*f2)(); - -char dst[200]; -char src[200]; -volatile int n; - -__attribute__((noinline)) void foo() {} - -__attribute__((noinline)) void bar() { - f0 = foo; - f1 = foo; - f2 = foo; - n = 4; -} -int main(int argc, char *argv[]) { - int i; - bar(); - if (argc == 1) { - f0(); - for (i = 0; i < 9; i++) - f1(); - for (i = 0; i < 99; i++) - f2(); - } else { - memcpy((void *)dst, (void *)src, n); - for (i = 0; i < 6; i++) - memcpy((void *)(dst + 2), (void *)src, n + 1); - for (i = 0; i < 66; i++) - memcpy((void 
*)(dst + 9), (void *)src, n + 2); - } -} - -// CHECK: Counters: -// CHECK: main: -// CHECK: Hash: 0x0a9bd81e87ab6e87 -// CHECK: Counters: 6 -// CHECK: Indirect Call Site Count: 3 -// CHECK: Number of Memory Intrinsics Calls: 3 -// CHECK: Block counts: [27, 297, 12, 132, 3, 2] -// CHECK: Indirect Target Results: -// CHECK: [ 0, foo, 3 ] -// CHECK: [ 1, foo, 27 ] -// CHECK: [ 2, foo, 297 ] -// CHECK: Memory Intrinsic Size Results: -// CHECK: [ 0, 4, 2 ] -// CHECK: [ 1, 5, 12 ] -// CHECK: [ 2, 6, 132 ] -// CHECK: Instrumentation level: IR -// CHECK: Functions shown: 1 -// CHECK: Total functions: 3 -// CHECK: Maximum function count: 327 -// CHECK: Maximum internal block count: 297 -// CHECK: Statistics for indirect call sites profile: -// CHECK: Total number of sites: 3 -// CHECK: Total number of sites with values: 3 -// CHECK: Total number of profiled values: 3 -// CHECK: Value sites histogram: -// CHECK: NumTargets, SiteCount -// CHECK: 1, 3 -// CHECK: Statistics for memory intrinsic calls sizes profile: -// CHECK: Total number of sites: 3 -// CHECK: Total number of sites with values: 3 -// CHECK: Total number of profiled values: 3 -// CHECK: Value sites histogram: -// CHECK: NumTargets, SiteCount -// CHECK: 1, 3 +// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=gold -ffunction-sections -fdata-sections -Wl,--gc-sections +// RUN: rm -rf %t.profdir +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h 
b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 5242211138f5..94b156f3b137 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -57,6 +57,7 @@ private: } }; DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap; + std::vector<GlobalValue *> CompilerUsedVars; std::vector<GlobalValue *> UsedVars; std::vector<GlobalVariable *> ReferencedNames; GlobalVariable *NamesVar; diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index d73bb66ed003..a17d6f52d77d 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -539,6 +539,7 @@ bool InstrProfiling::run( NamesVar = nullptr; NamesSize = 0; ProfileDataMap.clear(); + CompilerUsedVars.clear(); UsedVars.clear(); TT = Triple(M.getTargetTriple()); @@ -921,7 +922,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { ProfileDataMap[NamePtr] = PD; // Mark the data variable as used so that it isn't stripped out. - UsedVars.push_back(Data); + CompilerUsedVars.push_back(Data); // Now that the linkage set by the FE has been passed to the data and counter // variables, reset Name variable's linkage and visibility to private so that // it can be removed later by the compiler. @@ -976,6 +977,8 @@ void InstrProfiling::emitVNodes() { Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); VNodesVar->setSection( getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat())); + // VNodesVar is used by runtime but not referenced via relocation by other + // sections. Conservatively make it linker retained. UsedVars.push_back(VNodesVar); } @@ -1004,6 +1007,8 @@ void InstrProfiling::emitNameData() { // linker from inserting padding before the start of the names section or // between names entries. NamesVar->setAlignment(Align(1)); + // NamesVar is used by runtime but not referenced via relocation by other + // sections. 
Conservatively make it linker retained. UsedVars.push_back(NamesVar); for (auto *NamePtr : ReferencedNames) @@ -1031,6 +1036,9 @@ void InstrProfiling::emitRegistration() { getInstrProfRegFuncName(), M); IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF)); + for (Value *Data : CompilerUsedVars) + if (Data != NamesVar && !isa<Function>(Data)) + IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); for (Value *Data : UsedVars) if (Data != NamesVar && !isa<Function>(Data)) IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); @@ -1081,7 +1089,7 @@ bool InstrProfiling::emitRuntimeHook() { IRB.CreateRet(Load); // Mark the user variable as used so that it isn't stripped out. - UsedVars.push_back(User); + CompilerUsedVars.push_back(User); return true; } @@ -1094,9 +1102,14 @@ void InstrProfiling::emitUses() { // or discarded as a unit, so llvm.compiler.used is sufficient. Otherwise, // conservatively make all of them retained by the linker. if (TT.isOSBinFormatELF()) - appendToCompilerUsed(*M, UsedVars); + appendToCompilerUsed(*M, CompilerUsedVars); else - appendToUsed(*M, UsedVars); + appendToUsed(*M, CompilerUsedVars); + + // We do not add proper references from used metadata sections to NamesVar and + // VNodesVar, so we have to be conservative and place them in llvm.used + // regardless of the target. + appendToUsed(*M, UsedVars); } void InstrProfiling::emitInitialization() { diff --git a/llvm/test/Instrumentation/InstrProfiling/icall.ll b/llvm/test/Instrumentation/InstrProfiling/icall.ll index 311770ae5707..bc7d6c90d0be 100644 --- a/llvm/test/Instrumentation/InstrProfiling/icall.ll +++ b/llvm/test/Instrumentation/InstrProfiling/icall.ll @@ -50,6 +50,12 @@ attributes #0 = { nounwind } ; DYN-NOT: @__profvp_foo ; DYN-NOT: @__llvm_prf_vnodes +;; __llvm_prf_vnodes and __llvm_prf_nm are not referenced by other metadata sections. +;; We have to conservatively place them in llvm.used. 
+; STATIC: @llvm.used = appending global +; STATIC-SAME: @__llvm_prf_vnodes +; STATIC-SAME: @__llvm_prf_nm + ; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0) ; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0) ; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0)