[InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF

`__llvm_prf_vnodes` and `__llvm_prf_names` are used by the runtime but are not
referenced via relocations in the translation unit.

With `-z start-stop-gc` (D96914 https://sourceware.org/bugzilla/show_bug.cgi?id=27451),
the linker no longer lets `__start_/__stop_` references retain them.

Place `__llvm_prf_vnodes` and `__llvm_prf_names` in `llvm.used` to make
them retained by the linker.

This patch changes most existing `UsedVars` cases to `CompilerUsedVars`
to reflect the ideal state - if the binary format properly supports
section based GC (dead stripping), `llvm.compiler.used` should be sufficient.

`__llvm_prf_vnodes` and `__llvm_prf_names` are switched to `UsedVars`
since we want them to be unconditionally retained by both compiler and linker.

Behavior on the other object formats (COFF and Mach-O) is not affected.

Differential Revision: https://reviews.llvm.org/D97649
This commit is contained in:
Fangrui Song 2021-03-01 13:43:23 -08:00
parent 5a9c34918b
commit 04c3040f41
6 changed files with 123 additions and 75 deletions

View File

@ -0,0 +1,70 @@
#include <string.h>
void (*f0)();
void (*f1)();
void (*f2)();
char dst[200];
char src[200];
volatile int n;
// Empty callee that is never inlined; bar() stores its address in f0, f1 and
// f2, so it is the target of every indirect call made by main().
__attribute__((noinline)) void foo() {}
// Initializes the test state: points all three function pointers at foo and
// sets the base memcpy size n to 4.
// NOTE(review): presumably kept noinline so the optimizer cannot see the
// pointer targets from main() and the calls stay indirect -- confirm.
__attribute__((noinline)) void bar() {
f0 = foo;
f1 = foo;
f2 = foo;
n = 4;
}
// Test driver with two modes selected by the argument count.
//   argc == 1: calls through f0 once, f1 9 times and f2 99 times.
//   otherwise: calls memcpy with size n once, n + 1 six times and n + 2
//              66 times (n is set to 4 by bar()).
// The expected profile below pins the function hash and block counts, so the
// shape of this function must not change without updating those values.
int main(int argc, char *argv[]) {
int i;
// Set up the function pointers and n before either mode runs.
bar();
if (argc == 1) {
// Indirect-call mode: three distinct call sites, all targeting foo.
f0();
for (i = 0; i < 9; i++)
f1();
for (i = 0; i < 99; i++)
f2();
} else {
// Memory-intrinsic mode: three distinct memcpy sites with sizes read from
// the volatile n at run time.
memcpy((void *)dst, (void *)src, n);
for (i = 0; i < 6; i++)
memcpy((void *)(dst + 2), (void *)src, n + 1);
for (i = 0; i < 66; i++)
memcpy((void *)(dst + 9), (void *)src, n + 2);
}
}
// CHECK: Counters:
// CHECK: main:
// CHECK: Hash: 0x0a9bd81e87ab6e87
// CHECK: Counters: 6
// CHECK: Indirect Call Site Count: 3
// CHECK: Number of Memory Intrinsics Calls: 3
// CHECK: Block counts: [27, 297, 12, 132, 3, 2]
// CHECK: Indirect Target Results:
// CHECK: [ 0, foo, 3 ]
// CHECK: [ 1, foo, 27 ]
// CHECK: [ 2, foo, 297 ]
// CHECK: Memory Intrinsic Size Results:
// CHECK: [ 0, 4, 2 ]
// CHECK: [ 1, 5, 12 ]
// CHECK: [ 2, 6, 132 ]
// CHECK: Instrumentation level: IR
// CHECK: Functions shown: 1
// CHECK: Total functions: 3
// CHECK: Maximum function count: 327
// CHECK: Maximum internal block count: 297
// CHECK: Statistics for indirect call sites profile:
// CHECK: Total number of sites: 3
// CHECK: Total number of sites with values: 3
// CHECK: Total number of profiled values: 3
// CHECK: Value sites histogram:
// CHECK: NumTargets, SiteCount
// CHECK: 1, 3
// CHECK: Statistics for memory intrinsic calls sizes profile:
// CHECK: Total number of sites: 3
// CHECK: Total number of sites with values: 3
// CHECK: Total number of profiled values: 3
// CHECK: Value sites histogram:
// CHECK: NumTargets, SiteCount
// CHECK: 1, 3

View File

@ -0,0 +1,10 @@
// REQUIRES: lld-available
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=lld -ffunction-sections -fdata-sections -Wl,--gc-sections -z start-stop-gc
// RUN: rm -rf %t.profdir
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c

View File

@ -1,79 +1,27 @@
// RUN: %clang_pgogen -o %t -O3 %s
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c
// RUN: rm -rf %t.profdir
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
#include <string.h>
/// -z start-stop-gc requires binutils 2.37.
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=bfd -ffunction-sections -fdata-sections -Wl,--gc-sections
// RUN: rm -rf %t.profdir
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
void (*f0)();
void (*f1)();
void (*f2)();
char dst[200];
char src[200];
volatile int n;
// Empty callee that is never inlined; bar() stores its address in f0, f1 and
// f2, so it is the target of every indirect call made by main().
__attribute__((noinline)) void foo() {}
// Initializes the test state: points all three function pointers at foo and
// sets the base memcpy size n to 4.
// NOTE(review): presumably kept noinline so the optimizer cannot see the
// pointer targets from main() and the calls stay indirect -- confirm.
__attribute__((noinline)) void bar() {
f0 = foo;
f1 = foo;
f2 = foo;
n = 4;
}
// Test driver with two modes selected by the argument count.
//   argc == 1: calls through f0 once, f1 9 times and f2 99 times.
//   otherwise: calls memcpy with size n once, n + 1 six times and n + 2
//              66 times (n is set to 4 by bar()).
// The expected profile below pins the function hash and block counts, so the
// shape of this function must not change without updating those values.
int main(int argc, char *argv[]) {
int i;
// Set up the function pointers and n before either mode runs.
bar();
if (argc == 1) {
// Indirect-call mode: three distinct call sites, all targeting foo.
f0();
for (i = 0; i < 9; i++)
f1();
for (i = 0; i < 99; i++)
f2();
} else {
// Memory-intrinsic mode: three distinct memcpy sites with sizes read from
// the volatile n at run time.
memcpy((void *)dst, (void *)src, n);
for (i = 0; i < 6; i++)
memcpy((void *)(dst + 2), (void *)src, n + 1);
for (i = 0; i < 66; i++)
memcpy((void *)(dst + 9), (void *)src, n + 2);
}
}
// CHECK: Counters:
// CHECK: main:
// CHECK: Hash: 0x0a9bd81e87ab6e87
// CHECK: Counters: 6
// CHECK: Indirect Call Site Count: 3
// CHECK: Number of Memory Intrinsics Calls: 3
// CHECK: Block counts: [27, 297, 12, 132, 3, 2]
// CHECK: Indirect Target Results:
// CHECK: [ 0, foo, 3 ]
// CHECK: [ 1, foo, 27 ]
// CHECK: [ 2, foo, 297 ]
// CHECK: Memory Intrinsic Size Results:
// CHECK: [ 0, 4, 2 ]
// CHECK: [ 1, 5, 12 ]
// CHECK: [ 2, 6, 132 ]
// CHECK: Instrumentation level: IR
// CHECK: Functions shown: 1
// CHECK: Total functions: 3
// CHECK: Maximum function count: 327
// CHECK: Maximum internal block count: 297
// CHECK: Statistics for indirect call sites profile:
// CHECK: Total number of sites: 3
// CHECK: Total number of sites with values: 3
// CHECK: Total number of profiled values: 3
// CHECK: Value sites histogram:
// CHECK: NumTargets, SiteCount
// CHECK: 1, 3
// CHECK: Statistics for memory intrinsic calls sizes profile:
// CHECK: Total number of sites: 3
// CHECK: Total number of sites with values: 3
// CHECK: Total number of profiled values: 3
// CHECK: Value sites histogram:
// CHECK: NumTargets, SiteCount
// CHECK: 1, 3
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=gold -ffunction-sections -fdata-sections -Wl,--gc-sections
// RUN: rm -rf %t.profdir
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c

View File

@ -57,6 +57,7 @@ private:
}
};
DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
std::vector<GlobalValue *> CompilerUsedVars;
std::vector<GlobalValue *> UsedVars;
std::vector<GlobalVariable *> ReferencedNames;
GlobalVariable *NamesVar;

View File

@ -539,6 +539,7 @@ bool InstrProfiling::run(
NamesVar = nullptr;
NamesSize = 0;
ProfileDataMap.clear();
CompilerUsedVars.clear();
UsedVars.clear();
TT = Triple(M.getTargetTriple());
@ -921,7 +922,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ProfileDataMap[NamePtr] = PD;
// Mark the data variable as used so that it isn't stripped out.
UsedVars.push_back(Data);
CompilerUsedVars.push_back(Data);
// Now that the linkage set by the FE has been passed to the data and counter
// variables, reset Name variable's linkage and visibility to private so that
// it can be removed later by the compiler.
@ -976,6 +977,8 @@ void InstrProfiling::emitVNodes() {
Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
VNodesVar->setSection(
getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
// VNodesVar is used by runtime but not referenced via relocation by other
// sections. Conservatively make it linker retained.
UsedVars.push_back(VNodesVar);
}
@ -1004,6 +1007,8 @@ void InstrProfiling::emitNameData() {
// linker from inserting padding before the start of the names section or
// between names entries.
NamesVar->setAlignment(Align(1));
// NamesVar is used by runtime but not referenced via relocation by other
// sections. Conservatively make it linker retained.
UsedVars.push_back(NamesVar);
for (auto *NamePtr : ReferencedNames)
@ -1031,6 +1036,9 @@ void InstrProfiling::emitRegistration() {
getInstrProfRegFuncName(), M);
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
for (Value *Data : CompilerUsedVars)
if (Data != NamesVar && !isa<Function>(Data))
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
for (Value *Data : UsedVars)
if (Data != NamesVar && !isa<Function>(Data))
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
@ -1081,7 +1089,7 @@ bool InstrProfiling::emitRuntimeHook() {
IRB.CreateRet(Load);
// Mark the user variable as used so that it isn't stripped out.
UsedVars.push_back(User);
CompilerUsedVars.push_back(User);
return true;
}
@ -1094,9 +1102,14 @@ void InstrProfiling::emitUses() {
// or discarded as a unit, so llvm.compiler.used is sufficient. Otherwise,
// conservatively make all of them retained by the linker.
if (TT.isOSBinFormatELF())
appendToCompilerUsed(*M, UsedVars);
appendToCompilerUsed(*M, CompilerUsedVars);
else
appendToUsed(*M, UsedVars);
appendToUsed(*M, CompilerUsedVars);
// We do not add proper references from used metadata sections to NamesVar and
// VNodesVar, so we have to be conservative and place them in llvm.used
// regardless of the target.
appendToUsed(*M, UsedVars);
}
void InstrProfiling::emitInitialization() {

View File

@ -50,6 +50,12 @@ attributes #0 = { nounwind }
; DYN-NOT: @__profvp_foo
; DYN-NOT: @__llvm_prf_vnodes
;; __llvm_prf_vnodes and __llvm_prf_nm are not referenced by other metadata sections.
;; We have to conservatively place them in llvm.used.
; STATIC: @llvm.used = appending global
; STATIC-SAME: @__llvm_prf_vnodes
; STATIC-SAME: @__llvm_prf_nm
; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0)
; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0)