forked from OSchip/llvm-project
[InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF
`__llvm_prf_vnodes` and `__llvm_prf_names` are used by the runtime but are not referenced via relocations in the translation unit. With `-z start-stop-gc` (D96914, https://sourceware.org/bugzilla/show_bug.cgi?id=27451), the linker no longer lets `__start_/__stop_` references retain them. Place `__llvm_prf_vnodes` and `__llvm_prf_names` in `llvm.used` so that they are retained by the linker. This patch changes most existing `UsedVars` cases to `CompilerUsedVars` to reflect the ideal state: if the binary format properly supports section-based GC (dead stripping), `llvm.compiler.used` should be sufficient. `__llvm_prf_vnodes` and `__llvm_prf_names` are switched to `UsedVars` since we want them to be unconditionally retained by both the compiler and the linker. Behavior on the other binary formats (COFF/Mach-O) is not affected. Differential Revision: https://reviews.llvm.org/D97649
This commit is contained in:
parent
5a9c34918b
commit
04c3040f41
|
@ -0,0 +1,70 @@
|
|||
#include <string.h>
|
||||
|
||||
// Function pointers through which main() makes its indirect calls; bar()
// assigns foo to each of them.
void (*f0)();
|
||||
void (*f1)();
|
||||
void (*f2)();
|
||||
|
||||
// Destination and source buffers for the memcpy calls in main().
char dst[200];
|
||||
char src[200];
|
||||
// Base length for the memcpy calls in main(); bar() sets it to 4.
volatile int n;
|
||||
|
||||
// Empty function marked noinline. bar() stores its address in f0, f1 and f2,
// so it is the target of every indirect call made by main().
__attribute__((noinline)) void foo() {}
|
||||
|
||||
// Initializes the globals used by main(): points f0, f1 and f2 at foo and
// sets the volatile length n to 4. Marked noinline.
__attribute__((noinline)) void bar() {
|
||||
f0 = foo;
|
||||
f1 = foo;
|
||||
f2 = foo;
|
||||
n = 4;
|
||||
}
|
||||
// Test driver with two mutually exclusive modes selected by argc:
//  - argc == 1 (no extra arguments): calls through f0 once, f1 9 times and
//    f2 99 times.
//  - otherwise: calls memcpy once with length n, 6 times with length n + 1
//    and 66 times with length n + 2.
// bar() is always called first to set up f0/f1/f2 and n.
// NOTE(review): the expected values listed at the end of this file look like
// the merged result of three runs without an argument and two runs with one
// (e.g. 3 * 9 = 27, 2 * 66 = 132) -- confirm against the RUN lines of the
// tests that use this input.
int main(int argc, char *argv[]) {
|
||||
int i;
|
||||
bar();
|
||||
if (argc == 1) {
|
||||
// Three distinct indirect call sites, all reaching foo.
f0();
|
||||
for (i = 0; i < 9; i++)
|
||||
f1();
|
||||
for (i = 0; i < 99; i++)
|
||||
f2();
|
||||
} else {
// Three distinct memcpy call sites with lengths n, n + 1 and n + 2.
|
||||
memcpy((void *)dst, (void *)src, n);
|
||||
for (i = 0; i < 6; i++)
|
||||
memcpy((void *)(dst + 2), (void *)src, n + 1);
|
||||
for (i = 0; i < 66; i++)
|
||||
memcpy((void *)(dst + 9), (void *)src, n + 2);
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: Counters:
|
||||
// CHECK: main:
|
||||
// CHECK: Hash: 0x0a9bd81e87ab6e87
|
||||
// CHECK: Counters: 6
|
||||
// CHECK: Indirect Call Site Count: 3
|
||||
// CHECK: Number of Memory Intrinsics Calls: 3
|
||||
// CHECK: Block counts: [27, 297, 12, 132, 3, 2]
|
||||
// CHECK: Indirect Target Results:
|
||||
// CHECK: [ 0, foo, 3 ]
|
||||
// CHECK: [ 1, foo, 27 ]
|
||||
// CHECK: [ 2, foo, 297 ]
|
||||
// CHECK: Memory Intrinsic Size Results:
|
||||
// CHECK: [ 0, 4, 2 ]
|
||||
// CHECK: [ 1, 5, 12 ]
|
||||
// CHECK: [ 2, 6, 132 ]
|
||||
// CHECK: Instrumentation level: IR
|
||||
// CHECK: Functions shown: 1
|
||||
// CHECK: Total functions: 3
|
||||
// CHECK: Maximum function count: 327
|
||||
// CHECK: Maximum internal block count: 297
|
||||
// CHECK: Statistics for indirect call sites profile:
|
||||
// CHECK: Total number of sites: 3
|
||||
// CHECK: Total number of sites with values: 3
|
||||
// CHECK: Total number of profiled values: 3
|
||||
// CHECK: Value sites histogram:
|
||||
// CHECK: NumTargets, SiteCount
|
||||
// CHECK: 1, 3
|
||||
// CHECK: Statistics for memory intrinsic calls sizes profile:
|
||||
// CHECK: Total number of sites: 3
|
||||
// CHECK: Total number of sites with values: 3
|
||||
// CHECK: Total number of profiled values: 3
|
||||
// CHECK: Value sites histogram:
|
||||
// CHECK: NumTargets, SiteCount
|
||||
// CHECK: 1, 3
|
|
@ -0,0 +1,10 @@
|
|||
// REQUIRES: lld-available
|
||||
|
||||
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=lld -ffunction-sections -fdata-sections -Wl,--gc-sections -z start-stop-gc
|
||||
// RUN: rm -rf %t.profdir
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
|
||||
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
|
|
@ -1,79 +1,27 @@
|
|||
// RUN: %clang_pgogen -o %t -O3 %s
|
||||
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c
|
||||
// RUN: rm -rf %t.profdir
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
|
||||
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s
|
||||
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
|
||||
|
||||
#include <string.h>
|
||||
/// -z start-stop-gc requires binutils 2.37.
|
||||
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=bfd -ffunction-sections -fdata-sections -Wl,--gc-sections
|
||||
// RUN: rm -rf %t.profdir
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
|
||||
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
|
||||
|
||||
void (*f0)();
|
||||
void (*f1)();
|
||||
void (*f2)();
|
||||
|
||||
char dst[200];
|
||||
char src[200];
|
||||
volatile int n;
|
||||
|
||||
__attribute__((noinline)) void foo() {}
|
||||
|
||||
__attribute__((noinline)) void bar() {
|
||||
f0 = foo;
|
||||
f1 = foo;
|
||||
f2 = foo;
|
||||
n = 4;
|
||||
}
|
||||
int main(int argc, char *argv[]) {
|
||||
int i;
|
||||
bar();
|
||||
if (argc == 1) {
|
||||
f0();
|
||||
for (i = 0; i < 9; i++)
|
||||
f1();
|
||||
for (i = 0; i < 99; i++)
|
||||
f2();
|
||||
} else {
|
||||
memcpy((void *)dst, (void *)src, n);
|
||||
for (i = 0; i < 6; i++)
|
||||
memcpy((void *)(dst + 2), (void *)src, n + 1);
|
||||
for (i = 0; i < 66; i++)
|
||||
memcpy((void *)(dst + 9), (void *)src, n + 2);
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: Counters:
|
||||
// CHECK: main:
|
||||
// CHECK: Hash: 0x0a9bd81e87ab6e87
|
||||
// CHECK: Counters: 6
|
||||
// CHECK: Indirect Call Site Count: 3
|
||||
// CHECK: Number of Memory Intrinsics Calls: 3
|
||||
// CHECK: Block counts: [27, 297, 12, 132, 3, 2]
|
||||
// CHECK: Indirect Target Results:
|
||||
// CHECK: [ 0, foo, 3 ]
|
||||
// CHECK: [ 1, foo, 27 ]
|
||||
// CHECK: [ 2, foo, 297 ]
|
||||
// CHECK: Memory Intrinsic Size Results:
|
||||
// CHECK: [ 0, 4, 2 ]
|
||||
// CHECK: [ 1, 5, 12 ]
|
||||
// CHECK: [ 2, 6, 132 ]
|
||||
// CHECK: Instrumentation level: IR
|
||||
// CHECK: Functions shown: 1
|
||||
// CHECK: Total functions: 3
|
||||
// CHECK: Maximum function count: 327
|
||||
// CHECK: Maximum internal block count: 297
|
||||
// CHECK: Statistics for indirect call sites profile:
|
||||
// CHECK: Total number of sites: 3
|
||||
// CHECK: Total number of sites with values: 3
|
||||
// CHECK: Total number of profiled values: 3
|
||||
// CHECK: Value sites histogram:
|
||||
// CHECK: NumTargets, SiteCount
|
||||
// CHECK: 1, 3
|
||||
// CHECK: Statistics for memory intrinsic calls sizes profile:
|
||||
// CHECK: Total number of sites: 3
|
||||
// CHECK: Total number of sites with values: 3
|
||||
// CHECK: Total number of profiled values: 3
|
||||
// CHECK: Value sites histogram:
|
||||
// CHECK: NumTargets, SiteCount
|
||||
// CHECK: 1, 3
|
||||
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=gold -ffunction-sections -fdata-sections -Wl,--gc-sections
|
||||
// RUN: rm -rf %t.profdir
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
|
||||
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
|
||||
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
|
||||
|
|
|
@ -57,6 +57,7 @@ private:
|
|||
}
|
||||
};
|
||||
DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
|
||||
std::vector<GlobalValue *> CompilerUsedVars;
|
||||
std::vector<GlobalValue *> UsedVars;
|
||||
std::vector<GlobalVariable *> ReferencedNames;
|
||||
GlobalVariable *NamesVar;
|
||||
|
|
|
@ -539,6 +539,7 @@ bool InstrProfiling::run(
|
|||
NamesVar = nullptr;
|
||||
NamesSize = 0;
|
||||
ProfileDataMap.clear();
|
||||
CompilerUsedVars.clear();
|
||||
UsedVars.clear();
|
||||
TT = Triple(M.getTargetTriple());
|
||||
|
||||
|
@ -921,7 +922,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
|
|||
ProfileDataMap[NamePtr] = PD;
|
||||
|
||||
// Mark the data variable as used so that it isn't stripped out.
|
||||
UsedVars.push_back(Data);
|
||||
CompilerUsedVars.push_back(Data);
|
||||
// Now that the linkage set by the FE has been passed to the data and counter
|
||||
// variables, reset Name variable's linkage and visibility to private so that
|
||||
// it can be removed later by the compiler.
|
||||
|
@ -976,6 +977,8 @@ void InstrProfiling::emitVNodes() {
|
|||
Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
|
||||
VNodesVar->setSection(
|
||||
getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
|
||||
// VNodesVar is used by runtime but not referenced via relocation by other
|
||||
// sections. Conservatively make it linker retained.
|
||||
UsedVars.push_back(VNodesVar);
|
||||
}
|
||||
|
||||
|
@ -1004,6 +1007,8 @@ void InstrProfiling::emitNameData() {
|
|||
// linker from inserting padding before the start of the names section or
|
||||
// between names entries.
|
||||
NamesVar->setAlignment(Align(1));
|
||||
// NamesVar is used by runtime but not referenced via relocation by other
|
||||
// sections. Conservatively make it linker retained.
|
||||
UsedVars.push_back(NamesVar);
|
||||
|
||||
for (auto *NamePtr : ReferencedNames)
|
||||
|
@ -1031,6 +1036,9 @@ void InstrProfiling::emitRegistration() {
|
|||
getInstrProfRegFuncName(), M);
|
||||
|
||||
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
|
||||
for (Value *Data : CompilerUsedVars)
|
||||
if (Data != NamesVar && !isa<Function>(Data))
|
||||
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
|
||||
for (Value *Data : UsedVars)
|
||||
if (Data != NamesVar && !isa<Function>(Data))
|
||||
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
|
||||
|
@ -1081,7 +1089,7 @@ bool InstrProfiling::emitRuntimeHook() {
|
|||
IRB.CreateRet(Load);
|
||||
|
||||
// Mark the user variable as used so that it isn't stripped out.
|
||||
UsedVars.push_back(User);
|
||||
CompilerUsedVars.push_back(User);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1094,9 +1102,14 @@ void InstrProfiling::emitUses() {
|
|||
// or discarded as a unit, so llvm.compiler.used is sufficient. Otherwise,
|
||||
// conservatively make all of them retained by the linker.
|
||||
if (TT.isOSBinFormatELF())
|
||||
appendToCompilerUsed(*M, UsedVars);
|
||||
appendToCompilerUsed(*M, CompilerUsedVars);
|
||||
else
|
||||
appendToUsed(*M, UsedVars);
|
||||
appendToUsed(*M, CompilerUsedVars);
|
||||
|
||||
// We do not add proper references from used metadata sections to NamesVar and
|
||||
// VNodesVar, so we have to be conservative and place them in llvm.used
|
||||
// regardless of the target.
|
||||
appendToUsed(*M, UsedVars);
|
||||
}
|
||||
|
||||
void InstrProfiling::emitInitialization() {
|
||||
|
|
|
@ -50,6 +50,12 @@ attributes #0 = { nounwind }
|
|||
; DYN-NOT: @__profvp_foo
|
||||
; DYN-NOT: @__llvm_prf_vnodes
|
||||
|
||||
;; __llvm_prf_vnodes and __llvm_prf_nm are not referenced by other metadata sections.
|
||||
;; We have to conservatively place them in llvm.used.
|
||||
; STATIC: @llvm.used = appending global
|
||||
; STATIC-SAME: @__llvm_prf_vnodes
|
||||
; STATIC-SAME: @__llvm_prf_nm
|
||||
|
||||
; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
|
||||
; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0)
|
||||
; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0)
|
||||
|
|
Loading…
Reference in New Issue