[InstrProfiling] Use llvm.compiler.used instead of llvm.used for ELF

Many optimizers (e.g. GlobalOpt/ConstantMerge) do not respect linker semantics
for comdat and may not discard the sections as a unit.

The interconnected `__llvm_prf_{cnts,data}` sections (in comdat for ELF)
are similar to D97432: `__profd_` is not directly referenced, so
`__profd_` may be discarded while `__profc_` is retained, breaking the
interconnection.  We currently conservatively add all such sections to
`llvm.used` and let the linker do GC for ELF.

In D97448, we will change GlobalObjects in the `llvm.used` list to use SHF_GNU_RETAIN,
causing the metadata sections to be unnecessarily retained (some `check-profile` tests check for GC).
On ELF, use `llvm.compiler.used` instead to retain the current GC behavior.

Differential Revision: https://reviews.llvm.org/D97585
This commit is contained in:
Fangrui Song 2021-02-26 16:14:03 -08:00
parent 5077d42cfa
commit bf176c49e8
2 changed files with 35 additions and 11 deletions

View File

@ -1086,7 +1086,16 @@ bool InstrProfiling::emitRuntimeHook() {
}
// Records every variable collected in UsedVars in one of the module's "used"
// lists: llvm.compiler.used when the target object format is ELF, llvm.used
// for every other object format.
void InstrProfiling::emitUses() {
// NOTE(review): in this diff view the emptiness guard below appears to be the
// pre-change line that the patch removes -- confirm against the final file.
if (!UsedVars.empty())
// The metadata sections are parallel arrays. Optimizers (e.g.
// GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
// we conservatively retain all unconditionally in the compiler.
//
// On ELF, the linker can guarantee the associated sections will be retained
// or discarded as a unit, so llvm.compiler.used is sufficient. Otherwise,
// conservatively make all of them retained by the linker.
if (TT.isOSBinFormatELF())
appendToCompilerUsed(*M, UsedVars);
else
appendToUsed(*M, UsedVars);
}

View File

@ -1,7 +1,8 @@
; RUN: opt < %s -instrprof -S | FileCheck %s
; RUN: opt < %s -passes=instrprof -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64 -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,ELF
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,MACHO
; RUN: opt < %s -mtriple=x86_64-windows -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,WIN
target triple = "x86_64-apple-macosx10.10.0"
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s
; CHECK: @__llvm_profile_runtime = external global i32
@ -12,22 +13,34 @@ target triple = "x86_64-apple-macosx10.10.0"
@__profn_baz = hidden constant [3 x i8] c"baz"
; CHECK-NOT: __profn_baz
; CHECK: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
; CHECK: @__profd_foo = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
; ELF: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profd_foo), align 8
; ELF: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", comdat, align 8
; MACHO: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
; MACHO: @__profd_foo = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
; WIN: @__profc_foo = internal global [1 x i64] zeroinitializer, section ".lprfc$M", align 8
; WIN: @__profd_foo = internal {{.*}}, section ".lprfd$M", align 8
; Test input: @foo contains one llvm.instrprof.increment call that names
; @__profn_foo. The directives just above expect the pass to create
; @__profc_foo (a [1 x i64] counter array) and @__profd_foo for it.
define void @foo() {
call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
; CHECK: @__profc_bar = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
; CHECK: @__profd_bar = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
; ELF: @__profc_bar = hidden global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profd_bar), align 8
; ELF: @__profd_bar = hidden {{.*}}, section "__llvm_prf_data", comdat, align 8
; MACHO: @__profc_bar = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
; MACHO: @__profd_bar = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
; WIN: @__profc_bar = internal global [1 x i64] zeroinitializer, section ".lprfc$M", align 8
; WIN: @__profd_bar = internal {{.*}}, section ".lprfd$M", align 8
; Test input: @bar contains one llvm.instrprof.increment call that names
; @__profn_bar. The directives just above expect the pass to create
; @__profc_bar (a [1 x i64] counter array) and @__profd_bar for it.
define void @bar() {
call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_bar, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
; CHECK: @__profc_baz = hidden global [3 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
; CHECK: @__profd_baz = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
; ELF: @__profc_baz = hidden global [3 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profd_baz), align 8
; ELF: @__profd_baz = hidden {{.*}}, section "__llvm_prf_data", comdat, align 8
; MACHO: @__profc_baz = hidden global [3 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
; MACHO: @__profd_baz = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
; WIN: @__profc_baz = internal global [3 x i64] zeroinitializer, section ".lprfc$M", align 8
; WIN: @__profd_baz = internal {{.*}}, section ".lprfd$M", align 8
define void @baz() {
call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_baz, i32 0, i32 0), i64 0, i32 3, i32 0)
call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_baz, i32 0, i32 0), i64 0, i32 3, i32 1)
@ -37,4 +50,6 @@ define void @baz() {
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
; CHECK: @llvm.used = appending global {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz {{.*}} section "llvm.metadata"
; ELF: @llvm.compiler.used = appending global {{.*}} @__llvm_profile_runtime_user {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz
; MACHO: @llvm.used = appending global {{.*}} @__llvm_profile_runtime_user {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz
; WIN: @llvm.used = appending global {{.*}} @__llvm_profile_runtime_user {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz