[InstrProf] Add single byte coverage mode

Use the LLVM flag `-pgo-function-entry-coverage` to create single byte "counters" to track function coverage. This mode has significantly less size overhead in both code and data because
  * We mark a function as "covered" with a store instead of an increment which generally requires fewer assembly instructions
  * We use a single byte per function rather than 8 bytes per block

The trade off of course is that this mode only tells you if a function has been covered. This is useful, for example, to detect dead code.

When combined with debug info correlation [0] we are able to create an instrumented Clang binary that is only 150M (the vanilla Clang binary is 143M). That is an overhead of 7M (4.9%) compared to the default instrumentation (without value profiling) which has an overhead of 31M (21.7%).

[0] https://groups.google.com/g/llvm-dev/c/r03Z6JoN7d4

Reviewed By: kyulee

Differential Revision: https://reviews.llvm.org/D116180
This commit is contained in:
Ellis Hoag 2022-01-27 11:22:43 -08:00
parent 936f247ade
commit 11d3074267
31 changed files with 530 additions and 71 deletions

View File

@ -660,6 +660,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* generated profile, and 0 if this is a Clang FE generated profile.
* 1 in bit 57 indicates there are context-sensitive records in the profile.
* The 59th bit indicates whether to use debug info to correlate profiles.
* The 60th bit indicates single byte coverage instrumentation.
* The 61st bit indicates function entry instrumentation only.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
@ -667,6 +669,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
#define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
#define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias

View File

@ -45,7 +45,9 @@ COMPILER_RT_VISIBILITY void __llvm_profile_reset_counters(void) {
char *I = __llvm_profile_begin_counters();
char *E = __llvm_profile_end_counters();
memset(I, 0, E - I);
char ResetValue =
(__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) ? 0xFF : 0;
memset(I, ResetValue, E - I);
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();

View File

@ -65,6 +65,8 @@ uint64_t __llvm_profile_get_data_size(const __llvm_profile_data *Begin,
}
/* Report the size in bytes of one profile counter entry: a single byte when
 * the profile version has the byte coverage bit set, a 64-bit integer
 * otherwise. */
COMPILER_RT_VISIBILITY size_t __llvm_profile_counter_entry_size(void) {
  const int UsesByteCoverage =
      (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) != 0;
  return UsesByteCoverage ? sizeof(uint8_t) : sizeof(uint64_t);
}

View File

@ -155,8 +155,14 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
if (SrcCounters < SrcCountersStart || SrcCounters >= SrcNameStart ||
(SrcCounters + __llvm_profile_counter_entry_size() * NC) > SrcNameStart)
return 1;
for (unsigned I = 0; I < NC; I++)
((uint64_t *)DstCounters)[I] += ((uint64_t *)SrcCounters)[I];
for (unsigned I = 0; I < NC; I++) {
if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) {
// A value of zero signifies the function is covered.
DstCounters[I] &= SrcCounters[I];
} else {
((uint64_t *)DstCounters)[I] += ((uint64_t *)SrcCounters)[I];
}
}
/* Now merge value profile data. */
if (!VPMergeHook)

View File

@ -8,3 +8,13 @@
// RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw
// RUN: diff %t.normal.profdata %t.profdata
// RUN: %clang_pgogen -o %t.cov -g -mllvm --debug-info-correlate -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
// RUN: env LLVM_PROFILE_FILE=%t.cov.proflite %run %t.cov
// RUN: llvm-profdata merge -o %t.cov.profdata --debug-info=%t.cov.dSYM %t.cov.proflite
// RUN: %clang_pgogen -o %t.cov.normal -mllvm --pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
// RUN: env LLVM_PROFILE_FILE=%t.cov.profraw %run %t.cov.normal
// RUN: llvm-profdata merge -o %t.cov.normal.profdata %t.cov.profraw
// RUN: diff %t.cov.normal.profdata %t.cov.profdata

View File

@ -1,4 +1,5 @@
int foo(int);
int unused(int);
inline int bar(int a) {
while (a > 100)

View File

@ -5,3 +5,5 @@ int foo(int a) {
return 4 * a + 1;
return bar(a);
}
int unused(int a) { return a * a; }

View File

@ -1,7 +1,7 @@
#include "instrprof-debug-info-correlate-bar.h"
typedef int (*FP)(int);
FP Fps[2] = {foo, bar};
FP Fps[3] = {foo, bar, unused};
int main() {
for (int i = 0; i < 5; i++)

View File

@ -14,3 +14,13 @@
// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite
// RUN: diff %t.normal.profdata %t.profdata
// RUN: %clang_pgogen -o %t.cov -g -mllvm --debug-info-correlate -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
// RUN: env LLVM_PROFILE_FILE=%t.cov.proflite %run %t.cov
// RUN: llvm-profdata merge -o %t.cov.profdata --debug-info=%t.cov %t.cov.proflite
// RUN: %clang_pgogen -o %t.cov.normal -mllvm --pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
// RUN: env LLVM_PROFILE_FILE=%t.cov.profraw %run %t.cov.normal
// RUN: llvm-profdata merge -o %t.cov.normal.profdata %t.cov.profraw
// RUN: diff %t.cov.normal.profdata %t.cov.profdata

View File

@ -0,0 +1,18 @@
// RUN: %clang_pgogen -mllvm -pgo-function-entry-coverage %s -o %t.out
// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.out
// RUN: llvm-profdata merge -o %t.profdata %t.profraw
// RUN: llvm-profdata show --covered %t.profdata | FileCheck %s --check-prefix CHECK --implicit-check-not goo
// Called unconditionally from main.
int foo(int i) { return 4 * i + 1; }
// Called from main whenever argc is nonzero.
int bar(int i) { return 4 * i + 2; }
// Only reached when argc is zero; the llvm-profdata invocation above expects
// this function to be absent from the covered list.
int goo(int i) { return 4 * i + 3; }
// Exercises foo and exactly one of bar/goo so that the resulting profile has
// both covered and uncovered functions.
int main(int argc, char *argv[]) {
foo(5);
argc ? bar(6) : goo(7);
return 0;
}
// CHECK: main
// CHECK: foo
// CHECK: bar

View File

@ -0,0 +1,93 @@
// RUN: %clang_pgogen -O2 -mllvm -pgo-function-entry-coverage -o %t %s
// RUN: %run %t %t.profraw 1 1
// RUN: llvm-profdata show --all-functions --counts %t.profraw | FileCheck %s
// FIXME: llvm-profdata exits with "Malformed instrumentation profile data"
// XFAIL: msvc
#include "profile_test.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
int __llvm_profile_runtime = 0;
uint64_t __llvm_profile_get_size_for_buffer(void);
int __llvm_profile_write_buffer(char *);
void __llvm_profile_reset_counters(void);
int __llvm_profile_merge_from_buffer(const char *, uint64_t);
__attribute__((noinline)) int dumpBuffer(const char *FileN, const char *Buffer,
uint64_t Size) {
FILE *File = fopen(FileN, "w");
if (!File)
return 1;
if (fwrite(Buffer, 1, Size, File) != Size)
return 1;
return fclose(File);
}
int g = 0;
/* Step the global g up by one when c is '1' and down by one for any other
 * character. */
__attribute__((noinline)) void foo(char c) {
  g += (c == '1') ? 1 : -1;
}
/* main only calls this function after the counters have been written to the
 * buffer, so its execution does not show up in the dumped profile (the
 * expected output at the bottom of this file lists a zero count for it). */
__attribute__((noinline)) void bar(int M) { g += M; }
/* Test driver. argv[1] is the output path; the first characters of argv[2]
 * and argv[3] are passed to foo. The program profiles foo, saves the profile
 * into an in-memory buffer, resets, merges the saved buffer back in, profiles
 * foo again, and finally dumps the buffer to argv[1]. Returns nonzero on any
 * failure. */
int main(int argc, const char *argv[]) {
int i;
if (argc < 4)
return 1;
const uint64_t MaxSize = 10000;
static ALIGNED(sizeof(uint64_t)) char Buffer[MaxSize];
/* Bail out if the profile would not fit in the fixed-size buffer. */
uint64_t Size = __llvm_profile_get_size_for_buffer();
if (Size > MaxSize)
return 1;
/* Start profiling. */
__llvm_profile_reset_counters();
foo(argv[2][0]);
/* End profiling by freezing counters. */
if (__llvm_profile_write_buffer(Buffer))
return 1;
/* Its profile will be discarded. */
for (i = 0; i < 10; i++)
bar(1);
/* Start profiling again and merge in previously
saved counters in buffer. */
__llvm_profile_reset_counters();
__llvm_profile_merge_from_buffer(Buffer, Size);
foo(argv[3][0]);
/* End profiling */
if (__llvm_profile_write_buffer(Buffer))
return 1;
/* Its profile will be discarded. */
bar(2);
/* Now it is time to dump the profile to file. */
return dumpBuffer(argv[1], Buffer, Size);
}
// CHECK-LABEL: dumpBuffer:
// CHECK: Counters: 1
// CHECK-NEXT: Block counts: [0]
// CHECK-LABEL: foo:
// CHECK: Counters: 1
// CHECK-NEXT: Block counts: [1]
// CHECK-LABEL: bar:
// CHECK: Counters: 1
// CHECK-NEXT: Block counts: [0]
// CHECK-LABEL: main:
// CHECK: Counters: 1
// CHECK-NEXT: Block counts: [0]

View File

@ -13139,6 +13139,33 @@ Semantics:
""""""""""
See description of '``llvm.instrprof.increment``' intrinsic.
'``llvm.instrprof.cover``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare void @llvm.instrprof.cover(i8* <name>, i64 <hash>,
i32 <num-counters>, i32 <index>)
Overview:
"""""""""
The '``llvm.instrprof.cover``' intrinsic is used to implement coverage
instrumentation.
Arguments:
""""""""""
The arguments are the same as the first four arguments of
'``llvm.instrprof.increment``'.
Semantics:
""""""""""
Similar to the '``llvm.instrprof.increment``' intrinsic, but it stores zero to
the profiling variable to signify that the function has been covered. We store
zero because this is more efficient on some targets.
'``llvm.instrprof.value.profile``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -1194,6 +1194,17 @@ public:
ConstantInt *getIndex() const;
};
/// Wrapper for calls to the llvm.instrprof.cover intrinsic.
class InstrProfCoverInst : public InstrProfInstBase {
public:
  /// An intrinsic call belongs to this class when its ID is instrprof_cover.
  static bool classof(const IntrinsicInst *I) {
    const bool IsCover = I->getIntrinsicID() == Intrinsic::instrprof_cover;
    return IsCover;
  }
  /// A generic value belongs to this class when it is an intrinsic call that
  /// also satisfies the IntrinsicInst overload.
  static bool classof(const Value *V) {
    if (!isa<IntrinsicInst>(V))
      return false;
    return classof(cast<IntrinsicInst>(V));
  }
};
/// This represents the llvm.instrprof.increment intrinsic.
class InstrProfIncrementInst : public InstrProfInstBase {
public:

View File

@ -582,6 +582,10 @@ def int_experimental_noalias_scope_decl
def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>;
// A coverage marker for instrumentation based profiling. Takes a pointer, an
// i64 and two i32 operands and produces no result.
def int_instrprof_cover : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty,
llvm_i32_ty, llvm_i32_ty]>;
// A counter increment for instrumentation based profiling.
def int_instrprof_increment : Intrinsic<[],
[llvm_ptr_ty, llvm_i64_ty,

View File

@ -285,7 +285,9 @@ enum class InstrProfKind {
IR = 0x2, // An IR-level profile (default when -fprofile-generate is used).
BB = 0x4, // A profile with entry basic block instrumentation.
CS = 0x8, // A context sensitive IR-level profile.
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CS)
SingleByteCoverage = 0x10, // Use single byte probes for coverage.
FunctionEntryOnly = 0x20, // Only instrument the function entry basic block.
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly)
};
const std::error_category &instrprof_category();
@ -1170,7 +1172,8 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
// aware this is an ir_level profile so it can set the version flag.
GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
bool InstrEntryBBEnabled,
bool DebugInfoCorrelate);
bool DebugInfoCorrelate,
bool PGOFunctionEntryCoverage);
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);

View File

@ -660,6 +660,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* generated profile, and 0 if this is a Clang FE generated profile.
* 1 in bit 57 indicates there are context-sensitive records in the profile.
* The 59th bit indicates whether to use debug info to correlate profiles.
* The 60th bit indicates single byte coverage instrumentation.
* The 61st bit indicates function entry instrumentation only.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
@ -667,6 +669,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
#define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
#define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias

View File

@ -100,6 +100,12 @@ public:
/// Return true if we must provide debug info to create PGO profiles.
virtual bool useDebugInfoCorrelate() const { return false; }
/// Return true if the profile has single byte counters representing coverage.
virtual bool hasSingleByteCoverage() const = 0;
/// Return true if the profile only instruments function entries.
virtual bool functionEntryOnly() const = 0;
/// Returns a BitsetEnum describing the attributes of the profile. To check
/// individual attributes prefer using the helpers above.
virtual InstrProfKind getProfileKind() const = 0;
@ -206,6 +212,14 @@ public:
return static_cast<bool>(ProfileKind & InstrProfKind::BB);
}
/// True when the SingleByteCoverage bit is set in this profile's kind.
bool hasSingleByteCoverage() const override {
  const auto CoverageBits = ProfileKind & InstrProfKind::SingleByteCoverage;
  return static_cast<bool>(CoverageBits);
}
/// True when the FunctionEntryOnly bit is set in this profile's kind.
bool functionEntryOnly() const override {
  const auto EntryOnlyBits = ProfileKind & InstrProfKind::FunctionEntryOnly;
  return static_cast<bool>(EntryOnlyBits);
}
InstrProfKind getProfileKind() const override { return ProfileKind; }
/// Read the header.
@ -287,6 +301,14 @@ public:
return (Version & VARIANT_MASK_DBG_CORRELATE) != 0;
}
/// True when the byte coverage variant bit is set in the raw profile version.
bool hasSingleByteCoverage() const override {
  const auto CoverageBit = Version & VARIANT_MASK_BYTE_COVERAGE;
  return CoverageBit != 0;
}
/// True when the function-entry-only variant bit is set in the raw profile
/// version.
bool functionEntryOnly() const override {
  const auto EntryOnlyBit = Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY;
  return EntryOnlyBit != 0;
}
/// Returns a BitsetEnum describing the attributes of the raw instr profile.
InstrProfKind getProfileKind() const override {
InstrProfKind ProfileKind = InstrProfKind::Unknown;
@ -299,6 +321,12 @@ public:
if (Version & VARIANT_MASK_INSTR_ENTRY) {
ProfileKind |= InstrProfKind::BB;
}
if (Version & VARIANT_MASK_BYTE_COVERAGE) {
ProfileKind |= InstrProfKind::SingleByteCoverage;
}
if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
ProfileKind |= InstrProfKind::FunctionEntryOnly;
}
return ProfileKind;
}
@ -359,7 +387,9 @@ private:
return Symtab->getFuncName(swap(NameRef));
}
int getCounterTypeSize() const { return sizeof(uint64_t); }
/// Size in bytes of one counter: one byte for single byte coverage profiles,
/// eight bytes otherwise.
int getCounterTypeSize() const {
  if (hasSingleByteCoverage())
    return sizeof(uint8_t);
  return sizeof(uint64_t);
}
};
using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
@ -439,6 +469,8 @@ struct InstrProfReaderIndexBase {
virtual bool isIRLevelProfile() const = 0;
virtual bool hasCSIRLevelProfile() const = 0;
virtual bool instrEntryBBEnabled() const = 0;
virtual bool hasSingleByteCoverage() const = 0;
virtual bool functionEntryOnly() const = 0;
virtual InstrProfKind getProfileKind() const = 0;
virtual Error populateSymtab(InstrProfSymtab &) = 0;
};
@ -492,6 +524,14 @@ public:
return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
}
/// True when the byte coverage variant bit is set in the format version.
bool hasSingleByteCoverage() const override {
  const auto CoverageBit = FormatVersion & VARIANT_MASK_BYTE_COVERAGE;
  return CoverageBit != 0;
}
/// True when the function-entry-only variant bit is set in the format
/// version.
bool functionEntryOnly() const override {
  const auto EntryOnlyBit = FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY;
  return EntryOnlyBit != 0;
}
InstrProfKind getProfileKind() const override {
InstrProfKind ProfileKind = InstrProfKind::Unknown;
if (FormatVersion & VARIANT_MASK_IR_PROF) {
@ -503,6 +543,12 @@ public:
if (FormatVersion & VARIANT_MASK_INSTR_ENTRY) {
ProfileKind |= InstrProfKind::BB;
}
if (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) {
ProfileKind |= InstrProfKind::SingleByteCoverage;
}
if (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
ProfileKind |= InstrProfKind::FunctionEntryOnly;
}
return ProfileKind;
}
@ -564,6 +610,12 @@ public:
return Index->instrEntryBBEnabled();
}
/// Forwards the single byte coverage query to the underlying index.
bool hasSingleByteCoverage() const override {
  const bool IndexHasByteCoverage = Index->hasSingleByteCoverage();
  return IndexHasByteCoverage;
}
/// Forwards the function-entry-only query to the underlying index.
bool functionEntryOnly() const override {
  const bool IndexIsEntryOnly = Index->functionEntryOnly();
  return IndexIsEntryOnly;
}
/// Returns a BitsetEnum describing the attributes of the indexed instr
/// profile.
InstrProfKind getProfileKind() const override {

View File

@ -87,12 +87,25 @@ public:
return Error::success();
}
// Returns true if merging should fail assuming A and B are incompatible.
auto testIncompatible = [&](InstrProfKind A, InstrProfKind B) {
return (static_cast<bool>(ProfileKind & A) &&
static_cast<bool>(Other & B)) ||
(static_cast<bool>(ProfileKind & B) &&
static_cast<bool>(Other & A));
};
// Check if the profiles are incompatible. Clang frontend profiles can't be
// merged with other profile types.
if (static_cast<bool>((ProfileKind & InstrProfKind::FE) ^
(Other & InstrProfKind::FE))) {
return make_error<InstrProfError>(instrprof_error::unsupported_version);
}
if (testIncompatible(InstrProfKind::FunctionEntryOnly, InstrProfKind::BB)) {
return make_error<InstrProfError>(
instrprof_error::unsupported_version,
"cannot merge FunctionEntryOnly profiles and BB profiles together");
}
// Now we update the profile type with the bits that are set.
ProfileKind |= Other;

View File

@ -87,21 +87,32 @@ private:
/// Count the number of instrumented value sites for the function.
void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
/// Replace instrprof_value_profile with a call to runtime library.
/// Replace instrprof.value.profile with a call to runtime library.
void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
/// Replace instrprof_increment with an increment of the appropriate value.
/// Replace instrprof.cover with a store instruction to the coverage byte.
void lowerCover(InstrProfCoverInst *Inc);
/// Replace instrprof.increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
/// Force emitting of name vars for unused functions.
void lowerCoverageData(GlobalVariable *CoverageNamesVar);
/// Compute the address of the counter value that this profiling instruction
/// acts on.
Value *getCounterAddress(InstrProfInstBase *I);
/// Get the region counters for an increment, creating them if necessary.
///
/// If the counter array doesn't yet exist, the profile data variables
/// referring to them will also be created.
GlobalVariable *getOrCreateRegionCounters(InstrProfInstBase *Inc);
/// Create the region counters.
GlobalVariable *createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
GlobalValue::LinkageTypes Linkage);
/// Emit the section with compressed function names.
void emitNameData();

View File

@ -6870,6 +6870,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return;
case Intrinsic::instrprof_cover:
llvm_unreachable("instrprof failed to lower a cover");
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:

View File

@ -1185,7 +1185,8 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
// aware this is an ir_level profile so it can set the version flag.
GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
bool InstrEntryBBEnabled,
bool DebugInfoCorrelate) {
bool DebugInfoCorrelate,
bool PGOFunctionEntryCoverage) {
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
Type *IntTy64 = Type::getInt64Ty(M.getContext());
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
@ -1195,6 +1196,9 @@ GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
if (DebugInfoCorrelate)
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
if (PGOFunctionEntryCoverage)
ProfileVersion |=
VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);

View File

@ -479,9 +479,15 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
Record.Counts.clear();
Record.Counts.reserve(NumCounters);
for (uint32_t I = 0; I < NumCounters; I++) {
const auto *CounterValue = reinterpret_cast<const uint64_t *>(
CountersStart + CounterBaseOffset + I * getCounterTypeSize());
Record.Counts.push_back(swap(*CounterValue));
const char *Ptr =
CountersStart + CounterBaseOffset + I * getCounterTypeSize();
if (hasSingleByteCoverage()) {
// A value of zero signifies the block is covered.
Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
} else {
const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr);
Record.Counts.push_back(swap(*CounterValue));
}
}
return success();

View File

@ -308,6 +308,10 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
Header.Version |= VARIANT_MASK_CSIR_PROF;
if (static_cast<bool>(ProfileKind & InstrProfKind::BB))
Header.Version |= VARIANT_MASK_INSTR_ENTRY;
if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);

View File

@ -456,6 +456,9 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
lowerIncrement(IPI);
MadeChange = true;
} else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
lowerCover(IPC);
MadeChange = true;
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
lowerValueProfileInst(IPVP);
MadeChange = true;
@ -539,7 +542,8 @@ static bool containsProfilingIntrinsics(Module &M) {
return !F->use_empty();
return false;
};
return containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
}
@ -689,47 +693,58 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
Ind->eraseFromParent();
}
/// Build, immediately before \p I, the address of the counter slot selected
/// by \p I's index.
///
/// Without runtime counter relocation the result is an inbounds GEP into the
/// counter array returned by getOrCreateRegionCounters. With relocation
/// enabled, a 64-bit bias value is added to that address and the sum is cast
/// back to the GEP's pointer type.
Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
auto *Counters = getOrCreateRegionCounters(I);
IRBuilder<> Builder(I);
auto *Addr = Builder.CreateConstInBoundsGEP2_32(
Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
if (!isRuntimeCounterRelocationEnabled())
return Addr;
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Function *Fn = I->getParent()->getParent();
// If the entry block already starts with a load, reuse it as the bias value;
// otherwise insert a load of the bias variable at the top of the entry block.
// NOTE(review): any load in first position is treated as the bias load --
// presumably only an earlier call to this function puts one there; confirm.
Instruction &EntryI = Fn->getEntryBlock().front();
LoadInst *LI = dyn_cast<LoadInst>(&EntryI);
if (!LI) {
IRBuilder<> EntryBuilder(&EntryI);
auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
if (!Bias) {
// Compiler must define this variable when runtime counter relocation
// is being used. Runtime has a weak external reference that is used
// to check whether that's the case or not.
Bias = new GlobalVariable(
*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
Bias->setVisibility(GlobalVariable::HiddenVisibility);
// A definition that's weak (linkonce_odr) without being in a COMDAT
// section wouldn't lead to link errors, but it would lead to a dead
// data word from every TU but one. Putting it in COMDAT ensures there
// will be exactly one data slot in the link.
if (TT.supportsCOMDAT())
Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
}
LI = EntryBuilder.CreateLoad(Int64Ty, Bias);
}
// Relocated address = static counter address + bias, computed as integers.
auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
return Builder.CreateIntToPtr(Add, Addr->getType());
}
/// Lower an llvm.instrprof.cover call into a one-byte store of zero to its
/// counter address, then delete the call.
void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) {
  auto *CounterAddr = getCounterAddress(CoverInstruction);
  IRBuilder<> StoreBuilder(CoverInstruction);
  // Zero, not one, is the marker for "this block is covered".
  auto *CoveredMarker = StoreBuilder.getInt8(0);
  StoreBuilder.CreateStore(CoveredMarker, CounterAddr);
  CoverInstruction->eraseFromParent();
}
void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
auto *Addr = getCounterAddress(Inc);
IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
Value *Addr = Builder.CreateConstInBoundsGEP2_32(Counters->getValueType(),
Counters, 0, Index);
if (isRuntimeCounterRelocationEnabled()) {
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext());
Function *Fn = Inc->getParent()->getParent();
Instruction &I = Fn->getEntryBlock().front();
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (!LI) {
IRBuilder<> Builder(&I);
GlobalVariable *Bias =
M->getGlobalVariable(getInstrProfCounterBiasVarName());
if (!Bias) {
// Compiler must define this variable when runtime counter relocation
// is being used. Runtime has a weak external reference that is used
// to check whether that's the case or not.
Bias = new GlobalVariable(
*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
Bias->setVisibility(GlobalVariable::HiddenVisibility);
// A definition that's weak (linkonce_odr) without being in a COMDAT
// section wouldn't lead to link errors, but it would lead to a dead
// data word from every TU but one. Putting it in COMDAT ensures there
// will be exactly one data slot in the link.
if (TT.supportsCOMDAT())
Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
}
LI = Builder.CreateLoad(Int64Ty, Bias);
}
auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
Addr = Builder.CreateIntToPtr(Add, Int64PtrTy);
}
if (Options.Atomic || AtomicCounterUpdateAll ||
(Index == 0 && AtomicFirstCounter)) {
(Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
MaybeAlign(), AtomicOrdering::Monotonic);
} else {
@ -848,6 +863,31 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
return true;
}
/// Create the global array that backs the counters of \p Inc.
///
/// For llvm.instrprof.cover the array holds one i8 per counter, initialized
/// to all-ones and aligned to 1. For every other profiling instruction it
/// holds one i64 per counter, zero-initialized and aligned to 8.
GlobalVariable *
InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
                                     GlobalValue::LinkageTypes Linkage) {
  const uint64_t Count = Inc->getNumCounters()->getZExtValue();
  auto &Context = M->getContext();

  if (!isa<InstrProfCoverInst>(Inc)) {
    auto *Int64ArrTy = ArrayType::get(Type::getInt64Ty(Context), Count);
    auto *Counters =
        new GlobalVariable(*M, Int64ArrTy, false, Linkage,
                           Constant::getNullValue(Int64ArrTy), Name);
    Counters->setAlignment(Align(8));
    return Counters;
  }

  auto *ByteTy = Type::getInt8Ty(Context);
  auto *ByteArrTy = ArrayType::get(ByteTy, Count);
  // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
  std::vector<Constant *> AllOnes(Count, Constant::getAllOnesValue(ByteTy));
  auto *Counters =
      new GlobalVariable(*M, ByteArrTy, false, Linkage,
                         ConstantArray::get(ByteArrTy, AllOnes), Name);
  Counters->setAlignment(Align(1));
  return Counters;
}
GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
GlobalVariable *NamePtr = Inc->getName();
@ -914,16 +954,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
// Create the counters variable.
auto *CounterPtr =
new GlobalVariable(*M, CounterTy, false, Linkage,
Constant::getNullValue(CounterTy), CntsVarName);
auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage);
CounterPtr->setVisibility(Visibility);
CounterPtr->setSection(
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
CounterPtr->setAlignment(Align(8));
MaybeSetComdat(CounterPtr);
CounterPtr->setLinkage(Linkage);
PD.RegionCounters = CounterPtr;

View File

@ -255,6 +255,11 @@ static cl::opt<bool> PGOInstrumentEntry(
"pgo-instrument-entry", cl::init(false), cl::Hidden,
cl::desc("Force to instrument function entry basicblock."));
// Hidden, off-by-default switch for function entry coverage. When enabled,
// function instrumentation emits a single llvm.instrprof.cover call in the
// entry block instead of per-block counter increments, and select
// instructions are not instrumented.
static cl::opt<bool> PGOFunctionEntryCoverage(
"pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore,
cl::desc(
"Use this option to enable function entry coverage instrumentation."));
static cl::opt<bool>
PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
cl::desc("Fix function entry count in profile use."));
@ -469,9 +474,9 @@ private:
createProfileFileNameVar(M, InstrProfileOutput);
// The variable in a comdat may be discarded by LTO. Ensure the
// declaration will be retained.
appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
PGOInstrumentEntry,
DebugInfoCorrelate));
appendToCompilerUsed(M, createIRLevelProfileFlagVar(
M, /*IsCS=*/true, PGOInstrumentEntry,
DebugInfoCorrelate, PGOFunctionEntryCoverage));
return false;
}
std::string InstrProfileOutput;
@ -914,22 +919,39 @@ static void instrumentOneFunc(
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
if (PGOFunctionEntryCoverage) {
// Entry coverage and context-sensitive instrumentation must not be combined;
// the message below is what a developer sees if they are.
assert(!IsCS &&
       "entry coverage does not support context-sensitive instrumentation");
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
{Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
return;
}
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
uint32_t I = 0;
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
"Cannot get the Instrumentation point");
// llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
{ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
Builder.getInt32(I++)});
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
}
// Now instrument select instructions:
@ -1502,6 +1524,8 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() {
}
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
if (PGOFunctionEntryCoverage)
return;
Module *M = F.getParent();
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
@ -1623,7 +1647,7 @@ static bool InstrumentAllFunctions(
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry,
DebugInfoCorrelate);
DebugInfoCorrelate, PGOFunctionEntryCoverage);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@ -1645,9 +1669,9 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
// The variable in a comdat may be discarded by LTO. Ensure the declaration
// will be retained.
appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
PGOInstrumentEntry,
DebugInfoCorrelate));
appendToCompilerUsed(M, createIRLevelProfileFlagVar(
M, /*IsCS=*/true, PGOInstrumentEntry,
DebugInfoCorrelate, PGOFunctionEntryCoverage));
return PreservedAnalyses::all();
}
@ -1844,6 +1868,18 @@ static bool annotateAllFunctions(
ProfileFileName.data(), "Not an IR level instrumentation profile"));
return false;
}
if (PGOReader->hasSingleByteCoverage()) {
Ctx.diagnose(DiagnosticInfoPGOProfile(
ProfileFileName.data(),
"Cannot use coverage profiles for optimization"));
return false;
}
if (PGOReader->functionEntryOnly()) {
Ctx.diagnose(DiagnosticInfoPGOProfile(
ProfileFileName.data(),
"Function entry profiles are not yet supported for optimization"));
return false;
}
// Add the profile summary (read from the header of the indexed summary) here
// so that we can use it below when reading counters (which checks if the

View File

@ -0,0 +1,23 @@
; RUN: opt < %s -instrprof -S | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
@__profn_foo = private constant [3 x i8] c"foo"
; CHECK: @__profc_foo = private global [1 x i8] c"\FF", section "__llvm_prf_cnts", comdat, align 1
@__profn_bar = private constant [3 x i8] c"bar"
; CHECK: @__profc_bar = private global [1 x i8] c"\FF", section "__llvm_prf_cnts", comdat, align 1
; Minimal test function: its body is one llvm.instrprof.cover probe followed
; by a return. The probe is passed the address of @__profn_foo, the i64
; constant 12345678 and the i32 constants 1 and 0 (presumably the function
; hash, the number of counters and the counter index -- confirm against the
; intrinsic's documentation).
define void @_Z3foov() {
call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 1, i32 0)
; After the -instrprof pass the probe is expected to appear as a single
; byte store of 0 into element 0 of @__profc_foo.
; CHECK: store i8 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profc_foo, i32 0, i32 0), align 1
ret void
}
%class.A = type { i32 (...)** }
; Second test function: carries a %class.A* parameter and extra function
; attributes (dso_local, unnamed_addr, align 2), and contains one
; llvm.instrprof.cover probe that refers to @__profn_bar with the i64
; constant 87654321.
define dso_local void @_Z3barv(%class.A* nocapture nonnull align 8 %0) unnamed_addr #0 align 2 {
call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 87654321, i32 1, i32 0)
; After the -instrprof pass the probe is expected to appear as a single
; byte store of 0 into element 0 of @__profc_bar.
; CHECK: store i8 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profc_bar, i32 0, i32 0), align 1
ret void
}
declare void @llvm.instrprof.cover(i8*, i64, i32, i32)

View File

@ -0,0 +1,34 @@
; RUN: opt < %s -instrprof -debug-info-correlate -S | opt --O2 -S | FileCheck %s
@__profn_foo = private constant [3 x i8] c"foo"
; CHECK: @__profc_foo
; Test function with a !dbg attachment (!12, the DISubprogram named "foo"
; with linkage name "_Z3foov") and one llvm.instrprof.cover probe. The RUN
; line lowers it with -instrprof -debug-info-correlate and then pipes the
; result through a second opt invocation at O2.
define void @_Z3foov() !dbg !12 {
call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 1, i32 0)
; The byte store of 0 into @__profc_foo is expected to still be present in
; the output of the second (O2) opt run.
; CHECK: store i8 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profc_foo
ret void
}
declare void @llvm.instrprof.cover(i8*, i64, i32, i32)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8, !9, !10}
!llvm.ident = !{!11}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "debug-info-correlate-coverage.cpp", directory: "")
!2 = !{i32 7, !"Dwarf Version", i32 4}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 1, !"branch-target-enforcement", i32 0}
!6 = !{i32 1, !"sign-return-address", i32 0}
!7 = !{i32 1, !"sign-return-address-all", i32 0}
!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0}
!9 = !{i32 7, !"uwtable", i32 1}
!10 = !{i32 7, !"frame-pointer", i32 1}
!11 = !{!"clang version 14.0.0"}
!12 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !13, file: !13, line: 1, type: !14, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !16)
!13 = !DIFile(filename: "debug-info-correlate-coverage.cpp", directory: "")
!14 = !DISubroutineType(types: !15)
!15 = !{null}
!16 = !{}

View File

@ -0,0 +1,26 @@
; RUN: opt < %s -passes=pgo-instr-gen -pgo-function-entry-coverage -S | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Test input for -passes=pgo-instr-gen -pgo-function-entry-coverage.
; Shape of the function: the entry block branches on %i > 0; if.then computes
; %i + 2 and also contains a select whose result %s is never used; if.else
; computes %i - 2; if.end merges the two values with a phi and returns it.
; The FileCheck directives expect one llvm.instrprof.cover call and then no
; further "llvm.instrprof.cover(" text until the intrinsic's declaration.
; NOTE(review): the select is presumably here to exercise select
; instrumentation being skipped in this mode, but the negative directive only
; looks for llvm.instrprof.cover -- consider whether it should also reject
; the intrinsic used for select instrumentation; confirm with the pass.
define i32 @foo(i32 %i) {
entry:
; CHECK: call void @llvm.instrprof.cover({{.*}})
%cmp = icmp sgt i32 %i, 0
br i1 %cmp, label %if.then, label %if.else
if.then:
; CHECK-NOT: llvm.instrprof.cover(
%add = add nsw i32 %i, 2
; %s is intentionally dead: nothing below reads it.
%s = select i1 %cmp, i32 %add, i32 0
br label %if.end
if.else:
%sub = sub nsw i32 %i, 2
br label %if.end
if.end:
; Merge of the two arms; this is the function's return value.
%retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
ret i32 %retv
}
; CHECK: declare void @llvm.instrprof.cover(

View File

@ -0,0 +1,5 @@
// RUN: llvm-profdata show --covered %S/Inputs/function-entry-coverage.profdata | FileCheck %s
// CHECK: main
// CHECK: foo
// CHECK: bar

View File

@ -2092,7 +2092,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
bool ShowAllFunctions, bool ShowCS,
uint64_t ValueCutoff, bool OnlyListBelow,
const std::string &ShowFunction, bool TextFormat,
bool ShowBinaryIds, raw_fd_ostream &OS) {
bool ShowBinaryIds, bool ShowCovered,
raw_fd_ostream &OS) {
auto ReaderOrErr = InstrProfReader::create(Filename);
std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
if (ShowDetailedSummary && Cutoffs.empty()) {
@ -2149,6 +2150,13 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
assert(Func.Counts.size() > 0 && "function missing entry counter");
Builder.addRecord(Func);
if (ShowCovered) {
if (std::any_of(Func.Counts.begin(), Func.Counts.end(),
[](uint64_t C) { return C; }))
OS << Func.Name << "\n";
continue;
}
uint64_t FuncMax = 0;
uint64_t FuncSum = 0;
for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
@ -2225,7 +2233,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (Reader->hasError())
exitWithError(Reader->getError(), Filename);
if (TextFormat)
if (TextFormat || ShowCovered)
return 0;
std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
bool IsIR = Reader->isIRLevelProfile();
@ -2576,6 +2584,9 @@ static int show_main(int argc, const char *argv[]) {
"debug-info", cl::init(""),
cl::desc("Read and extract profile metadata from debug info and show "
"the functions it found."));
cl::opt<bool> ShowCovered(
"covered", cl::init(false),
cl::desc("Show only the functions that have been executed."));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
@ -2607,7 +2618,7 @@ static int show_main(int argc, const char *argv[]) {
Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets,
ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
TextFormat, ShowBinaryIds, OS);
TextFormat, ShowBinaryIds, ShowCovered, OS);
if (ProfileKind == sample)
return showSampleProfile(Filename, ShowCounts, TopNFunctions,
ShowAllFunctions, ShowDetailedSummary,