Let -basic-block-sections=labels emit basicblock metadata in a new .bb_addr_map section, instead of emitting special unary-encoded symbols.

This patch introduces the new .bb_addr_map section feature which allows us to emit the bits needed for mapping binary profiles to basic blocks into a separate section.
The format of the emitted data is represented as follows. It includes a header for every function:

|  Address of the function                      |  -> 8 bytes (pointer size)
|  Number of basic blocks in this function (>0) |  -> ULEB128

The header is followed by a BB record for every basic block. These records are ordered in the same order as MachineBasicBlocks are placed in the function. Each BB Info is structured as follows:

|  Offset of the basic block relative to function begin |  -> ULEB128
|  Binary size of the basic block                       |  -> ULEB128
|  BB metadata                                          |  -> ULEB128  [ MBB.isReturnBlock() OR ((MBB ends with a tail call) << 1) OR (MBB.isEHPad() << 2) ]

The new feature will replace the existing "BB labels" functionality with -basic-block-sections=labels.
The .bb_addr_map section scrubs the specially-encoded BB symbols from the binary and makes it friendly to profilers and debuggers.
Furthermore, the new feature reduces the binary size overhead from 70% bloat to only 12%.

For more information and results please refer to the RFC: https://lists.llvm.org/pipermail/llvm-dev/2020-July/143512.html

Reviewed By: MaskRay, snehasish

Differential Revision: https://reviews.llvm.org/D85408
This commit is contained in:
Rahman Lavaee 2020-09-14 10:16:44 -07:00
parent ce6dd973ac
commit 7841e21c98
13 changed files with 184 additions and 129 deletions

View File

@ -1700,9 +1700,12 @@ are listed below.
**-fbasic-block-sections=[labels, all, list=<arg>, none]**
Controls whether Clang emits a label for each basic block. Further, with
values "all" and "list=arg", each basic block or a subset of basic blocks
can be placed in its own unique section.
Controls how Clang emits text sections for basic blocks. With values ``all``
and ``list=<arg>``, each basic block or a subset of basic blocks can be placed
in its own unique section. With the ``labels`` value, normal text sections are
emitted, but a ``.bb_addr_map`` section is emitted which includes address
offsets for each basic block in the program, relative to the parent function
address.
With the ``list=<arg>`` option, a file containing the subset of basic blocks
that need to be placed in unique sections can be specified. The format of the

View File

@ -1,12 +1,11 @@
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -o - < %s | FileCheck %s --check-prefix=PLAIN
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=all -fbasic-block-sections=none -o - < %s | FileCheck %s --check-prefix=PLAIN
// RUN: %clang_cc1 -triple x86_64 -S -o - < %s | FileCheck %s --check-prefix=PLAIN
// RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=all -fbasic-block-sections=none -o - < %s | FileCheck %s --check-prefix=PLAIN
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=labels -o - < %s | FileCheck %s --check-prefix=BB_LABELS
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=all -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_ALL
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=list=%S/Inputs/basic-block-sections.funcnames -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_LIST
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fbasic-block-sections=all -funique-basic-block-section-names -o - < %s | FileCheck %s --check-prefix=UNIQUE
// RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=all -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_ALL
// RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=list=%S/Inputs/basic-block-sections.funcnames -o - < %s | FileCheck %s --check-prefix=BB_WORLD --check-prefix=BB_LIST
// RUN: %clang_cc1 -triple x86_64 -S -fbasic-block-sections=all -funique-basic-block-section-names -o - < %s | FileCheck %s --check-prefix=UNIQUE
int world(int a) {
if (a > 10)
@ -26,12 +25,6 @@ int another(int a) {
// PLAIN-NOT: section
// PLAIN: world:
//
// BB_LABELS-NOT: section
// BB_LABELS: world:
// BB_LABELS: a.BB.world:
// BB_LABELS: aa.BB.world:
// BB_LABELS: a.BB.another:
//
// BB_WORLD: .section .text.world,"ax",@progbits{{$}}
// BB_WORLD: world:
// BB_WORLD: .section .text.world,"ax",@progbits,unique

View File

@ -342,6 +342,8 @@ public:
void emitStackSizeSection(const MachineFunction &MF);
void emitBBAddrMapSection(const MachineFunction &MF);
void emitRemarksSection(remarks::RemarkStreamer &RS);
enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };

View File

@ -510,9 +510,6 @@ public:
void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; }
/// Creates basic block Labels for this function.
void createBBLabels();
/// Assign IsBeginSection IsEndSection fields for basic blocks in this
/// function.
void assignBeginEndSections();

View File

@ -338,6 +338,8 @@ public:
MCSection *getStackSizesSection(const MCSection &TextSec) const;
MCSection *getBBAddrMapSection(const MCSection &TextSec) const;
// ELF specific sections.
MCSection *getDataRelROSection() const { return DataRelROSection; }
const MCSection *getMergeableConst4Section() const {

View File

@ -1023,6 +1023,46 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
MCConstantExpr::create(FrameOffset, OutContext));
}
/// Returns the BB metadata to be emitted in the bb_addr_map section for a given
/// basic block. This can be used to capture more precise profile information.
/// We use the 3 least significant bits to encode the following information:
///  * bit 0: set if MBB.isReturnBlock() holds (ret or tail call).
///  * bit 1: set if the block is non-empty and ends with a tail call.
///  * bit 2: set if the block is an exception handling (EH) landing pad.
/// The remaining bits are zero.
static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
return ((unsigned)MBB.isReturnBlock()) |
// The emptiness check guards the MBB.back() access on an empty block.
((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) |
(MBB.isEHPad() << 2);
}
/// Emits the basic block address map entry for \p MF into the section returned
/// by getBBAddrMapSection() for the function's section.
///
/// The entry consists of a header (the function begin symbol emitted with
/// pointer size, followed by the number of basic blocks as ULEB128) and then,
/// for every basic block in function order, three ULEB128 values: the block's
/// offset from the function begin symbol, the block's size, and the metadata
/// computed by getBBAddrMapMetadata(). The current section is saved before
/// switching and restored afterwards.
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
MCSection *BBAddrMapSection =
getObjFileLowering().getBBAddrMapSection(*MF.getSection());
assert(BBAddrMapSection && ".bb_addr_map section is not initialized.");
const MCSymbol *FunctionSymbol = getFunctionBegin();
OutStreamer->PushSection();
OutStreamer->SwitchSection(BBAddrMapSection);
// Header: address of the function.
OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
// Emit the total number of basic blocks in this function.
OutStreamer->emitULEB128IntValue(MF.size());
// Emit BB Information for each basic block in the function.
for (const MachineBasicBlock &MBB : MF) {
// Blocks without predecessors are addressed through the function begin
// symbol instead of their own block symbol.
// NOTE(review): presumably because no label is emitted for such blocks --
// confirm against the basic block label emission logic.
const MCSymbol *MBBSymbol =
MBB.pred_empty() ? FunctionSymbol : MBB.getSymbol();
// Emit the basic block offset.
emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol);
// Emit the basic block size. When BBs have alignments, their size cannot
// always be computed from their offsets.
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
// Emit the per-block metadata bits.
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
}
OutStreamer->PopSection();
}
void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
if (!MF.getTarget().Options.EmitStackSizeSection)
return;
@ -1179,34 +1219,26 @@ void AsmPrinter::emitFunctionBody() {
}
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled and we want to emit size directive for the BBs,
// or if this basic blocks marks the end of a section (except the section
// containing the entry basic block as the end symbol for that section is
// CurrentFnEnd).
if ((MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) ||
(MBB.isEndSection() && !MBB.sameSection(&MF->front())))
// we have BBLabels enabled or if this basic block marks the end of a
// section (except the section containing the entry basic block as the end
// symbol for that section is CurrentFnEnd).
if (MF->hasBBLabels() ||
(MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection() &&
!MBB.sameSection(&MF->front())))
OutStreamer->emitLabel(MBB.getEndSymbol());
// Helper for emitting the size directive associated with a basic block
// symbol.
auto emitELFSizeDirective = [&](MCSymbol *SymForSize) {
const MCExpr *SizeExp = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(MBB.getEndSymbol(), OutContext),
MCSymbolRefExpr::create(SymForSize, OutContext), OutContext);
OutStreamer->emitELFSize(SymForSize, SizeExp);
};
// Emit size directive for the size of each basic block, if BBLabels is
// enabled.
if (MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels())
emitELFSizeDirective(MBB.getSymbol());
// Emit size directive for the size of each basic block section once we
// get to the end of that section.
if (MBB.isEndSection()) {
// The size directive for the section containing the entry block is
// handled separately by the function section.
if (!MBB.sameSection(&MF->front())) {
if (MAI->hasDotTypeDotSizeDirective())
emitELFSizeDirective(CurrentSectionBeginSym);
if (MAI->hasDotTypeDotSizeDirective()) {
// Emit the size directive for the basic block section.
const MCExpr *SizeExp = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(MBB.getEndSymbol(), OutContext),
MCSymbolRefExpr::create(CurrentSectionBeginSym, OutContext),
OutContext);
OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp);
}
MBBSectionRanges[MBB.getSectionIDNum()] =
MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()};
}
@ -1298,6 +1330,11 @@ void AsmPrinter::emitFunctionBody() {
HI.Handler->endFunction(MF);
}
// Emit section containing BB address offsets and their metadata, when
// BB labels are requested for this function.
if (MF->hasBBLabels())
emitBBAddrMapSection(*MF);
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
@ -1807,7 +1844,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
F.hasFnAttribute("function-instrument") ||
F.hasFnAttribute("xray-instruction-threshold") ||
needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize ||
MF.getTarget().Options.EmitStackSizeSection) {
MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;

View File

@ -48,19 +48,11 @@
// Basic Block Labels
// ==================
//
// With -fbasic-block-sections=labels, or when a basic block is placed in a
// unique section, it is labelled with a symbol. This allows easy mapping of
// virtual addresses from PMU profiles back to the corresponding basic blocks.
// Since the number of basic blocks is large, the labeling bloats the symbol
// table sizes and the string table sizes significantly. While the binary size
// does increase, it does not affect performance as the symbol table is not
// loaded in memory during run-time. The string table size bloat is kept very
// minimal using a unary naming scheme that uses string suffix compression. The
// basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ... This
// turns out to be very good for string table sizes and the bloat in the string
// table size for a very large binary is ~8 %. The naming also allows using
// the --symbol-ordering-file option in LLD to arbitrarily reorder the
// sections.
// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of
// every function into a .bb_addr_map section. Along with the function symbols,
// this allows for mapping of virtual addresses in PMU profiles back to the
// corresponding basic blocks. This logic is implemented in AsmPrinter. This
// pass only assigns the BBSectionType of every function to ``labels``.
//
//===----------------------------------------------------------------------===//
@ -304,7 +296,6 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
if (BBSectionsType == BasicBlockSection::Labels) {
MF.setBBSectionsType(BBSectionsType);
MF.createBBLabels();
return true;
}
@ -314,7 +305,6 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
FuncBBClusterInfo))
return true;
MF.setBBSectionsType(BBSectionsType);
MF.createBBLabels();
assignSections(MF, FuncBBClusterInfo);
// We make sure that the cluster including the entry basic block precedes all

View File

@ -451,10 +451,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
}
// Check Basic Block Section Flags.
if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) {
MF.createBBLabels();
MF.setBBSectionsType(BasicBlockSection::Labels);
} else if (MF.hasBBSections()) {
MF.createBBLabels();
MF.assignBeginEndSections();
}
PFS.SM = &SM;

View File

@ -60,28 +60,11 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
if (!CachedMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
// We emit a non-temporary symbol for every basic block if we have BBLabels
// or -- with basic block sections -- when a basic block begins a section.
// With basic block symbols, we use a unary encoding which can
// compress the symbol names significantly. For basic block sections where
// this block is the first in a cluster, we use a non-temp descriptive name.
// Otherwise we fall back to use temp label.
if (MF->hasBBLabels()) {
auto Iter = MF->getBBSectionsSymbolPrefix().begin();
if (getNumber() < 0 ||
getNumber() >= (int)MF->getBBSectionsSymbolPrefix().size())
report_fatal_error("Unreachable MBB: " + Twine(getNumber()));
// The basic blocks for function foo are named a.BB.foo, aa.BB.foo, and
// so on.
std::string Prefix(Iter + 1, Iter + getNumber() + 1);
std::reverse(Prefix.begin(), Prefix.end());
CachedMCSymbol =
Ctx.getOrCreateSymbol(Twine(Prefix) + ".BB." + Twine(MF->getName()));
} else if (MF->hasBBSections() && isBeginSection()) {
// We emit a non-temporary symbol -- with a descriptive name -- if it begins
// a section (with basic block sections). Otherwise we fall back to use temp
// label.
if (MF->hasBBSections() && isBeginSection()) {
SmallString<5> Suffix;
if (SectionID == MBBSectionID::ColdSectionID) {
Suffix += ".cold";
@ -92,6 +75,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
}
CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix);
} else {
const StringRef Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));

View File

@ -341,33 +341,6 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBNumbering.resize(BlockNo);
}
/// This is used with -fbasic-block-sections or -fbasicblock-labels option.
/// A unary encoding of basic block labels is done to keep ".strtab" sizes
/// small.
///
/// Fills BBSectionsSymbolPrefix with one character per block ID (defaulting
/// to 'a') that records the kind of each basic block in this function.
void MachineFunction::createBBLabels() {
const TargetInstrInfo *TII = getSubtarget().getInstrInfo();
this->BBSectionsSymbolPrefix.resize(getNumBlockIDs(), 'a');
for (auto MBBI = begin(), E = end(); MBBI != E; ++MBBI) {
assert(
(MBBI->getNumber() >= 0 && MBBI->getNumber() < (int)getNumBlockIDs()) &&
"BasicBlock number was out of range!");
// 'a' - Normal block.
// 'r' - Return block.
// 'l' - Landing Pad.
// 'L' - Return and landing pad.
bool isEHPad = MBBI->isEHPad();
// A block ending in a tail call is not tagged as a return block here.
bool isRetBlock = MBBI->isReturnBlock() && !TII->isTailCall(MBBI->back());
char type = 'a';
if (isEHPad && isRetBlock)
type = 'L';
else if (isEHPad)
type = 'l';
else if (isRetBlock)
type = 'r';
// The tag is stored at the index given by the block's number.
BBSectionsSymbolPrefix[MBBI->getNumber()] = type;
}
}
/// This method iterates over the basic blocks and assigns their IsBeginSection
/// and IsEndSection fields. This must be called after MBB layout is finalized
/// and the SectionID's are assigned to MBBs.

View File

@ -953,3 +953,21 @@ MCObjectFileInfo::getStackSizesSection(const MCSection &TextSec) const {
GroupName, MCSection::NonUniqueID,
cast<MCSymbolELF>(TextSec.getBeginSymbol()));
}
/// Returns the ".bb_addr_map" section associated with the text section
/// \p TextSec, or nullptr when the object file format is not ELF.
///
/// The returned section is SHT_PROGBITS with SHF_LINK_ORDER and is linked to
/// the begin symbol of \p TextSec. If \p TextSec belongs to a group, the
/// returned section is placed in the same group (SHF_GROUP).
MCSection *
MCObjectFileInfo::getBBAddrMapSection(const MCSection &TextSec) const {
if (Env != IsELF)
return nullptr;
// Past the ELF check above, TextSec is treated as an ELF section.
const MCSectionELF &ElfSec = static_cast<const MCSectionELF &>(TextSec);
unsigned Flags = ELF::SHF_LINK_ORDER;
StringRef GroupName;
// Mirror the text section's group membership, if any.
if (const MCSymbol *Group = ElfSec.getGroup()) {
GroupName = Group->getName();
Flags |= ELF::SHF_GROUP;
}
return Ctx->getELFSection(".bb_addr_map", ELF::SHT_PROGBITS, Flags, 0,
GroupName, MCSection::NonUniqueID,
cast<MCSymbolELF>(TextSec.getBeginSymbol()));
}

View File

@ -0,0 +1,35 @@
; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels | FileCheck %s
$_Z4fooTIiET_v = comdat any
define dso_local i32 @_Z3barv() {
ret i32 0
}
;; Check we add SHF_LINK_ORDER for .bb_addr_map and link it with the corresponding .text sections.
; CHECK: .section .text._Z3barv,"ax",@progbits
; CHECK-LABEL: _Z3barv:
; CHECK-NEXT: [[BAR_BEGIN:.Lfunc_begin[0-9]+]]:
; CHECK: .section .bb_addr_map,"o",@progbits,.text._Z3barv{{$}}
; CHECK-NEXT: .quad [[BAR_BEGIN]]
define dso_local i32 @_Z3foov() {
%1 = call i32 @_Z4fooTIiET_v()
ret i32 %1
}
; CHECK: .section .text._Z3foov,"ax",@progbits
; CHECK-LABEL: _Z3foov:
; CHECK-NEXT: [[FOO_BEGIN:.Lfunc_begin[0-9]+]]:
; CHECK: .section .bb_addr_map,"o",@progbits,.text._Z3foov{{$}}
; CHECK-NEXT: .quad [[FOO_BEGIN]]
define linkonce_odr dso_local i32 @_Z4fooTIiET_v() comdat {
ret i32 0
}
;; Check we add .bb_addr_map section to a COMDAT group with the corresponding .text section if such a COMDAT exists.
; CHECK: .section .text._Z4fooTIiET_v,"axG",@progbits,_Z4fooTIiET_v,comdat
; CHECK-LABEL: _Z4fooTIiET_v:
; CHECK-NEXT: [[FOOCOMDAT_BEGIN:.Lfunc_begin[0-9]+]]:
; CHECK: .section .bb_addr_map,"Go",@progbits,_Z4fooTIiET_v,comdat,.text._Z4fooTIiET_v{{$}}
; CHECK-NEXT: .quad [[FOOCOMDAT_BEGIN]]

View File

@ -1,23 +1,24 @@
; Check the basic block sections labels option
; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=labels | FileCheck %s -check-prefix=LINUX-LABELS
; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels | FileCheck %s
define void @_Z3bazb(i1 zeroext) {
%2 = alloca i8, align 1
%3 = zext i1 %0 to i8
store i8 %3, i8* %2, align 1
%4 = load i8, i8* %2, align 1
%5 = trunc i8 %4 to i1
br i1 %5, label %6, label %8
define void @_Z3bazb(i1 zeroext) personality i32 (...)* @__gxx_personality_v0 {
br i1 %0, label %2, label %7
6: ; preds = %1
%7 = call i32 @_Z3barv()
br label %10
2:
%3 = invoke i32 @_Z3barv()
to label %7 unwind label %5
br label %9
8: ; preds = %1
%9 = call i32 @_Z3foov()
br label %10
5:
landingpad { i8*, i32 }
catch i8* null
br label %9
10: ; preds = %8, %6
7:
%8 = call i32 @_Z3foov()
br label %9
9:
ret void
}
@ -25,9 +26,31 @@ declare i32 @_Z3barv() #1
declare i32 @_Z3foov() #1
; LINUX-LABELS: .section
; LINUX-LABELS: _Z3bazb:
; LINUX-LABELS-NOT: .section
; LINUX-LABELS: r.BB._Z3bazb:
; LINUX-LABELS-NOT: .section
; LINUX-LABELS: rr.BB._Z3bazb:
declare i32 @__gxx_personality_v0(...)
; CHECK-LABEL: _Z3bazb:
; CHECK-LABEL: .Lfunc_begin0:
; CHECK-LABEL: .LBB_END0_0:
; CHECK-LABEL: .LBB0_1:
; CHECK-LABEL: .LBB_END0_1:
; CHECK-LABEL: .LBB0_2:
; CHECK-LABEL: .LBB_END0_2:
; CHECK-LABEL: .LBB0_3:
; CHECK-LABEL: .LBB_END0_3:
; CHECK-LABEL: .Lfunc_end0:
; CHECK: .section .bb_addr_map,"o",@progbits,.text
; CHECK-NEXT: .quad .Lfunc_begin0
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .uleb128 .Lfunc_begin0-.Lfunc_begin0
; CHECK-NEXT: .uleb128 .LBB_END0_0-.Lfunc_begin0
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .uleb128 .LBB0_1-.Lfunc_begin0
; CHECK-NEXT: .uleb128 .LBB_END0_1-.LBB0_1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .uleb128 .LBB0_2-.Lfunc_begin0
; CHECK-NEXT: .uleb128 .LBB_END0_2-.LBB0_2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .uleb128 .LBB0_3-.Lfunc_begin0
; CHECK-NEXT: .uleb128 .LBB_END0_3-.LBB0_3
; CHECK-NEXT: .byte 5