[llvm-objdump] Let --symbolize-operands symbolize basic block addresses based on the SHT_LLVM_BB_ADDR_MAP section.

`--symbolize-operands` already symbolizes branch targets based on the disassembly. When the object file is created with `-fbasic-block-sections=labels` (ELF-only), it will include a SHT_LLVM_BB_ADDR_MAP section which maps basic blocks to their addresses. In such a case, `llvm-objdump` can annotate the disassembly with labels inferred from this section.

In contrast to the current labels, SHT_LLVM_BB_ADDR_MAP-based labels are created for every machine basic block including empty blocks and those which are not branched into (fallthrough blocks).

The old logic is still executed even when the SHT_LLVM_BB_ADDR_MAP section is present, in order to handle functions which have not received an entry in this section.

Reviewed By: jhenderson, MaskRay

Differential Revision: https://reviews.llvm.org/D124560
This commit is contained in:
Rahman Lavaee 2022-05-13 10:32:13 -07:00
parent 9c7c8be4a3
commit 5f7ef65245
7 changed files with 412 additions and 14 deletions

View File

@ -226,6 +226,8 @@ OPTIONS
When printing a PC-relative global symbol reference, print it as an offset from the leading symbol.
When a bb-address-map section is present (i.e., the object file is built with ``-fbasic-block-sections=labels``), labels are retrieved from that section instead.
Only works with PowerPC objects or X86 linked images.
Example:

View File

@ -102,6 +102,12 @@ public:
/// Returns a vector containing a symbol version for each dynamic symbol.
/// Returns an empty vector if version sections do not exist.
Expected<std::vector<VersionEntry>> readDynsymVersions() const;
/// Returns a vector of all BB address maps in the object file. When
/// `TextSectionIndex` is specified, only returns the BB address maps
/// corresponding to the section with that index.
Expected<std::vector<BBAddrMap>>
readBBAddrMap(Optional<unsigned> TextSectionIndex = None) const;
};
class ELFSectionRef : public SectionRef {

View File

@ -812,8 +812,20 @@ struct BBAddrMap {
: Offset(Offset), Size(Size), HasReturn(Metadata & 1),
HasTailCall(Metadata & (1 << 1)), IsEHPad(Metadata & (1 << 2)),
CanFallThrough(Metadata & (1 << 3)){};
// Two entries are equal when their offset, size and all four metadata flags
// agree.
bool operator==(const BBEntry &Other) const {
  if (Offset != Other.Offset || Size != Other.Size)
    return false;
  if (HasReturn != Other.HasReturn || HasTailCall != Other.HasTailCall)
    return false;
  if (IsEHPad != Other.IsEHPad)
    return false;
  return CanFallThrough == Other.CanFallThrough;
}
};
std::vector<BBEntry> BBEntries; // Basic block entries for this function.
// Equality operator for unit testing. Two maps are equal when they have the
// same function address and the same sequence of basic block entries.
// std::vector's operator== compares the sizes before the elements, so maps
// with a different number of entries never compare equal and no entry is read
// past the end of the shorter vector.
bool operator==(const BBAddrMap &Other) const {
  return Addr == Other.Addr && BBEntries == Other.BBEntries;
}
};
} // end namespace object.

View File

@ -670,6 +670,35 @@ ELFObjectFileBase::getPltAddresses() const {
return Result;
}
/// Collects the BB address maps stored in the SHT_LLVM_BB_ADDR_MAP sections of
/// \p EF, in section order.
///
/// When \p TextSectionIndex is set, only the sections whose sh_link refers to
/// the section with that index are decoded; a section whose sh_link cannot be
/// resolved is reported as an error. When it is unset, every
/// SHT_LLVM_BB_ADDR_MAP section is decoded and sh_link is not inspected.
///
/// Returns an error as soon as one of the selected sections fails to decode.
template <class ELFT>
static Expected<std::vector<BBAddrMap>>
readBBAddrMapImpl(const ELFFile<ELFT> &EF,
                  Optional<unsigned> TextSectionIndex) {
  using Elf_Shdr = typename ELFT::Shdr;
  std::vector<BBAddrMap> BBAddrMaps;
  // NOTE(review): cantFail presumes the section table was already validated
  // when the object file was created -- confirm against the callers.
  const auto &Sections = cantFail(EF.sections());
  for (const Elf_Shdr &Sec : Sections) {
    if (Sec.sh_type != ELF::SHT_LLVM_BB_ADDR_MAP)
      continue;
    if (TextSectionIndex) {
      // Only keep the maps that describe the requested text section.
      Expected<const Elf_Shdr *> TextSecOrErr = EF.getSection(Sec.sh_link);
      if (!TextSecOrErr)
        return createError("unable to get the linked-to section for " +
                           describe(EF, Sec) + ": " +
                           toString(TextSecOrErr.takeError()));
      // The linked-to section's index is its position in the section table.
      if (*TextSectionIndex != std::distance(Sections.begin(), *TextSecOrErr))
        continue;
    }
    Expected<std::vector<BBAddrMap>> BBAddrMapOrErr = EF.decodeBBAddrMap(Sec);
    if (!BBAddrMapOrErr)
      return createError("unable to read " + describe(EF, Sec) + ": " +
                         toString(BBAddrMapOrErr.takeError()));
    // Append this section's maps to the result without copying them.
    std::move(BBAddrMapOrErr->begin(), BBAddrMapOrErr->end(),
              std::back_inserter(BBAddrMaps));
  }
  return BBAddrMaps;
}
template <class ELFT>
static Expected<std::vector<VersionEntry>>
readDynsymVersionsImpl(const ELFFile<ELFT> &EF,
@ -738,3 +767,17 @@ ELFObjectFileBase::readDynsymVersions() const {
return readDynsymVersionsImpl(cast<ELF64BEObjectFile>(this)->getELFFile(),
Symbols);
}
// Reads the BB address maps of this object by forwarding to readBBAddrMapImpl
// with the ELFFile of the concrete object file type (32/64-bit, little/big
// endian). TextSectionIndex is passed through unchanged.
Expected<std::vector<BBAddrMap>>
ELFObjectFileBase::readBBAddrMap(Optional<unsigned> TextSectionIndex) const {
  if (const auto *Obj = dyn_cast<ELF32LEObjectFile>(this))
    return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
  if (const auto *Obj = dyn_cast<ELF64LEObjectFile>(this))
    return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
  if (const auto *Obj = dyn_cast<ELF32BEObjectFile>(this))
    return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
  // The only remaining ELF flavour. cast<> asserts on a type mismatch and
  // never yields null, so no null check or unreachable fallback is needed.
  return readBBAddrMapImpl(cast<ELF64BEObjectFile>(this)->getELFFile(),
                           TextSectionIndex);
}

View File

@ -0,0 +1,172 @@
## Test that in the presence of SHT_LLVM_BB_ADDR_MAP sections,
## --symbolize-operands can display <BB*> labels.
# RUN: yaml2obj --docnum=1 %s -o %t1
# RUN: llvm-objdump %t1 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck %s --match-full-lines --check-prefix=INTEL
# RUN: llvm-objdump %t1 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck %s --match-full-lines --check-prefix=ATT
# RUN: yaml2obj --docnum=2 %s -o %t2
# RUN: llvm-objdump %t2 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck %s --match-full-lines --check-prefixes=INTEL,INTEL-MULTISECTION
# RUN: llvm-objdump %t2 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck %s --match-full-lines --check-prefixes=ATT,ATT-MULTISECTION
## Expect to find the branch and basic block labels and global variable name.
# ATT: <foo>:
# ATT-NEXT: <BB0>:
# ATT-NEXT: pushq %rax
# ATT-NEXT: <BB1>:
# ATT-NEXT: cmpl , %eax <symbol>
# ATT-NEXT: nop
# ATT-NEXT: <BB2>:
# ATT-NEXT: jge <BB3>
# ATT-NEXT: jmp <BB1>
# ATT-NEXT: <BB3>:
# ATT-NEXT: retq
# ATT-MULTISECTION: <bar>:
# ATT-MULTISECTION-NEXT: <BB0>:
# ATT-MULTISECTION-NEXT: pushq %rax
# ATT-MULTISECTION-NEXT: movl %edx, %eax
# ATT-MULTISECTION-NEXT: je <BB2>
# ATT-MULTISECTION-NEXT: <BB1>:
# ATT-MULTISECTION-NEXT: xorl %esi, %esi
# ATT-MULTISECTION-NEXT: <BB2>:
# ATT-MULTISECTION-NEXT: callq <bar>
# ATT-MULTISECTION-NEXT: retq
# INTEL: <foo>:
# INTEL-NEXT: <BB0>:
# INTEL-NEXT: push rax
# INTEL-NEXT: <BB1>:
# INTEL-NEXT: cmp eax, dword ptr <symbol>
# INTEL-NEXT: nop
# INTEL-NEXT: <BB2>:
# INTEL-NEXT: jge <BB3>
# INTEL-NEXT: jmp <BB1>
# INTEL-NEXT: <BB3>:
# INTEL-NEXT: ret
# INTEL-MULTISECTION: <bar>:
# INTEL-MULTISECTION-NEXT: <BB0>:
# INTEL-MULTISECTION-NEXT: push rax
# INTEL-MULTISECTION-NEXT: mov eax, edx
# INTEL-MULTISECTION-NEXT: je <BB2>
# INTEL-MULTISECTION-NEXT: <BB1>:
# INTEL-MULTISECTION-NEXT: xor esi, esi
# INTEL-MULTISECTION-NEXT: <BB2>:
# INTEL-MULTISECTION-NEXT: call <bar>
# INTEL-MULTISECTION-NEXT: ret
## This object file contains a text section, a SHT_LLVM_BB_ADDR_MAP section
## linked to it, and a data section.
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_EXEC
Machine: EM_X86_64
Sections:
- Name: .text
Type: SHT_PROGBITS
Address: 0x4000
Flags: [SHF_ALLOC, SHF_EXECINSTR]
Content: '503b0505100000907d02ebf5c3'
- Name: .data
Type: SHT_PROGBITS
Flags: [SHF_ALLOC, SHF_WRITE]
Address: 0x5000
- Name: bb_addr_map_1
Type: SHT_LLVM_BB_ADDR_MAP
Link: .text
Entries:
- Address: 0x4000
BBEntries:
- AddressOffset: 0x0
Size: 0x1
Metadata: 0x1
- AddressOffset: 0x1
Size: 0x6
Metadata: 0x0
- AddressOffset: 0x8
Size: 0x3
Metadata: 0x0
- AddressOffset: 0xc
Size: 0x1
Metadata: 0x2
Symbols:
- Name: foo
Section: .text
Value: 0x4000
- Name: symbol
Section: .data
Value: 0x500c
## This object file contains a separate text section and SHT_LLVM_BB_ADDR_MAP
## section for each of the two functions foo and bar. foo's sections are
## identical to the ones above, except for the displacement to `symbol`, since
## .data is now placed at 0x6000.
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_EXEC
Machine: EM_X86_64
Sections:
- Name: .text.foo
Type: SHT_PROGBITS
Address: 0x4000
Flags: [SHF_ALLOC, SHF_EXECINSTR]
Content: '503b0505200000907d02ebf5c3'
- Name: .text.bar
Type: SHT_PROGBITS
Address: 0x5000
Flags: [SHF_ALLOC, SHF_EXECINSTR]
Content: '5089d0740231f6e8f4ffffffc3'
- Name: .data
Type: SHT_PROGBITS
Flags: [SHF_ALLOC, SHF_WRITE]
Address: 0x6000
- Name: bb_addr_map.foo
Type: SHT_LLVM_BB_ADDR_MAP
Link: .text.foo
Entries:
- Address: 0x4000
BBEntries:
- AddressOffset: 0x0
Size: 0x1
Metadata: 0x1
- AddressOffset: 0x1
Size: 0x6
Metadata: 0x0
- AddressOffset: 0x8
Size: 0x3
Metadata: 0x0
- AddressOffset: 0xc
Size: 0x1
Metadata: 0x2
- Name: bb_addr_map.bar
Type: SHT_LLVM_BB_ADDR_MAP
Link: .text.bar
Entries:
- Address: 0x5000
BBEntries:
- AddressOffset: 0x0
Size: 0x1
Metadata: 0x1
- AddressOffset: 0x5
Size: 0x2
Metadata: 0x0
- AddressOffset: 0x7
Size: 0x6
Metadata: 0x0
Symbols:
- Name: foo
Section: .text.foo
Value: 0x4000
- Name: bar
Section: .text.bar
Value: 0x5000
- Name: symbol
Section: .data
Value: 0x600c

View File

@ -53,6 +53,7 @@
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/FaultMapParser.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
@ -982,11 +983,29 @@ static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj,
}
static void
collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
MCDisassembler *DisAsm, MCInstPrinter *IP,
const MCSubtargetInfo *STI, uint64_t SectionAddr,
uint64_t Start, uint64_t End,
std::unordered_map<uint64_t, std::string> &Labels) {
collectBBAddrMapLabels(const std::unordered_map<uint64_t, BBAddrMap> &AddrToBBAddrMap,
uint64_t SectionAddr, uint64_t Start, uint64_t End,
std::unordered_map<uint64_t, std::vector<std::string>> &Labels) {
if (AddrToBBAddrMap.empty())
return;
Labels.clear();
uint64_t StartAddress = SectionAddr + Start;
uint64_t EndAddress = SectionAddr + End;
auto Iter = AddrToBBAddrMap.find(StartAddress);
if (Iter == AddrToBBAddrMap.end())
return;
for (unsigned I = 0, Size = Iter->second.BBEntries.size(); I < Size; ++I) {
uint64_t BBAddress = Iter->second.BBEntries[I].Offset + Iter->second.Addr;
if (BBAddress >= EndAddress)
continue;
Labels[BBAddress].push_back(("BB" + Twine(I)).str());
}
}
static void collectLocalBranchTargets(
ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA, MCDisassembler *DisAsm,
MCInstPrinter *IP, const MCSubtargetInfo *STI, uint64_t SectionAddr,
uint64_t Start, uint64_t End, std::unordered_map<uint64_t, std::string> &Labels) {
// So far only supports PowerPC and X86.
if (!STI->getTargetTriple().isPPC() && !STI->getTargetTriple().isX86())
return;
@ -1015,7 +1034,6 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
!(STI->getTargetTriple().isPPC() && Target == Index))
Labels[Target] = ("L" + Twine(LabelCount++)).str();
}
Index += Size;
}
}
@ -1250,6 +1268,20 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (!SectSize)
continue;
// Maps a function's address to its BB address map, for the functions of the
// current section. Only populated for ELF objects when operands are being
// symbolized.
std::unordered_map<uint64_t, BBAddrMap> AddrToBBAddrMap;
if (SymbolizeOperands) {
  if (auto *Elf = dyn_cast<ELFObjectFileBase>(Obj)) {
    // Read the BB-address-map corresponding to this section, if present.
    auto SectionBBAddrMapsOrErr = Elf->readBBAddrMap(Section.getIndex());
    if (!SectionBBAddrMapsOrErr) {
      // Only warn on failure. The errored Expected must not be dereferenced,
      // so AddrToBBAddrMap simply stays empty for this section.
      reportWarning(toString(SectionBBAddrMapsOrErr.takeError()),
                    Obj->getFileName());
    } else {
      for (auto &FunctionBBAddrMap : *SectionBBAddrMapsOrErr)
        AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr,
                                std::move(FunctionBBAddrMap));
    }
  }
}
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<MappingSymbolPair> MappingSymbols;
@ -1413,9 +1445,13 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
formatted_raw_ostream FOS(outs());
std::unordered_map<uint64_t, std::string> AllLabels;
if (SymbolizeOperands)
std::unordered_map<uint64_t, std::vector<std::string>> BBAddrMapLabels;
if (SymbolizeOperands) {
collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI,
SectionAddr, Index, End, AllLabels);
collectBBAddrMapLabels(AddrToBBAddrMap, SectionAddr, Index, End,
BBAddrMapLabels);
}
while (Index < End) {
// ARM and AArch64 ELF binaries can interleave data and text in the
@ -1459,9 +1495,15 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
// Print local label if there's any.
auto Iter = AllLabels.find(SectionAddr + Index);
if (Iter != AllLabels.end())
FOS << "<" << Iter->second << ">:\n";
auto Iter1 = BBAddrMapLabels.find(SectionAddr + Index);
if (Iter1 != BBAddrMapLabels.end()) {
for (StringRef Label : Iter1->second)
FOS << "<" << Label << ">:\n";
} else {
auto Iter2 = AllLabels.find(SectionAddr + Index);
if (Iter2 != AllLabels.end())
FOS << "<" << Iter2->second << ">:\n";
}
// Disassemble a real instruction or a data when disassemble all is
// provided
@ -1556,6 +1598,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
// Print the labels corresponding to the target if there's any.
bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target);
bool LabelAvailable = AllLabels.count(Target);
if (TargetSym != nullptr) {
uint64_t TargetAddress = TargetSym->Addr;
@ -1569,14 +1612,18 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// Always Print the binary symbol precisely corresponding to
// the target address.
*TargetOS << TargetName;
} else if (!LabelAvailable) {
} else if (BBAddrMapLabelAvailable) {
*TargetOS << BBAddrMapLabels[Target].front();
} else if (LabelAvailable) {
*TargetOS << AllLabels[Target];
} else {
// Always Print the binary symbol plus an offset if there's no
// local label corresponding to the target address.
*TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp);
} else {
*TargetOS << AllLabels[Target];
}
*TargetOS << ">";
} else if (BBAddrMapLabelAvailable) {
*TargetOS << " <" << BBAddrMapLabels[Target].front() << ">";
} else if (LabelAvailable) {
*TargetOS << " <" << AllLabels[Target] << ">";
}

View File

@ -497,7 +497,7 @@ Sections:
}
// Tests for error paths of the ELFFile::decodeBBAddrMap API.
TEST(ELFObjectFileTest, InvalidBBAddrMap) {
TEST(ELFObjectFileTest, InvalidDecodeBBAddrMap) {
StringRef CommonYamlString(R"(
--- !ELF
FileHeader:
@ -604,6 +604,122 @@ Sections:
"ULEB128 value at offset 0x8 exceeds UINT32_MAX (0x100000000)");
}
// Test for the ELFObjectFile::readBBAddrMap API.
TEST(ELFObjectFileTest, ReadBBAddrMap) {
  // Three bb-address-map sections: two linked to section 1 and one that keeps
  // the default link (0).
  StringRef CommonYamlString(R"(
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_EXEC
Sections:
- Name: .llvm_bb_addr_map_1
Type: SHT_LLVM_BB_ADDR_MAP
Link: 1
Entries:
- Address: 0x11111
BBEntries:
- AddressOffset: 0x0
Size: 0x1
Metadata: 0x2
- Name: .llvm_bb_addr_map_2
Type: SHT_LLVM_BB_ADDR_MAP
Link: 1
Entries:
- Address: 0x22222
BBEntries:
- AddressOffset: 0x0
Size: 0x2
Metadata: 0x4
- Name: .llvm_bb_addr_map
Type: SHT_LLVM_BB_ADDR_MAP
# Link: 0 (by default)
Entries:
- Address: 0x33333
BBEntries:
- AddressOffset: 0x0
Size: 0x3
Metadata: 0x6
)");

  // Expected decoded maps, one per section above.
  BBAddrMap E1 = {0x11111, {{0x0, 0x1, 0x2}}};
  BBAddrMap E2 = {0x22222, {{0x0, 0x2, 0x4}}};
  BBAddrMap E3 = {0x33333, {{0x0, 0x3, 0x6}}};
  std::vector<BBAddrMap> Section0BBAddrMaps = {E3};
  std::vector<BBAddrMap> Section1BBAddrMaps = {E1, E2};
  std::vector<BBAddrMap> AllBBAddrMaps = {E1, E2, E3};

  // Builds the object from YamlString and checks that readBBAddrMap succeeds
  // and returns exactly ExpectedResult.
  auto DoCheckSucceeds = [&](StringRef YamlString,
                             Optional<unsigned> TextSectionIndex,
                             std::vector<BBAddrMap> ExpectedResult) {
    SmallString<0> Storage;
    Expected<ELFObjectFile<ELF64LE>> ElfOrErr =
        toBinary<ELF64LE>(Storage, YamlString);
    ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded());

    Expected<const typename ELF64LE::Shdr *> BBAddrMapSecOrErr =
        ElfOrErr->getELFFile().getSection(1);
    ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded());
    auto BBAddrMaps = ElfOrErr->readBBAddrMap(TextSectionIndex);
    // Must be a fatal assertion: dereferencing a failed Expected below would
    // crash the test instead of reporting the failure.
    ASSERT_THAT_EXPECTED(BBAddrMaps, Succeeded());
    EXPECT_EQ(*BBAddrMaps, ExpectedResult);
  };

  // Builds the object from YamlString and checks that readBBAddrMap fails
  // with exactly ErrMsg.
  auto DoCheckFails = [&](StringRef YamlString,
                          Optional<unsigned> TextSectionIndex,
                          const char *ErrMsg) {
    SmallString<0> Storage;
    Expected<ELFObjectFile<ELF64LE>> ElfOrErr =
        toBinary<ELF64LE>(Storage, YamlString);
    ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded());

    Expected<const typename ELF64LE::Shdr *> BBAddrMapSecOrErr =
        ElfOrErr->getELFFile().getSection(1);
    ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded());
    EXPECT_THAT_ERROR(ElfOrErr->readBBAddrMap(TextSectionIndex).takeError(),
                      FailedWithMessage(ErrMsg));
  };

  // Check that we can retrieve the data in the normal case.
  DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/None, AllBBAddrMaps);
  DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps);
  DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section1BBAddrMaps);
  // Check that when no bb-address-map section is found for a text section,
  // we return an empty result.
  DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, {});

  // Check that we detect when a bb-addr-map section is linked to an invalid
  // (not present) section.
  SmallString<128> InvalidLinkedYamlString(CommonYamlString);
  InvalidLinkedYamlString += R"(
Link: 10
)";

  DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/1,
               "unable to get the linked-to section for SHT_LLVM_BB_ADDR_MAP "
               "section with index 3: invalid section index: 10");
  // Linked sections are not checked when we don't target a specific text
  // section.
  DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/None,
                  AllBBAddrMaps);

  // Check that we can detect when bb-address-map decoding fails.
  SmallString<128> TruncatedYamlString(CommonYamlString);
  TruncatedYamlString += R"(
ShSize: 0x8
)";

  DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/None,
               "unable to read SHT_LLVM_BB_ADDR_MAP section with index 3: "
               "unable to decode LEB128 at offset 0x00000008: malformed "
               "uleb128, extends past end");
  // Check that we can read the other section's bb-address-maps which are
  // valid.
  DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/1,
                  Section1BBAddrMaps);
}
// Test for ObjectFile::getRelocatedSection: check that it returns a relocated
// section for executable and relocatable files.
TEST(ELFObjectFileTest, ExecutableWithRelocs) {