[MC][AMDGPU][llvm-objdump] Synthesized local labels in disassembly

1. Add an accessor function to MCSymbolizer to retrieve addresses
   referenced by a symbolizable operand, but not resolved to a symbol.
   That way, the caller can synthesize labels at those addresses and
   then retry disassembling the section.

2. Implement that in AMDGPU -- a failed symbol lookup results in the
   address being added to a vector returned by the new function.

3. Use that in llvm-objdump when using MCSymbolizer (which only happens
   on AMDGPU) and SymbolizeOperands is on.

Differential Revision: https://reviews.llvm.org/D101145

Change-Id: I19087c3bbfece64bad5a56ee88bcc9110d83989e
This commit is contained in:
Tim Renouf 2021-04-23 08:59:24 +01:00
parent 9e8cde590f
commit 8710eff6c3
5 changed files with 170 additions and 11 deletions

View File

@ -15,6 +15,7 @@
#ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
#define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include <algorithm>
#include <cstdint>
@ -75,6 +76,17 @@ public:
virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
int64_t Value,
uint64_t Address) = 0;
/// Return the addresses that symbolizable operands referred to but that
/// could not be resolved to a symbol.
///
/// Intended use: the caller (code disassembling a section or other chunk of
/// code) creates a synthetic label at each returned address, adds those
/// labels to its per-section symbol list, builds a new MCSymbolizer from the
/// enhanced list, and disassembles the section again.
///
/// The result is unordered and may contain duplicates. The returned ArrayRef
/// stops being valid on any call to, or destruction of, this MCSymbolizer.
///
/// This default implementation reports no addresses.
virtual ArrayRef<uint64_t> getReferencedAddresses() const {
  return ArrayRef<uint64_t>();
}
};
} // end namespace llvm

View File

@ -1834,6 +1834,8 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
Inst.addOperand(MCOperand::createExpr(Add));
return true;
}
// Add to list of referenced addresses, so caller can synthesize a label.
ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
return false;
}

View File

@ -183,6 +183,7 @@ public:
class AMDGPUSymbolizer : public MCSymbolizer {
private:
void *DisInfo;
std::vector<uint64_t> ReferencedAddresses;
public:
AMDGPUSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo,
@ -197,6 +198,10 @@ public:
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
int64_t Value,
uint64_t Address) override;
/// Expose the ReferencedAddresses vector as a non-owning view. The view
/// aliases the member's storage, so it is only usable while this symbolizer
/// is alive and the vector has not been modified.
ArrayRef<uint64_t> getReferencedAddresses() const override {
  return ArrayRef<uint64_t>(ReferencedAddresses);
}
};
} // end namespace llvm

View File

@ -0,0 +1,89 @@
# RUN: yaml2obj %s -o %t
# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck %s
## Expect to find the branch labels.
# CHECK: <break_cond_is_arg>:
# CHECK: s_branch L1
# CHECK: <L0>:
# CHECK: s_cbranch_execz L2
# CHECK: <L1>:
# CHECK: s_branch L0
# CHECK: <L2>:
# I created this YAML starting with this LLVM IR:
#
# define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
# entry:
# br label %loop
# loop:
# %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
# %tmp23 = add nuw i32 %tmp23phi, 1
# %tmp27 = icmp ult i32 %arg, %tmp23
# br i1 %tmp27, label %then, label %endif
# then: ; preds = %bb
# call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
# br label %endif
# endif: ; preds = %bb28, %bb
# br i1 %breakcond, label %loop, label %loopexit
# loopexit:
# ret void
# }
#
# declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
#
# attributes #0 = { nounwind writeonly }
#
# I compiled it to a relocatable ELF:
#
# llc -march=amdgcn -mcpu=gfx1030 llvm/a.ll -filetype=obj -o a.elf
#
# then converted it to YAML:
#
# obj2yaml a.elf
#
# then manually removed the BB0_1 etc local symbols.
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1030 ]
Sections:
- Name: .text
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
AddressAlign: 0x4
Content: 00008CBF0000FDBB81020236810385BE800384BE8102847D6AC10689040082BF7E077E88058105817E047E8A080088BF0500887D7E060787070404886A3C87BEF7FF88BF000070E000000104F4FF82BF7E047E880000FDBB1E2080BE
- Name: .AMDGPU.config
Type: SHT_PROGBITS
AddressAlign: 0x1
Content: 48B80000000000004CB800000000000060B800000000000004000000000000000800000000000000
- Name: .note.GNU-stack
Type: SHT_PROGBITS
AddressAlign: 0x1
- Name: .note
Type: SHT_NOTE
AddressAlign: 0x4
Notes:
- Name: AMD
Desc: 616D6467636E2D756E6B6E6F776E2D6C696E75782D676E752D67667831303330
Type: NT_FREEBSD_PROCSTAT_GROUPS
- Type: SectionHeaderTable
Sections:
- Name: .strtab
- Name: .shstrtab
- Name: .text
- Name: .AMDGPU.config
- Name: .note.GNU-stack
- Name: .note
- Name: .symtab
Symbols:
- Name: break_cond_is_arg
Type: STT_FUNC
Section: .text
Binding: STB_GLOBAL
Size: 0x5C
...

View File

@ -972,6 +972,62 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
}
}
// Create an MCSymbolizer for the target and add it to the MCDisassembler.
// This is currently only used on AMDGPU, and assumes the format of the
// void * argument passed to AMDGPU's createMCSymbolizer.
//
// If SymbolizeOperands is on, the section is additionally disassembled once
// with the output discarded. Every address the symbolizer reports through
// getReferencedAddresses() gets a synthesized label "L<n>", numbered in
// ascending address order, which is appended to Symbols. Symbols is then
// re-sorted and a fresh symbolizer is installed so it is built from the
// enlarged list.
//
// Ctx                   - MC context handed to the target factory functions.
// Target                - target providing the relocation-info/symbolizer
//                         factories.
// TripleName            - triple string forwarded to those factories.
// DisAsm                - disassembler that receives the symbolizer.
// SectionAddr           - address corresponding to Bytes[0].
// Bytes                 - contents of the section being disassembled.
// Symbols               - section symbol list; may gain synthesized labels.
// SynthesizedLabelNames - receives the heap-allocated name strings backing
//                         the synthesized labels. This function never frees
//                         them.
//                         NOTE(review): presumably the caller deletes them
//                         once Symbols is no longer used - confirm.
static void addSymbolizer(MCContext &Ctx, const Target *Target,
                          StringRef TripleName, MCDisassembler *DisAsm,
                          uint64_t SectionAddr, ArrayRef<uint8_t> Bytes,
                          SectionSymbolsTy &Symbols,
                          std::vector<std::string *> &SynthesizedLabelNames) {
  std::unique_ptr<MCRelocationInfo> RelInfo(
      Target->createMCRelocationInfo(TripleName, Ctx));
  if (!RelInfo)
    return;
  std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer(
      TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
  // Keep a non-owning pointer; ownership moves into the disassembler below.
  // get() is used instead of &*Symbolizer so that a null result from the
  // factory is not dereferenced.
  MCSymbolizer *SymbolizerPtr = Symbolizer.get();
  DisAsm->setSymbolizer(std::move(Symbolizer));

  // Nothing to synthesize if operand symbolization is off or the target did
  // not provide a symbolizer.
  if (!SymbolizeOperands || !SymbolizerPtr)
    return;

  // Synthesize labels referenced by branch instructions by
  // disassembling, discarding the output, and collecting the referenced
  // addresses from the symbolizer.
  // The loop condition is '<' rather than '!=' so that a reported Size that
  // runs past the end of Bytes terminates the scan instead of slicing out of
  // range on the next iteration.
  for (size_t Index = 0; Index < Bytes.size();) {
    MCInst Inst;
    uint64_t Size = 0;
    DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index,
                           nulls());
    // Always make forward progress, even if nothing was consumed.
    if (Size == 0)
      Size = 1;
    Index += Size;
  }
  ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses();

  // Copy, then sort and drop duplicates. The copy is required because the
  // view belongs to the first symbolizer, which is replaced below.
  std::vector<uint64_t> LabelAddrs(LabelAddrsRef.begin(), LabelAddrsRef.end());
  llvm::sort(LabelAddrs);
  LabelAddrs.erase(std::unique(LabelAddrs.begin(), LabelAddrs.end()),
                   LabelAddrs.end());

  // Add the labels. The name strings are heap-allocated and recorded in
  // SynthesizedLabelNames; the symbol entry is constructed from the string
  // held there.
  for (unsigned LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) {
    SynthesizedLabelNames.push_back(
        new std::string((Twine("L") + Twine(LabelNum)).str()));
    Symbols.push_back(SymbolInfoTy(
        LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE));
  }
  llvm::stable_sort(Symbols);

  // Recreate the symbolizer with the new symbols list.
  RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx));
  Symbolizer.reset(Target->createMCSymbolizer(
      TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
  DisAsm->setSymbolizer(std::move(Symbolizer));
}
static StringRef getSegmentName(const MachOObjectFile *MachO,
const SectionRef &Section) {
if (MachO) {
@ -1134,16 +1190,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
llvm::sort(MappingSymbols);
ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
unwrapOrError(Section.getContents(), Obj->getFileName()));
std::vector<std::string *> SynthesizedLabelNames;
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
std::unique_ptr<MCRelocationInfo> RelInfo(
TheTarget->createMCRelocationInfo(TripleName, Ctx));
if (RelInfo) {
std::unique_ptr<MCSymbolizer> Symbolizer(
TheTarget->createMCSymbolizer(
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
DisAsm->setSymbolizer(std::move(Symbolizer));
}
addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes,
Symbols, SynthesizedLabelNames);
}
StringRef SegmentName = getSegmentName(MachO, Section);
@ -1159,9 +1213,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
unwrapOrError(Section.getContents(), Obj->getFileName()));
uint64_t VMAAdjustment = 0;
if (shouldAdjustVA(Section))
VMAAdjustment = AdjustVMA;