forked from OSchip/llvm-project
[MC][AMDGPU][llvm-objdump] Synthesized local labels in disassembly
1. Add an accessor function to MCSymbolizer to retrieve addresses referenced by a symbolizable operand, but not resolved to a symbol. That way, the caller can synthesize labels at those addresses and then retry disassembling the section. 2. Implement that in AMDGPU -- a failed symbol lookup results in the address being added to a vector returned by the new function. 3. Use that in llvm-objdump when using MCSymbolizer (which only happens on AMDGPU) and SymbolizeOperands is on. Differential Revision: https://reviews.llvm.org/D101145 Change-Id: I19087c3bbfece64bad5a56ee88bcc9110d83989e
This commit is contained in:
parent
9e8cde590f
commit
8710eff6c3
|
@ -15,6 +15,7 @@
|
|||
#ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
|
||||
#define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
|
@ -75,6 +76,17 @@ public:
|
|||
virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
|
||||
int64_t Value,
|
||||
uint64_t Address) = 0;
|
||||
|
||||
/// Get the MCSymbolizer's list of addresses that were referenced by
|
||||
/// symbolizable operands but not resolved to a symbol. The caller (some
|
||||
/// code that is disassembling a section or other chunk of code) would
|
||||
/// typically create a synthetic label at each address and add them to its
|
||||
/// list of symbols in the section, before creating a new MCSymbolizer with
|
||||
/// the enhanced symbol list and retrying disassembling the section.
|
||||
/// The returned array is unordered and may have duplicates.
|
||||
/// The returned ArrayRef stops being valid on any call to or destruction of
|
||||
/// the MCSymbolizer object.
|
||||
virtual ArrayRef<uint64_t> getReferencedAddresses() const {
  // Base implementation reports no addresses; subclasses that record
  // unresolved operand addresses override this.
  return ArrayRef<uint64_t>();
}
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -1834,6 +1834,8 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
|
|||
Inst.addOperand(MCOperand::createExpr(Add));
|
||||
return true;
|
||||
}
|
||||
// Add to list of referenced addresses, so caller can synthesize a label.
|
||||
ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -183,6 +183,7 @@ public:
|
|||
class AMDGPUSymbolizer : public MCSymbolizer {
|
||||
private:
|
||||
void *DisInfo;
|
||||
std::vector<uint64_t> ReferencedAddresses;
|
||||
|
||||
public:
|
||||
AMDGPUSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo,
|
||||
|
@ -197,6 +198,10 @@ public:
|
|||
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
|
||||
int64_t Value,
|
||||
uint64_t Address) override;
|
||||
|
||||
/// Return a non-owning view of the ReferencedAddresses vector held by this
/// symbolizer.
ArrayRef<uint64_t> getReferencedAddresses() const override {
  return ArrayRef<uint64_t>(ReferencedAddresses);
}
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
# RUN: yaml2obj %s -o %t
|
||||
# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
|
||||
# RUN: FileCheck %s
|
||||
|
||||
## Expect to find the branch labels.
|
||||
# CHECK: <break_cond_is_arg>:
|
||||
# CHECK: s_branch L1
|
||||
# CHECK: <L0>:
|
||||
# CHECK: s_cbranch_execz L2
|
||||
# CHECK: <L1>:
|
||||
# CHECK: s_branch L0
|
||||
# CHECK: <L2>:
|
||||
|
||||
# I created this YAML starting with this LLVM IR:
|
||||
#
|
||||
# define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
|
||||
# entry:
|
||||
# br label %loop
|
||||
# loop:
|
||||
# %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
|
||||
# %tmp23 = add nuw i32 %tmp23phi, 1
|
||||
# %tmp27 = icmp ult i32 %arg, %tmp23
|
||||
# br i1 %tmp27, label %then, label %endif
|
||||
# then: ; preds = %bb
|
||||
# call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
|
||||
# br label %endif
|
||||
# endif: ; preds = %bb28, %bb
|
||||
# br i1 %breakcond, label %loop, label %loopexit
|
||||
# loopexit:
|
||||
# ret void
|
||||
# }
|
||||
#
|
||||
# declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
|
||||
#
|
||||
# attributes #0 = { nounwind writeonly }
|
||||
#
|
||||
# I compiled it to a relocatable ELF:
|
||||
#
|
||||
# llc -march=amdgcn -mcpu=gfx1030 llvm/a.ll -filetype=obj -o a.elf
|
||||
#
|
||||
# then converted it to YAML:
|
||||
#
|
||||
# obj2yaml a.elf
|
||||
#
|
||||
# then manually removed the BB0_1 etc local symbols.
|
||||
|
||||
--- !ELF
|
||||
FileHeader:
|
||||
Class: ELFCLASS64
|
||||
Data: ELFDATA2LSB
|
||||
Type: ET_REL
|
||||
Machine: EM_AMDGPU
|
||||
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1030 ]
|
||||
Sections:
|
||||
- Name: .text
|
||||
Type: SHT_PROGBITS
|
||||
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
|
||||
AddressAlign: 0x4
|
||||
Content: 00008CBF0000FDBB81020236810385BE800384BE8102847D6AC10689040082BF7E077E88058105817E047E8A080088BF0500887D7E060787070404886A3C87BEF7FF88BF000070E000000104F4FF82BF7E047E880000FDBB1E2080BE
|
||||
- Name: .AMDGPU.config
|
||||
Type: SHT_PROGBITS
|
||||
AddressAlign: 0x1
|
||||
Content: 48B80000000000004CB800000000000060B800000000000004000000000000000800000000000000
|
||||
- Name: .note.GNU-stack
|
||||
Type: SHT_PROGBITS
|
||||
AddressAlign: 0x1
|
||||
- Name: .note
|
||||
Type: SHT_NOTE
|
||||
AddressAlign: 0x4
|
||||
Notes:
|
||||
- Name: AMD
|
||||
Desc: 616D6467636E2D756E6B6E6F776E2D6C696E75782D676E752D67667831303330
|
||||
Type: NT_FREEBSD_PROCSTAT_GROUPS
|
||||
- Type: SectionHeaderTable
|
||||
Sections:
|
||||
- Name: .strtab
|
||||
- Name: .shstrtab
|
||||
- Name: .text
|
||||
- Name: .AMDGPU.config
|
||||
- Name: .note.GNU-stack
|
||||
- Name: .note
|
||||
- Name: .symtab
|
||||
Symbols:
|
||||
- Name: break_cond_is_arg
|
||||
Type: STT_FUNC
|
||||
Section: .text
|
||||
Binding: STB_GLOBAL
|
||||
Size: 0x5C
|
||||
...
|
|
@ -972,6 +972,62 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
|
|||
}
|
||||
}
|
||||
|
||||
// Create an MCSymbolizer for the target and add it to the MCDisassembler.
|
||||
// This is currently only used on AMDGPU, and assumes the format of the
|
||||
// void * argument passed to AMDGPU's createMCSymbolizer.
|
||||
static void addSymbolizer(MCContext &Ctx, const Target *Target,
|
||||
StringRef TripleName, MCDisassembler *DisAsm,
|
||||
uint64_t SectionAddr, ArrayRef<uint8_t> Bytes,
|
||||
SectionSymbolsTy &Symbols,
|
||||
std::vector<std::string *> &SynthesizedLabelNames) {
|
||||
|
||||
std::unique_ptr<MCRelocationInfo> RelInfo(
|
||||
Target->createMCRelocationInfo(TripleName, Ctx));
|
||||
if (!RelInfo)
|
||||
return;
|
||||
std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer(
|
||||
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
|
||||
MCSymbolizer *SymbolizerPtr = &*Symbolizer;
|
||||
DisAsm->setSymbolizer(std::move(Symbolizer));
|
||||
|
||||
if (!SymbolizeOperands)
|
||||
return;
|
||||
|
||||
// Synthesize labels referenced by branch instructions by
|
||||
// disassembling, discarding the output, and collecting the referenced
|
||||
// addresses from the symbolizer.
|
||||
for (size_t Index = 0; Index != Bytes.size();) {
|
||||
MCInst Inst;
|
||||
uint64_t Size;
|
||||
DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index,
|
||||
nulls());
|
||||
if (Size == 0)
|
||||
Size = 1;
|
||||
Index += Size;
|
||||
}
|
||||
ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses();
|
||||
// Copy and sort to remove duplicates.
|
||||
std::vector<uint64_t> LabelAddrs;
|
||||
LabelAddrs.insert(LabelAddrs.end(), LabelAddrsRef.begin(),
|
||||
LabelAddrsRef.end());
|
||||
llvm::sort(LabelAddrs);
|
||||
LabelAddrs.resize(std::unique(LabelAddrs.begin(), LabelAddrs.end()) -
|
||||
LabelAddrs.begin());
|
||||
// Add the labels.
|
||||
for (unsigned LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) {
|
||||
SynthesizedLabelNames.push_back(
|
||||
new std::string((Twine("L") + Twine(LabelNum)).str()));
|
||||
Symbols.push_back(SymbolInfoTy(
|
||||
LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE));
|
||||
}
|
||||
llvm::stable_sort(Symbols);
|
||||
// Recreate the symbolizer with the new symbols list.
|
||||
RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx));
|
||||
Symbolizer.reset(Target->createMCSymbolizer(
|
||||
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
|
||||
DisAsm->setSymbolizer(std::move(Symbolizer));
|
||||
}
|
||||
|
||||
static StringRef getSegmentName(const MachOObjectFile *MachO,
|
||||
const SectionRef &Section) {
|
||||
if (MachO) {
|
||||
|
@ -1134,16 +1190,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
|
|||
|
||||
llvm::sort(MappingSymbols);
|
||||
|
||||
ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
|
||||
unwrapOrError(Section.getContents(), Obj->getFileName()));
|
||||
|
||||
std::vector<std::string *> SynthesizedLabelNames;
|
||||
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
|
||||
// AMDGPU disassembler uses symbolizer for printing labels
|
||||
std::unique_ptr<MCRelocationInfo> RelInfo(
|
||||
TheTarget->createMCRelocationInfo(TripleName, Ctx));
|
||||
if (RelInfo) {
|
||||
std::unique_ptr<MCSymbolizer> Symbolizer(
|
||||
TheTarget->createMCSymbolizer(
|
||||
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
|
||||
DisAsm->setSymbolizer(std::move(Symbolizer));
|
||||
}
|
||||
addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes,
|
||||
Symbols, SynthesizedLabelNames);
|
||||
}
|
||||
|
||||
StringRef SegmentName = getSegmentName(MachO, Section);
|
||||
|
@ -1159,9 +1213,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
|
|||
SmallString<40> Comments;
|
||||
raw_svector_ostream CommentStream(Comments);
|
||||
|
||||
ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
|
||||
unwrapOrError(Section.getContents(), Obj->getFileName()));
|
||||
|
||||
uint64_t VMAAdjustment = 0;
|
||||
if (shouldAdjustVA(Section))
|
||||
VMAAdjustment = AdjustVMA;
|
||||
|
|
Loading…
Reference in New Issue