[llvm-objdump] Split disassembleObject and simplify --{start,stop}-address handling

The main disassembly loop is hard to read due to special handling of ARM
ELF data & ELF data. Split off the logic into two functions
dumpARMELFData and dumpELFData. Hoist some checks outside of the loop.

--start-address --stop-address have redundant checks and minor off-by-1
issues. Fix them.

llvm-svn: 357869
This commit is contained in:
Fangrui Song 2019-04-07 16:33:24 +00:00
parent 32a8e742e2
commit 32087b65e7
1 changed files with 116 additions and 109 deletions

View File

@ -910,6 +910,76 @@ static bool shouldAdjustVA(const SectionRef &Section) {
return false;
}
static uint64_t
dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
const ObjectFile *Obj, ArrayRef<uint8_t> Bytes,
const std::vector<uint64_t> &TextMappingSymsAddr) {
support::endianness Endian =
Obj->isLittleEndian() ? support::little : support::big;
while (Index < End) {
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
outs() << "\t";
if (Index + 4 <= End) {
dumpBytes(Bytes.slice(Index, 4), outs());
outs() << "\t.word\t"
<< format_hex(
support::endian::read32(Bytes.data() + Index, Endian), 10);
Index += 4;
} else if (Index + 2 <= End) {
dumpBytes(Bytes.slice(Index, 2), outs());
outs() << "\t\t.short\t"
<< format_hex(
support::endian::read16(Bytes.data() + Index, Endian), 6);
Index += 2;
} else {
dumpBytes(Bytes.slice(Index, 1), outs());
outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4);
++Index;
}
outs() << "\n";
if (std::binary_search(TextMappingSymsAddr.begin(),
TextMappingSymsAddr.end(), Index))
break;
}
return Index;
}
static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
ArrayRef<uint8_t> Bytes) {
// print out data up to 8 bytes at a time in hex and ascii
uint8_t AsciiData[9] = {'\0'};
uint8_t Byte;
int NumBytes = 0;
for (; Index < End; ++Index) {
if (NumBytes == 0) {
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
outs() << "\t";
}
Byte = Bytes.slice(Index)[0];
outs() << format(" %02x", Byte);
AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
uint8_t IndentOffset = 0;
NumBytes++;
if (Index == End - 1 || NumBytes > 8) {
// Indent the space for less than 8 bytes data.
// 2 spaces for byte and one for space between bytes
IndentOffset = 3 * (8 - NumBytes);
for (int Excess = NumBytes; Excess < 8; Excess++)
AsciiData[Excess] = '\0';
NumBytes = 8;
}
if (NumBytes == 8) {
AsciiData[8] = '\0';
outs() << std::string(IndentOffset, ' ') << " ";
outs() << reinterpret_cast<char *>(AsciiData);
outs() << '\n';
NumBytes = 0;
}
}
}
static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
MCContext &Ctx, MCDisassembler *DisAsm,
const MCInstrAnalysis *MIA, MCInstPrinter *IP,
@ -1081,10 +1151,13 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// Check if we need to skip symbol
// Skip if the symbol's data is not between StartAddress and StopAddress
if (End + SectionAddr < StartAddress ||
Start + SectionAddr > StopAddress) {
if (End + SectionAddr <= StartAddress ||
Start + SectionAddr >= StopAddress)
continue;
}
// Stop disassembly at the stop address specified
if (End + SectionAddr > StopAddress)
End = StopAddress - SectionAddr;
/// Skip if user requested specific symbols and this is not in the list
if (!DisasmFuncsSet.empty() &&
@ -1099,10 +1172,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
outs() << SectionName << ':';
}
// Stop disassembly at the stop address specified
if (End + SectionAddr > StopAddress)
End = StopAddress - SectionAddr;
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
if (std::get<2>(Symbols[SI]) == ELF::STT_AMDGPU_HSA_KERNEL) {
// skip amd_kernel_code_t at the begining of kernel symbol (256 bytes)
@ -1150,102 +1219,38 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
SectionAddr + Start, DebugOut, CommentStream);
Start += Size;
for (Index = Start; Index < End; Index += Size) {
MCInst Inst;
Index = Start;
if (SectionAddr < StartAddress)
Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
if (Index + SectionAddr < StartAddress ||
Index + SectionAddr > StopAddress) {
// skip byte by byte till StartAddress is reached
Size = 1;
continue;
}
// AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to
// understand what we need to dump. If the data marker is within a
// function, it is denoted as a word/short etc
if (isArmElf(Obj) && std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
!DisassembleAll &&
// If there is a data symbol inside an ELF text section and we are
// only disassembling text (applicable all architectures), we are in a
// situation where we must print the data and not disassemble it.
if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
!DisassembleAll && Section.isText()) {
dumpELFData(SectionAddr, Index, End, Bytes);
Index = End;
}
bool CheckARMELFData = isArmElf(Obj) &&
std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
!DisassembleAll;
MCInst Inst;
while (Index < End) {
// AArch64 ELF binaries can interleave data and text in the same
// section. We rely on the markers introduced to understand what we
// need to dump. If the data marker is within a function, it is
// denoted as a word/short etc.
if (CheckARMELFData &&
std::binary_search(DataMappingSymsAddr.begin(),
DataMappingSymsAddr.end(), Index)) {
// Switch to data.
support::endianness Endian =
Obj->isLittleEndian() ? support::little : support::big;
while (Index < End) {
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
outs() << "\t";
if (Index + 4 <= End) {
dumpBytes(Bytes.slice(Index, 4), outs());
outs() << "\t.word\t"
<< format_hex(support::endian::read32(Bytes.data() + Index,
Endian),
10);
Index += 4;
} else if (Index + 2 <= End) {
dumpBytes(Bytes.slice(Index, 2), outs());
outs() << "\t\t.short\t"
<< format_hex(support::endian::read16(Bytes.data() + Index,
Endian),
6);
Index += 2;
} else {
dumpBytes(Bytes.slice(Index, 1), outs());
outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4);
++Index;
}
outs() << "\n";
if (std::binary_search(TextMappingSymsAddr.begin(),
TextMappingSymsAddr.end(), Index))
break;
}
Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
TextMappingSymsAddr);
continue;
}
// If there is a data symbol inside an ELF text section and we are only
// disassembling text (applicable all architectures),
// we are in a situation where we must print the data and not
// disassemble it.
if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
!DisassembleAll && Section.isText()) {
// print out data up to 8 bytes at a time in hex and ascii
uint8_t AsciiData[9] = {'\0'};
uint8_t Byte;
int NumBytes = 0;
for (Index = Start; Index < End; Index += 1) {
if (((SectionAddr + Index) < StartAddress) ||
((SectionAddr + Index) > StopAddress))
continue;
if (NumBytes == 0) {
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
outs() << "\t";
}
Byte = Bytes.slice(Index)[0];
outs() << format(" %02x", Byte);
AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
uint8_t IndentOffset = 0;
NumBytes++;
if (Index == End - 1 || NumBytes > 8) {
// Indent the space for less than 8 bytes data.
// 2 spaces for byte and one for space between bytes
IndentOffset = 3 * (8 - NumBytes);
for (int Excess = NumBytes; Excess < 8; Excess++)
AsciiData[Excess] = '\0';
NumBytes = 8;
}
if (NumBytes == 8) {
AsciiData[8] = '\0';
outs() << std::string(IndentOffset, ' ') << " ";
outs() << reinterpret_cast<char *>(AsciiData);
outs() << '\n';
NumBytes = 0;
}
}
}
if (Index >= End)
break;
// When -z or --disassemble-zeroes are given we always dissasemble them.
// Otherwise we might want to skip zero bytes we see.
// When -z or --disassemble-zeroes are given we always dissasemble
// them. Otherwise we might want to skip zero bytes we see.
if (!DisassembleZeroes) {
uint64_t MaxOffset = End - Index;
// For -reloc: print zero blocks patched by relocations, so that
@ -1257,23 +1262,23 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
outs() << "\t\t..." << '\n';
Index += N;
if (Index >= End)
break;
continue;
}
}
// Disassemble a real instruction or a data when disassemble all is
// provided
bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
SectionAddr + Index, DebugOut,
CommentStream);
Inst.clear();
bool Disassembled = DisAsm->getInstruction(
Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut,
CommentStream);
if (Size == 0)
Size = 1;
PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
Bytes.slice(Index, Size),
{SectionAddr + Index + VMAAdjustment, Section.getIndex()},
outs(), "", *STI, &SP, &Rels);
PIP.printInst(
*IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
{SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(),
"", *STI, &SP, &Rels);
outs() << CommentStream.str();
Comments.clear();
@ -1343,7 +1348,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
while (RelCur != RelEnd) {
uint64_t Offset = RelCur->getOffset();
// If this relocation is hidden, skip it.
if (getHidden(*RelCur) || ((SectionAddr + Offset) < StartAddress)) {
if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
++RelCur;
continue;
}
@ -1357,7 +1362,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
Expected<section_iterator> SymSI =
RelCur->getSymbol()->getSection();
if (SymSI && *SymSI != Obj->section_end() &&
(shouldAdjustVA(**SymSI)))
shouldAdjustVA(**SymSI))
Offset += AdjustVMA;
}
@ -1366,6 +1371,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
++RelCur;
}
}
Index += Size;
}
}
}