Object/llvm-objdump: allow dumping of mach-o exports trie

MachOObjectFile in lib/Object currently has no support for parsing the rebase, 
binding, and export information from the LC_DYLD_INFO load command in final 
linked mach-o images. This patch adds support for parsing the exports trie data
structure. It also adds an option to llvm-objdump to dump that export info.

I did the exports parsing first because it is the hardest. The information is 
encoded in a trie structure, but the standard ObjectFile way to inspect content 
is through iterators. So I needed to make an iterator that would do a 
non-recursive walk through the trie and maintain the concatenation of edges 
needed for the current string prefix.

I plan to add similar support in MachOObjectFile and llvm-objdump to 
parse/display the rebasing and binding info too.

llvm-svn: 216808
This commit is contained in:
Nick Kledzik 2014-08-30 00:20:14 +00:00
parent f7765ac9b9
commit d04bc35852
8 changed files with 397 additions and 7 deletions

View File

@ -49,6 +49,57 @@ public:
};
typedef content_iterator<DiceRef> dice_iterator;
/// ExportEntry encapsulates the current-state-of-the-walk used when doing a
/// non-recursive walk of the trie data structure. This allows you to iterate
/// across all exported symbols using:
/// for (const llvm::object::ExportEntry &AnExport : Obj->exports()) {
/// }
///
/// flags(), address(), other(), otherName() and nodeOffset() read the node on
/// top of the internal stack, so they are only meaningful while the walk is
/// positioned on an export; the stack is emptied when the walk is moved to
/// its end state.
class ExportEntry {
public:
// Wraps the raw trie bytes. The walk is not positioned by the constructor.
ExportEntry(ArrayRef<uint8_t> Trie);
// Concatenation of the edge labels on the path to the current node.
StringRef name() const;
// Flags value read from the current node's export info.
uint64_t flags() const;
// Address read from the current node; 0 for re-export entries.
uint64_t address() const;
// Second value of the export info: the dylib ordinal for re-exports, or the
// extra value stored for stub-and-resolver entries; 0 otherwise.
uint64_t other() const;
// Import name stored with a re-export entry; empty when none was recorded.
StringRef otherName() const;
// Byte offset of the current node from the start of the trie.
uint32_t nodeOffset() const;
// Compares two walk positions.
bool operator==(const ExportEntry &) const;
// Advances to the next export node, or to the end state.
void moveNext();
private:
friend class MachOObjectFile;
// Positions the walk on the first export reachable from the root (offset 0).
void moveToFirst();
// Clears the stack and marks the walk as finished.
void moveToEnd();
// Reads one ULEB128 value at p and advances p; flags the trie as malformed
// when p ends up beyond the end of the trie.
uint64_t readULEB128(const uint8_t *&p);
// Descends through first-unvisited children until a childless node is on top.
void pushDownUntilBottom();
// Parses the node at the given trie offset and pushes it on the stack.
void pushNode(uint64_t Offset);
// Represents a node in the mach-o exports trie.
struct NodeState {
NodeState(const uint8_t *Ptr);
// First byte of the node inside the trie.
const uint8_t *Start;
// Read cursor; after parsing it walks the node's child edges.
const uint8_t *Current;
uint64_t Flags;
uint64_t Address;
uint64_t Other;
// Points into the trie at the re-export import name, or is null.
const char *ImportName;
// Number of child edges, as read from the node.
unsigned ChildCount;
// Index of the next child edge that has not been descended into yet.
unsigned NextChildIndex;
// Size of the cumulative string at the moment this node was pushed.
unsigned ParentStringLength;
// True when the node carries export info (non-zero export info size).
bool IsExportNode;
};
// The bytes being walked.
ArrayRef<uint8_t> Trie;
// Edge labels accumulated along the current path.
SmallString<256> CumulativeString;
// Nodes on the path from the root to the current node.
SmallVector<NodeState, 16> Stack;
// Set when the walk detects an inconsistency in the trie.
bool Malformed;
// Set once the walk has reached its end state.
bool Done;
};
typedef content_iterator<ExportEntry> export_iterator;
class MachOObjectFile : public ObjectFile {
public:
struct LoadCommandInfo {
@ -119,7 +170,7 @@ public:
bool &Result) const override;
// MachO specific.
std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &Res);
std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const;
// TODO: Would be useful to have an iterator based version
// of the load command interface too.
@ -144,6 +195,12 @@ public:
dice_iterator begin_dices() const;
dice_iterator end_dices() const;
/// For use iterating over all exported symbols.
iterator_range<export_iterator> exports() const;
/// For use examining a trie not in a MachOObjectFile.
static iterator_range<export_iterator> exports(ArrayRef<uint8_t> Trie);
// In a MachO file, sections have a segment name. This is used in the .o
// files. They have a single segment, but this field specifies which segment
@ -207,6 +264,11 @@ public:
MachO::symtab_command getSymtabLoadCommand() const;
MachO::dysymtab_command getDysymtabLoadCommand() const;
MachO::linkedit_data_command getDataInCodeLoadCommand() const;
// Byte ranges described by the dyld-info load command (LC_DYLD_INFO or
// LC_DYLD_INFO_ONLY). Each accessor returns an empty ArrayRef when the file
// has no such load command.
ArrayRef<uint8_t> getDyldInfoRebaseOpcodes() const;
ArrayRef<uint8_t> getDyldInfoBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoExportsTrie() const;
StringRef getStringTableData() const;
bool is64Bit() const;
@ -237,10 +299,11 @@ private:
typedef SmallVector<const char*, 1> LibraryList;
LibraryList Libraries;
typedef SmallVector<StringRef, 1> LibraryShortName;
LibraryShortName LibrariesShortNames;
mutable LibraryShortName LibrariesShortNames;
const char *SymtabLoadCmd;
const char *DysymtabLoadCmd;
const char *DataInCodeLoadCmd;
const char *DyldInfoLoadCmd;
};
/// DiceRef

View File

@ -322,13 +322,13 @@ namespace llvm {
};
// Bits of the flags value stored with each entry of the exports trie.
enum {
// Mask selecting the symbol-kind bits; compare the masked value against the
// EXPORT_SYMBOL_FLAGS_KIND_* constants.
EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u,
// The exported symbol is a weak definition.
EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u,
// The entry is a re-export: it stores a dylib ordinal and an import name
// instead of an address.
EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u,
// The entry stores one extra value after the address (the resolver).
EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u
};
enum ExportSymbolKind {
EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u,
EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u,
EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u,
EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u

View File

@ -19,6 +19,8 @@
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@ -226,7 +228,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
bool Is64bits, std::error_code &EC)
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
DataInCodeLoadCmd(nullptr) {
DataInCodeLoadCmd(nullptr), DyldInfoLoadCmd(nullptr) {
uint32_t LoadCommandCount = this->getHeader().ncmds;
MachO::LoadCommandType SegmentLoadType = is64Bit() ?
MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT;
@ -242,6 +244,10 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
} else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {
assert(!DataInCodeLoadCmd && "Multiple data in code tables");
DataInCodeLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
assert(!DyldInfoLoadCmd && "Multiple dyldinfo load commands");
DyldInfoLoadCmd = Load.Ptr;
} else if (Load.C.cmd == SegmentLoadType) {
uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
for (unsigned J = 0; J < NumSections; ++J) {
@ -1159,7 +1165,7 @@ guess_qtx:
// It is passed the index (0 - based) of the library as translated from
// GET_LIBRARY_ORDINAL (1 - based).
std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
StringRef &Res) {
StringRef &Res) const {
if (Index >= Libraries.size())
return object_error::parse_failed;
@ -1505,6 +1511,183 @@ dice_iterator MachOObjectFile::end_dices() const {
return dice_iterator(DiceRef(DRI, this));
}
// Creates a walk over the export-trie bytes T. The new object is neither
// positioned on a node nor finished; moveToFirst() or moveToEnd() decides.
ExportEntry::ExportEntry(ArrayRef<uint8_t> T)
    : Trie(T),
      Malformed(false),
      Done(false) {
}
void ExportEntry::moveToFirst() {
pushNode(0);
pushDownUntilBottom();
}
// Puts the walk into its end state: finished, with no nodes on the stack.
void ExportEntry::moveToEnd() {
  Done = true;
  Stack.clear();
}
// Two walks are equal when both are finished, or when neither is finished and
// they sit on the same path: same depth, same accumulated name, and the same
// node start at every level of the stack.
bool ExportEntry::operator==(const ExportEntry &Other) const {
  // Common case, one at end, other iterating from begin.
  if (Done || Other.Done)
    return Done == Other.Done;
  // Not equal if different stack sizes.
  if (Stack.size() != Other.Stack.size())
    return false;
  // Not equal if different cumulative strings. The right-hand side must be
  // the other walk's string; comparing this walk's string with itself would
  // always succeed.
  if (!CumulativeString.str().equals(Other.CumulativeString.str()))
    return false;
  // Equal if all nodes in both stacks match.
  for (size_t I = 0, E = Stack.size(); I != E; ++I) {
    if (Stack[I].Start != Other.Stack[I].Start)
      return false;
  }
  return true;
}
// Decodes one ULEB128 value starting at p and advances p past it.
//
// The decode is done by hand so that every byte access can be checked against
// the end of the trie. If the value is not terminated inside the trie, p is
// left at Trie.end(), the trie is flagged as malformed, and the bits decoded
// so far are returned.
uint64_t ExportEntry::readULEB128(const uint8_t *&p) {
  const uint8_t *const End = Trie.end();
  uint64_t Result = 0;
  unsigned Shift = 0;
  while (p < End) {
    const uint8_t Byte = *p++;
    // Bits beyond the 64th cannot be represented; drop them instead of
    // shifting by an out-of-range amount.
    if (Shift < 64)
      Result |= static_cast<uint64_t>(Byte & 0x7f) << Shift;
    Shift += 7;
    if ((Byte & 0x80) == 0)
      return Result;
  }
  // Ran off the end of the trie before seeing the terminating byte.
  p = End;
  Malformed = true;
  return Result;
}
// Name of the current export: the edge labels accumulated along the path.
StringRef ExportEntry::name() const {
  StringRef Accumulated = CumulativeString.str();
  return Accumulated;
}
uint64_t ExportEntry::flags() const {
return Stack.back().Flags;
}
uint64_t ExportEntry::address() const {
return Stack.back().Address;
}
uint64_t ExportEntry::other() const {
return Stack.back().Other;
}
// Import name recorded for the node on top of the stack, or an empty
// StringRef when the node has none.
StringRef ExportEntry::otherName() const {
  const NodeState &Top = Stack.back();
  return Top.ImportName ? StringRef(Top.ImportName) : StringRef();
}
// Distance in bytes from the start of the trie to the current node.
uint32_t ExportEntry::nodeOffset() const {
  const uint8_t *NodeStart = Stack.back().Start;
  return NodeStart - Trie.begin();
}
// A fresh node state: both cursors at Ptr, every parsed field zeroed, no
// import name, no children, and not (yet) known to be an export node.
ExportEntry::NodeState::NodeState(const uint8_t *Ptr)
    : Start(Ptr),
      Current(Ptr),
      Flags(0),
      Address(0),
      Other(0),
      ImportName(nullptr),
      ChildCount(0),
      NextChildIndex(0),
      ParentStringLength(0),
      IsExportNode(false) {}
void ExportEntry::pushNode(uint64_t offset) {
const uint8_t *Ptr = Trie.begin() + offset;
NodeState State(Ptr);
uint64_t ExportInfoSize = readULEB128(State.Current);
State.IsExportNode = (ExportInfoSize != 0);
const uint8_t* Children = State.Current + ExportInfoSize;
if (State.IsExportNode) {
State.Flags = readULEB128(State.Current);
if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
State.Address = 0;
State.Other = readULEB128(State.Current); // dylib ordinal
State.ImportName = reinterpret_cast<const char*>(State.Current);
} else {
State.Address = readULEB128(State.Current);
if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
State.Other = readULEB128(State.Current);
}
}
State.ChildCount = *Children;
State.Current = Children + 1;
State.NextChildIndex = 0;
State.ParentStringLength = CumulativeString.size();
Stack.push_back(State);
}
void ExportEntry::pushDownUntilBottom() {
while (Stack.back().NextChildIndex < Stack.back().ChildCount) {
NodeState &Top = Stack.back();
CumulativeString.resize(Top.ParentStringLength);
for (;*Top.Current != 0; Top.Current++) {
char c = *Top.Current;
CumulativeString.push_back(c);
}
Top.Current += 1;
uint64_t childNodeIndex = readULEB128(Top.Current);
Top.NextChildIndex += 1;
pushNode(childNodeIndex);
}
if (!Stack.back().IsExportNode) {
Malformed = true;
moveToEnd();
}
}
// We have a trie data structure and need a way to walk it that is compatible
// with the C++ iterator model. The solution is a non-recursive depth first
// traversal where the iterator contains a stack of parent nodes along with a
// string that is the accumulation of all edge strings along the parent chain
// to this point.
//
// There is one “export” node for each exported symbol. But because some
// symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export
// node may have child nodes too.
//
// The algorithm for moveNext() is to keep moving down the leftmost unvisited
// child until hitting a node with no children (which is an export node or
// else the trie is malformed). On the way down, each node is pushed on the
// stack ivar. If there is no more ways down, it pops up one and tries to go
// down a sibling path until a childless node is reached.
void ExportEntry::moveNext() {
if (Stack.empty() || !Stack.back().IsExportNode) {
Malformed = true;
moveToEnd();
return;
}
Stack.pop_back();
while (!Stack.empty()) {
NodeState &Top = Stack.back();
if (Top.NextChildIndex < Top.ChildCount) {
pushDownUntilBottom();
// Now at the next export node.
return;
} else {
if (Top.IsExportNode) {
// This node has no children but is itself an export node.
CumulativeString.resize(Top.ParentStringLength);
return;
}
Stack.pop_back();
}
}
Done = true;
}
// Builds the [first export, end) iterator range over an arbitrary trie.
iterator_range<export_iterator>
MachOObjectFile::exports(ArrayRef<uint8_t> Trie) {
  ExportEntry First(Trie);
  ExportEntry Last(Trie);
  First.moveToFirst();
  Last.moveToEnd();
  export_iterator Begin(First);
  export_iterator End(Last);
  return iterator_range<export_iterator>(Begin, End);
}
// Iterator range over the exports trie of this object file.
iterator_range<export_iterator> MachOObjectFile::exports() const {
  ArrayRef<uint8_t> Trie = getDyldInfoExportsTrie();
  return exports(Trie);
}
StringRef
MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
@ -1748,6 +1931,62 @@ MachOObjectFile::getDataInCodeLoadCommand() const {
return Cmd;
}
// Returns the bytes located by rebase_off / rebase_size of the dyld-info
// load command, or an empty ArrayRef when no such command was recorded.
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
  if (DyldInfoLoadCmd == nullptr)
    return ArrayRef<uint8_t>();

  const MachO::dyld_info_command Cmd =
      getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
  return ArrayRef<uint8_t>(
      reinterpret_cast<const uint8_t *>(getPtr(this, Cmd.rebase_off)),
      Cmd.rebase_size);
}
// Returns the bytes located by bind_off / bind_size of the dyld-info load
// command, or an empty ArrayRef when no such command was recorded.
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const {
  if (DyldInfoLoadCmd == nullptr)
    return ArrayRef<uint8_t>();

  const MachO::dyld_info_command Cmd =
      getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
  return ArrayRef<uint8_t>(
      reinterpret_cast<const uint8_t *>(getPtr(this, Cmd.bind_off)),
      Cmd.bind_size);
}
// Returns the bytes located by weak_bind_off / weak_bind_size of the
// dyld-info load command, or an empty ArrayRef when no such command was
// recorded.
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
  if (DyldInfoLoadCmd == nullptr)
    return ArrayRef<uint8_t>();

  const MachO::dyld_info_command Cmd =
      getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
  return ArrayRef<uint8_t>(
      reinterpret_cast<const uint8_t *>(getPtr(this, Cmd.weak_bind_off)),
      Cmd.weak_bind_size);
}
// Returns the bytes located by lazy_bind_off / lazy_bind_size of the
// dyld-info load command, or an empty ArrayRef when no such command was
// recorded.
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
  if (DyldInfoLoadCmd == nullptr)
    return ArrayRef<uint8_t>();

  const MachO::dyld_info_command Cmd =
      getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
  return ArrayRef<uint8_t>(
      reinterpret_cast<const uint8_t *>(getPtr(this, Cmd.lazy_bind_off)),
      Cmd.lazy_bind_size);
}
// Returns the bytes located by export_off / export_size of the dyld-info
// load command, or an empty ArrayRef when no such command was recorded.
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
  if (DyldInfoLoadCmd == nullptr)
    return ArrayRef<uint8_t>();

  const MachO::dyld_info_command Cmd =
      getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
  return ArrayRef<uint8_t>(
      reinterpret_cast<const uint8_t *>(getPtr(this, Cmd.export_off)),
      Cmd.export_size);
}
StringRef MachOObjectFile::getStringTableData() const {
MachO::symtab_command S = getSymtabLoadCommand();
return getData().substr(S.stroff, S.strsize);

View File

@ -0,0 +1,11 @@
# RUN: llvm-objdump -macho -exports-trie -arch x86_64 \
# RUN: %p/Inputs/exports-trie.macho-x86_64 2>/dev/null | FileCheck %s
# CHECK:[re-export] _malloc (from libSystem)
# CHECK:[re-export] _myfree (_free from libSystem)
# CHECK:0x00000F70 _myWeak [weak_def]
# CHECK:0x00001018 _myTLV [per-thread]
# CHECK:0x12345678 _myAbs [absolute]
# CHECK:0x00000F60 _foo

View File

@ -1783,3 +1783,63 @@ void llvm::printMachOFileHeader(const object::ObjectFile *Obj) {
getAndPrintMachHeader(file, ncmds, filetype, cputype, true);
PrintLoadCommands(file, ncmds, filetype, cputype, true);
}
//===----------------------------------------------------------------------===//
// export trie dumping
//===----------------------------------------------------------------------===//
// Prints one line per entry of the object's exports trie to outs().
//
// Line format:
//   <address or "[re-export] "><name>[ [attr, attr, ...]][ (source)]
// where the attributes are weak_def, per-thread, absolute and
// resolver=0x..., and "(source)" is only printed for re-exports: either
// "(from <dylib>)" or "(<import name> from <dylib>)".
void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
  for (const llvm::object::ExportEntry &Entry : Obj->exports()) {
    const uint64_t Flags = Entry.flags();
    const uint64_t Kind = Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK;
    const bool ReExport = (Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) != 0;
    const bool WeakDef =
        (Flags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) != 0;
    const bool ThreadLocal =
        (Kind == MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL);
    const bool Abs = (Kind == MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE);
    const bool Resolver =
        (Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) != 0;

    if (ReExport)
      outs() << "[re-export] ";
    else
      outs()
          << format("0x%08llX ", Entry.address()); // FIXME:add in base address
    outs() << Entry.name();

    if (WeakDef || ThreadLocal || Resolver || Abs) {
      // Everything on this line goes through outs(). Writing the opening
      // bracket with C stdio would put it in a different buffer and it would
      // come out at an unrelated position.
      outs() << " [";
      // Separator printed before each attribute; empty for the first one.
      const char *Separator = "";
      if (WeakDef) {
        outs() << Separator << "weak_def";
        Separator = ", ";
      }
      if (ThreadLocal) {
        outs() << Separator << "per-thread";
        Separator = ", ";
      }
      if (Abs) {
        outs() << Separator << "absolute";
        Separator = ", ";
      }
      if (Resolver)
        outs() << Separator << format("resolver=0x%08llX", Entry.other());
      outs() << "]";
    }

    if (ReExport) {
      // For re-exports other() holds the 1-based dylib ordinal. The lookup
      // result is deliberately not checked: when the index is out of range
      // the placeholder name is left untouched and printed as is.
      StringRef DylibName = "unknown";
      int Ordinal = Entry.other() - 1;
      Obj->getLibraryShortNameByIndex(Ordinal, DylibName);
      if (Entry.otherName().empty())
        outs() << " (from " << DylibName << ")";
      else
        outs() << " (" << Entry.otherName() << " from " << DylibName << ")";
    }
    outs() << "\n";
  }
}

View File

@ -84,6 +84,9 @@ SectionContents("s", cl::desc("Display the content of each section"));
static cl::opt<bool>
SymbolTable("t", cl::desc("Display the symbol table"));
// -exports-trie: when set, DumpObject() prints the exports trie of the input.
static cl::opt<bool>
ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols"));
static cl::opt<bool>
MachOOpt("macho", cl::desc("Use MachO specific object file parser"));
static cl::alias
@ -829,6 +832,17 @@ static void PrintUnwindInfo(const ObjectFile *o) {
}
}
static void printExportsTrie(const ObjectFile *o) {
outs() << "Exports trie:\n";
if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOExportsTrie(MachO);
else {
errs() << "This operation is only currently supported "
"for Mach-O executable files.\n";
return;
}
}
static void printPrivateFileHeader(const ObjectFile *o) {
if (o->isELF()) {
printELFFileHeader(o);
@ -858,6 +872,8 @@ static void DumpObject(const ObjectFile *o) {
PrintUnwindInfo(o);
if (PrivateHeaders)
printPrivateFileHeader(o);
if (ExportsTrie)
printExportsTrie(o);
}
/// @brief Dump each object file in \a a;
@ -939,7 +955,8 @@ int main(int argc, char **argv) {
&& !SectionContents
&& !SymbolTable
&& !UnwindInfo
&& !PrivateHeaders) {
&& !PrivateHeaders
&& !ExportsTrie) {
cl::PrintHelpMessage();
return 2;
}

View File

@ -35,7 +35,7 @@ void DumpBytes(StringRef bytes);
void DisassembleInputMachO(StringRef Filename);
void printCOFFUnwindInfo(const object::COFFObjectFile* o);
void printMachOUnwindInfo(const object::MachOObjectFile* o);
void printMachOExportsTrie(const object::MachOObjectFile* o);
void printELFFileHeader(const object::ObjectFile *o);
void printCOFFFileHeader(const object::ObjectFile *o);
void printMachOFileHeader(const object::ObjectFile *o);