forked from OSchip/llvm-project
Update llvm-objdump’s Mach-O symbolizer code for Objective-C references.
This prints disassembly comments for Objective-C references to CFStrings, Selectors, Classes and method calls. llvm-svn: 220500
This commit is contained in:
parent
52b249b9f4
commit
6f326ce75b
|
@ -2345,11 +2345,47 @@ MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset,
|
|||
}
|
||||
|
||||
MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const {
|
||||
return getStruct<MachO::symtab_command>(this, SymtabLoadCmd);
|
||||
if (SymtabLoadCmd)
|
||||
return getStruct<MachO::symtab_command>(this, SymtabLoadCmd);
|
||||
|
||||
// If there is no SymtabLoadCmd return a load command with zero'ed fields.
|
||||
MachO::symtab_command Cmd;
|
||||
Cmd.cmd = MachO::LC_SYMTAB;
|
||||
Cmd.cmdsize = sizeof(MachO::symtab_command);
|
||||
Cmd.symoff = 0;
|
||||
Cmd.nsyms = 0;
|
||||
Cmd.stroff = 0;
|
||||
Cmd.strsize = 0;
|
||||
return Cmd;
|
||||
}
|
||||
|
||||
MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const {
|
||||
return getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd);
|
||||
if (DysymtabLoadCmd)
|
||||
return getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd);
|
||||
|
||||
// If there is no DysymtabLoadCmd return a load command with zero'ed fields.
|
||||
MachO::dysymtab_command Cmd;
|
||||
Cmd.cmd = MachO::LC_DYSYMTAB;
|
||||
Cmd.cmdsize = sizeof(MachO::dysymtab_command);
|
||||
Cmd.ilocalsym = 0;
|
||||
Cmd.nlocalsym = 0;
|
||||
Cmd.iextdefsym = 0;
|
||||
Cmd.nextdefsym = 0;
|
||||
Cmd.iundefsym = 0;
|
||||
Cmd.nundefsym = 0;
|
||||
Cmd.tocoff = 0;
|
||||
Cmd.ntoc = 0;
|
||||
Cmd.modtaboff = 0;
|
||||
Cmd.nmodtab = 0;
|
||||
Cmd.extrefsymoff = 0;
|
||||
Cmd.nextrefsyms = 0;
|
||||
Cmd.indirectsymoff = 0;
|
||||
Cmd.nindirectsyms = 0;
|
||||
Cmd.extreloff = 0;
|
||||
Cmd.nextrel = 0;
|
||||
Cmd.locreloff = 0;
|
||||
Cmd.nlocrel = 0;
|
||||
return Cmd;
|
||||
}
|
||||
|
||||
MachO::linkedit_data_command
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -1,8 +1,25 @@
|
|||
// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s -check-prefix=OBJ
|
||||
// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.exe.macho-x86_64 | FileCheck %s -check-prefix=EXE
|
||||
// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.obj.macho-x86_64 | FileCheck %s -check-prefix=ObjC-OBJ
|
||||
// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.exe.macho-x86_64 | FileCheck %s -check-prefix=ObjC-EXE
|
||||
|
||||
OBJ: 0000000000000008 leaq L_.str(%rip), %rax ## literal pool for: "Hello world\n"
|
||||
OBJ: 0000000000000026 callq _printf
|
||||
|
||||
EXE: 0000000100000f38 leaq 0x4f(%rip), %rax ## literal pool for: "Hello world\n"
|
||||
EXE: 0000000100000f56 callq 0x100000f6c ## symbol stub for: _printf
|
||||
|
||||
ObjC-OBJ: 0000000000000008 leaq 0xb1(%rip), %rax ## Objc cfstring ref: @"The current date and time is: %@"
|
||||
ObjC-OBJ: 0000000000000016 movq 0x4b(%rip), %rcx ## Objc class ref: NSObject
|
||||
ObjC-OBJ: 000000000000001d movq 0x64(%rip), %rsi ## Objc selector ref: new
|
||||
ObjC-OBJ: 0000000000000034 movq 0x35(%rip), %rax ## Objc class ref: NSDate
|
||||
ObjC-OBJ: 000000000000003b movq 0x4e(%rip), %rsi ## Objc selector ref: date
|
||||
|
||||
ObjC-EXE: 0000000100000ee8 leaq 0x159(%rip), %rax ## Objc cfstring ref: @"The current date and time is: %@"
|
||||
ObjC-EXE: 0000000100000ef6 movq 0x13b(%rip), %rcx ## Objc class ref: _OBJC_CLASS_$_NSObject
|
||||
ObjC-EXE: 0000000100000efd movq 0x124(%rip), %rsi ## Objc selector ref: new
|
||||
ObjC-EXE: 0000000100000f0b callq 0x100000f4a ## Objc message: +[NSObject new]
|
||||
ObjC-EXE: 0000000100000f14 movq 0x125(%rip), %rax ## Objc class ref: _OBJC_CLASS_$_NSDate
|
||||
ObjC-EXE: 0000000100000f1b movq 0x10e(%rip), %rsi ## Objc selector ref: date
|
||||
ObjC-EXE: 0000000100000f25 callq 0x100000f4a ## Objc message: +[NSDate date]
|
||||
ObjC-EXE: 0000000100000f33 callq 0x100000f44 ## symbol stub for: _NSLog
|
||||
|
|
|
@ -235,6 +235,9 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
|
|||
}
|
||||
|
||||
typedef DenseMap<uint64_t, StringRef> SymbolAddressMap;
|
||||
typedef std::pair<uint64_t, const char *> BindInfoEntry;
|
||||
typedef std::vector<BindInfoEntry> BindTable;
|
||||
typedef BindTable::iterator bind_table_iterator;
|
||||
|
||||
// The block of info used by the Symbolizer call backs.
|
||||
struct DisassembleInfo {
|
||||
|
@ -242,6 +245,11 @@ struct DisassembleInfo {
|
|||
MachOObjectFile *O;
|
||||
SectionRef S;
|
||||
SymbolAddressMap *AddrMap;
|
||||
std::vector<SectionRef> *Sections;
|
||||
const char *class_name;
|
||||
const char *selector_name;
|
||||
char *method;
|
||||
BindTable *BindTable;
|
||||
};
|
||||
|
||||
// SymbolizerGetOpInfo() is the operand information call back function.
|
||||
|
@ -342,7 +350,7 @@ int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
|||
// TODO:
|
||||
// Second search the external relocation entries of a fully linked image
|
||||
// (if any) for an entry that matches this segment offset.
|
||||
//uint64_t seg_offset = (Pc + Offset);
|
||||
// uint64_t seg_offset = (Pc + Offset);
|
||||
return 0;
|
||||
} else if (Arch == Triple::arm) {
|
||||
return 0;
|
||||
|
@ -445,7 +453,7 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
|
|||
uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
|
||||
if (index < Dysymtab.nindirectsyms) {
|
||||
uint32_t indirect_symbol =
|
||||
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
|
||||
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
|
||||
if (indirect_symbol < Symtab.nsyms) {
|
||||
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
|
||||
SymbolRef Symbol = *Sym;
|
||||
|
@ -479,7 +487,7 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
|
|||
uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
|
||||
if (index < Dysymtab.nindirectsyms) {
|
||||
uint32_t indirect_symbol =
|
||||
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
|
||||
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
|
||||
if (indirect_symbol < Symtab.nsyms) {
|
||||
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
|
||||
SymbolRef Symbol = *Sym;
|
||||
|
@ -500,6 +508,401 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// method_reference() is called passing it the ReferenceName that might be
|
||||
// a reference it to an Objective-C method call. If so then it allocates and
|
||||
// assembles a method call string with the values last seen and saved in
|
||||
// the DisassembleInfo's class_name and selector_name fields. This is saved
|
||||
// into the method field of the info and any previous string is free'ed.
|
||||
// Then the class_name field in the info is set to nullptr. The method call
|
||||
// string is set into ReferenceName and ReferenceType is set to
|
||||
// LLVMDisassembler_ReferenceType_Out_Objc_Message. If this not a method call
|
||||
// then both ReferenceType and ReferenceName are left unchanged.
|
||||
static void method_reference(struct DisassembleInfo *info,
|
||||
uint64_t *ReferenceType,
|
||||
const char **ReferenceName) {
|
||||
if (*ReferenceName != nullptr) {
|
||||
if (strcmp(*ReferenceName, "_objc_msgSend") == 0) {
|
||||
if (info->selector_name != NULL) {
|
||||
if (info->method != nullptr)
|
||||
free(info->method);
|
||||
if (info->class_name != nullptr) {
|
||||
info->method = (char *)malloc(5 + strlen(info->class_name) +
|
||||
strlen(info->selector_name));
|
||||
if (info->method != nullptr) {
|
||||
strcpy(info->method, "+[");
|
||||
strcat(info->method, info->class_name);
|
||||
strcat(info->method, " ");
|
||||
strcat(info->method, info->selector_name);
|
||||
strcat(info->method, "]");
|
||||
*ReferenceName = info->method;
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
|
||||
}
|
||||
} else {
|
||||
info->method = (char *)malloc(9 + strlen(info->selector_name));
|
||||
if (info->method != nullptr) {
|
||||
strcpy(info->method, "-[%rdi ");
|
||||
strcat(info->method, info->selector_name);
|
||||
strcat(info->method, "]");
|
||||
*ReferenceName = info->method;
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
|
||||
}
|
||||
}
|
||||
info->class_name = nullptr;
|
||||
}
|
||||
} else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) {
|
||||
if (info->selector_name != NULL) {
|
||||
if (info->method != nullptr)
|
||||
free(info->method);
|
||||
info->method = (char *)malloc(17 + strlen(info->selector_name));
|
||||
if (info->method != nullptr) {
|
||||
strcpy(info->method, "-[[%rdi super] ");
|
||||
strcat(info->method, info->selector_name);
|
||||
strcat(info->method, "]");
|
||||
*ReferenceName = info->method;
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
|
||||
}
|
||||
info->class_name = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GuessPointerPointer() is passed the address of what might be a pointer to
|
||||
// a reference to an Objective-C class, selector, message ref or cfstring.
|
||||
// If so the value of the pointer is returned and one of the booleans are set
|
||||
// to true. If not zero is returned and all the booleans are set to false.
|
||||
static uint64_t GuessPointerPointer(uint64_t ReferenceValue,
|
||||
struct DisassembleInfo *info,
|
||||
bool &classref, bool &selref, bool &msgref,
|
||||
bool &cfstring) {
|
||||
classref = false;
|
||||
selref = false;
|
||||
msgref = false;
|
||||
cfstring = false;
|
||||
uint32_t LoadCommandCount = info->O->getHeader().ncmds;
|
||||
MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
|
||||
for (unsigned I = 0;; ++I) {
|
||||
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
|
||||
MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
|
||||
for (unsigned J = 0; J < Seg.nsects; ++J) {
|
||||
MachO::section_64 Sec = info->O->getSection64(Load, J);
|
||||
if ((strncmp(Sec.sectname, "__objc_selrefs", 16) == 0 ||
|
||||
strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 ||
|
||||
strncmp(Sec.sectname, "__objc_superrefs", 16) == 0 ||
|
||||
strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0 ||
|
||||
strncmp(Sec.sectname, "__cfstring", 16) == 0) &&
|
||||
ReferenceValue >= Sec.addr &&
|
||||
ReferenceValue < Sec.addr + Sec.size) {
|
||||
uint64_t sect_offset = ReferenceValue - Sec.addr;
|
||||
uint64_t object_offset = Sec.offset + sect_offset;
|
||||
StringRef MachOContents = info->O->getData();
|
||||
uint64_t object_size = MachOContents.size();
|
||||
const char *object_addr = (const char *)MachOContents.data();
|
||||
if (object_offset < object_size) {
|
||||
uint64_t pointer_value;
|
||||
memcpy(&pointer_value, object_addr + object_offset,
|
||||
sizeof(uint64_t));
|
||||
if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
|
||||
sys::swapByteOrder(pointer_value);
|
||||
if (strncmp(Sec.sectname, "__objc_selrefs", 16) == 0)
|
||||
selref = true;
|
||||
else if (strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 ||
|
||||
strncmp(Sec.sectname, "__objc_superrefs", 16) == 0)
|
||||
classref = true;
|
||||
else if (strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0 &&
|
||||
ReferenceValue + 8 < Sec.addr + Sec.size) {
|
||||
msgref = true;
|
||||
memcpy(&pointer_value, object_addr + object_offset + 8,
|
||||
sizeof(uint64_t));
|
||||
if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
|
||||
sys::swapByteOrder(pointer_value);
|
||||
} else if (strncmp(Sec.sectname, "__cfstring", 16) == 0)
|
||||
cfstring = true;
|
||||
return pointer_value;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: Look for LC_SEGMENT for 32-bit Mach-O files.
|
||||
if (I == LoadCommandCount - 1)
|
||||
break;
|
||||
else
|
||||
Load = info->O->getNextLoadCommandInfo(Load);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// get_pointer_64 returns a pointer to the bytes in the object file at the
|
||||
// Address from a section in the Mach-O file. And indirectly returns the
|
||||
// offset into the section, number of bytes left in the section past the offset
|
||||
// and which section is was being referenced. If the Address is not in a
|
||||
// section nullptr is returned.
|
||||
const char *get_pointer_64(uint64_t Address, uint32_t &offset, uint32_t &left,
|
||||
SectionRef &S, DisassembleInfo *info) {
|
||||
offset = 0;
|
||||
left = 0;
|
||||
S = SectionRef();
|
||||
for (unsigned SectIdx = 0; SectIdx != info->Sections->size(); SectIdx++) {
|
||||
uint64_t SectAddress = ((*(info->Sections))[SectIdx]).getAddress();
|
||||
uint64_t SectSize = ((*(info->Sections))[SectIdx]).getSize();
|
||||
if (Address >= SectAddress && Address < SectAddress + SectSize) {
|
||||
S = (*(info->Sections))[SectIdx];
|
||||
offset = Address - SectAddress;
|
||||
left = SectSize - offset;
|
||||
StringRef SectContents;
|
||||
((*(info->Sections))[SectIdx]).getContents(SectContents);
|
||||
return SectContents.data() + offset;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// get_symbol_64() returns the name of a symbol (or nullptr) and the address of
|
||||
// the symbol indirectly through n_value. Based on the relocation information
|
||||
// for the specified section offset in the specified section reference.
|
||||
const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
|
||||
DisassembleInfo *info, uint64_t &n_value) {
|
||||
n_value = 0;
|
||||
if (info->verbose == false)
|
||||
return nullptr;
|
||||
|
||||
// See if there is an external relocation entry at the sect_offset.
|
||||
bool reloc_found = false;
|
||||
DataRefImpl Rel;
|
||||
MachO::any_relocation_info RE;
|
||||
bool isExtern = false;
|
||||
SymbolRef Symbol;
|
||||
for (const RelocationRef &Reloc : S.relocations()) {
|
||||
uint64_t RelocOffset;
|
||||
Reloc.getOffset(RelocOffset);
|
||||
if (RelocOffset == sect_offset) {
|
||||
Rel = Reloc.getRawDataRefImpl();
|
||||
RE = info->O->getRelocation(Rel);
|
||||
if (info->O->isRelocationScattered(RE))
|
||||
continue;
|
||||
isExtern = info->O->getPlainRelocationExternal(RE);
|
||||
if (isExtern) {
|
||||
symbol_iterator RelocSym = Reloc.getSymbol();
|
||||
Symbol = *RelocSym;
|
||||
}
|
||||
reloc_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If there is an external relocation entry for a symbol in this section
|
||||
// at this section_offset then use that symbol's value for the n_value
|
||||
// and return its name.
|
||||
const char *SymbolName = nullptr;
|
||||
if (reloc_found && isExtern) {
|
||||
Symbol.getAddress(n_value);
|
||||
StringRef name;
|
||||
Symbol.getName(name);
|
||||
if (!name.empty()) {
|
||||
SymbolName = name.data();
|
||||
return SymbolName;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: For fully linked images, look through the external relocation
|
||||
// entries off the dynamic symtab command. For these the r_offset is from the
|
||||
// start of the first writeable segment in the Mach-O file. So the offset
|
||||
// to this section from that segment is passed to this routine by the caller,
|
||||
// as the database_offset. Which is the difference of the section's starting
|
||||
// address and the first writable segment.
|
||||
//
|
||||
// NOTE: need add passing the database_offset to this routine.
|
||||
|
||||
// TODO: We did not find an external relocation entry so look up the
|
||||
// ReferenceValue as an address of a symbol and if found return that symbol's
|
||||
// name.
|
||||
//
|
||||
// NOTE: need add passing the ReferenceValue to this routine. Then that code
|
||||
// would simply be this:
|
||||
//
|
||||
// if (ReferenceValue != 0xffffffffffffffffLLU &&
|
||||
// ReferenceValue != 0xfffffffffffffffeLLU) {
|
||||
// StringRef name = info->AddrMap->lookup(ReferenceValue);
|
||||
// if (!name.empty())
|
||||
// SymbolName = name.data();
|
||||
// }
|
||||
|
||||
return SymbolName;
|
||||
}
|
||||
|
||||
// These are structs in the Objective-C meta data that are read to produce the
|
||||
// comments for disassembly. While these are part of the ABI there are no
|
||||
// public definitions. So they are here and not in include/llvm/Support/MachO.h .
|
||||
|
||||
// Layout of a cfstring object as stored in a 64-bit Mach-O file. Every field
// is 64 bits wide; pointer fields hold addresses, not host pointers.
struct cfstring64_t {
  // class64_t * (64-bit pointer)
  uint64_t isa;
  // flag bits
  uint64_t flags;
  // char * (64-bit pointer)
  uint64_t characters;
  // number of non-NULL characters in the string pointed to by characters
  uint64_t length;
};
|
||||
|
||||
// Layout of a class object as stored in a 64-bit Mach-O file. Every field is
// a 64-bit pointer value (an address, not a host pointer).
struct class64_t {
  // class64_t * (64-bit pointer)
  uint64_t isa;
  // class64_t * (64-bit pointer)
  uint64_t superclass;
  // Cache (64-bit pointer)
  uint64_t cache;
  // IMP * (64-bit pointer)
  uint64_t vtable;
  // class_ro64_t * (64-bit pointer)
  uint64_t data;
};
|
||||
|
||||
// Layout of the structure a class64_t's data field points at in a 64-bit
// Mach-O file: four 32-bit words followed by seven 64-bit pointer values.
struct class_ro64_t {
  uint32_t flags;
  uint32_t instanceStart;
  uint32_t instanceSize;
  uint32_t reserved;
  // const uint8_t * (64-bit pointer)
  uint64_t ivarLayout;
  // const char * (64-bit pointer)
  uint64_t name;
  // const method_list_t * (64-bit pointer)
  uint64_t baseMethods;
  // const protocol_list_t * (64-bit pointer)
  uint64_t baseProtocols;
  // const ivar_list_t * (64-bit pointer)
  uint64_t ivars;
  // const uint8_t * (64-bit pointer)
  uint64_t weakIvarLayout;
  // const struct objc_property_list (64-bit pointer)
  uint64_t baseProperties;
};
|
||||
|
||||
inline void swapStruct(struct cfstring64_t &cfs) {
|
||||
sys::swapByteOrder(cfs.isa);
|
||||
sys::swapByteOrder(cfs.flags);
|
||||
sys::swapByteOrder(cfs.characters);
|
||||
sys::swapByteOrder(cfs.length);
|
||||
}
|
||||
|
||||
inline void swapStruct(struct class64_t &c) {
|
||||
sys::swapByteOrder(c.isa);
|
||||
sys::swapByteOrder(c.superclass);
|
||||
sys::swapByteOrder(c.cache);
|
||||
sys::swapByteOrder(c.vtable);
|
||||
sys::swapByteOrder(c.data);
|
||||
}
|
||||
|
||||
inline void swapStruct(struct class_ro64_t &cro) {
|
||||
sys::swapByteOrder(cro.flags);
|
||||
sys::swapByteOrder(cro.instanceStart);
|
||||
sys::swapByteOrder(cro.instanceSize);
|
||||
sys::swapByteOrder(cro.reserved);
|
||||
sys::swapByteOrder(cro.ivarLayout);
|
||||
sys::swapByteOrder(cro.name);
|
||||
sys::swapByteOrder(cro.baseMethods);
|
||||
sys::swapByteOrder(cro.baseProtocols);
|
||||
sys::swapByteOrder(cro.ivars);
|
||||
sys::swapByteOrder(cro.weakIvarLayout);
|
||||
sys::swapByteOrder(cro.baseProperties);
|
||||
}
|
||||
|
||||
static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
|
||||
struct DisassembleInfo *info);
|
||||
|
||||
// get_objc2_64bit_class_name() is used for disassembly and is passed a pointer
|
||||
// to an Objective-C class and returns the class name. It is also passed the
|
||||
// address of the pointer, so when the pointer is zero as it can be in an .o
|
||||
// file, that is used to look for an external relocation entry with a symbol
|
||||
// name.
|
||||
const char *get_objc2_64bit_class_name(uint64_t pointer_value,
|
||||
uint64_t ReferenceValue,
|
||||
struct DisassembleInfo *info) {
|
||||
const char *r;
|
||||
uint32_t offset, left;
|
||||
SectionRef S;
|
||||
|
||||
// The pointer_value can be 0 in an object file and have a relocation
|
||||
// entry for the class symbol at the ReferenceValue (the address of the
|
||||
// pointer).
|
||||
if (pointer_value == 0) {
|
||||
r = get_pointer_64(ReferenceValue, offset, left, S, info);
|
||||
if (r == nullptr || left < sizeof(uint64_t))
|
||||
return nullptr;
|
||||
uint64_t n_value;
|
||||
const char *symbol_name = get_symbol_64(offset, S, info, n_value);
|
||||
if (symbol_name == nullptr)
|
||||
return nullptr;
|
||||
const char *class_name = rindex(symbol_name, '$');
|
||||
if (class_name != nullptr && class_name[1] == '_' && class_name[2] != '\0')
|
||||
return class_name + 2;
|
||||
else
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// The case were the pointer_value is non-zero and points to a class defined
|
||||
// in this Mach-O file.
|
||||
r = get_pointer_64(pointer_value, offset, left, S, info);
|
||||
if (r == nullptr || left < sizeof(struct class64_t))
|
||||
return nullptr;
|
||||
struct class64_t c;
|
||||
memcpy(&c, r, sizeof(struct class64_t));
|
||||
if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
|
||||
swapStruct(c);
|
||||
if (c.data == 0)
|
||||
return nullptr;
|
||||
r = get_pointer_64(c.data, offset, left, S, info);
|
||||
if (r == nullptr || left < sizeof(struct class_ro64_t))
|
||||
return nullptr;
|
||||
struct class_ro64_t cro;
|
||||
memcpy(&cro, r, sizeof(struct class_ro64_t));
|
||||
if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
|
||||
swapStruct(cro);
|
||||
if (cro.name == 0)
|
||||
return nullptr;
|
||||
const char *name = get_pointer_64(cro.name, offset, left, S, info);
|
||||
return name;
|
||||
}
|
||||
|
||||
// get_objc2_64bit_cfstring_name is used for disassembly and is passed a
|
||||
// pointer to a cfstring and returns its name or nullptr.
|
||||
const char *get_objc2_64bit_cfstring_name(uint64_t ReferenceValue,
|
||||
struct DisassembleInfo *info) {
|
||||
const char *r, *name;
|
||||
uint32_t offset, left;
|
||||
SectionRef S;
|
||||
struct cfstring64_t cfs;
|
||||
uint64_t cfs_characters;
|
||||
|
||||
r = get_pointer_64(ReferenceValue, offset, left, S, info);
|
||||
if (r == nullptr || left < sizeof(struct cfstring64_t))
|
||||
return nullptr;
|
||||
memcpy(&cfs, r, sizeof(struct cfstring64_t));
|
||||
if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
|
||||
swapStruct(cfs);
|
||||
if (cfs.characters == 0) {
|
||||
uint64_t n_value;
|
||||
const char *symbol_name = get_symbol_64(
|
||||
offset + offsetof(struct cfstring64_t, characters), S, info, n_value);
|
||||
if (symbol_name == nullptr)
|
||||
return nullptr;
|
||||
cfs_characters = n_value;
|
||||
} else
|
||||
cfs_characters = cfs.characters;
|
||||
name = get_pointer_64(cfs_characters, offset, left, S, info);
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
// get_objc2_64bit_selref() is used for disassembly and is passed a the address
|
||||
// of a pointer to an Objective-C selector reference when the pointer value is
|
||||
// zero as in a .o file and is likely to have a external relocation entry with
|
||||
// who's symbol's n_value is the real pointer to the selector name. If that is
|
||||
// the case the real pointer to the selector name is returned else 0 is
|
||||
// returned
|
||||
uint64_t get_objc2_64bit_selref(uint64_t ReferenceValue,
|
||||
struct DisassembleInfo *info) {
|
||||
uint32_t offset, left;
|
||||
SectionRef S;
|
||||
|
||||
const char *r = get_pointer_64(ReferenceValue, offset, left, S, info);
|
||||
if (r == nullptr || left < sizeof(uint64_t))
|
||||
return 0;
|
||||
uint64_t n_value;
|
||||
const char *symbol_name = get_symbol_64(offset, S, info, n_value);
|
||||
if (symbol_name == nullptr)
|
||||
return 0;
|
||||
return n_value;
|
||||
}
|
||||
|
||||
// GuessLiteralPointer returns a string which for the item in the Mach-O file
|
||||
// for the address passed in as ReferenceValue for printing as a comment with
|
||||
// the instruction and also returns the corresponding type of that item
|
||||
|
@ -509,13 +912,20 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
|
|||
// cstring is returned and ReferenceType is set to
|
||||
// LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr .
|
||||
//
|
||||
// TODO: other literals such as Objective-C CFStrings refs, Selector refs,
|
||||
// Message refs, Class refs and a Symbol address in a literal pool are yet
|
||||
// to be done here.
|
||||
// If ReferenceValue is an address of an Objective-C CFString, Selector ref or
|
||||
// Class ref that name is returned and the ReferenceType is set accordingly.
|
||||
//
|
||||
// Lastly, literals which are Symbol address in a literal pool are looked for
|
||||
// and if found the symbol name is returned and ReferenceType is set to
|
||||
// LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr .
|
||||
//
|
||||
// If there is no item in the Mach-O file for the address passed in as
|
||||
// ReferenceValue nullptr is returned and ReferenceType is unchanged.
|
||||
const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
|
||||
uint64_t *ReferenceType,
|
||||
struct DisassembleInfo *info) {
|
||||
// TODO: This rouine's code is only for an x86_64 Mach-O file for now.
|
||||
// TODO: This rouine's code and the routines it calls are only work with
|
||||
// x86_64 Mach-O files for now.
|
||||
unsigned int Arch = info->O->getArch();
|
||||
if (Arch != Triple::x86_64)
|
||||
return nullptr;
|
||||
|
@ -556,20 +966,71 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: the code to look for other literals such as Objective-C CFStrings
|
||||
// refs, Selector refs, Message refs, Class refs will be added here.
|
||||
// Look for literals such as Objective-C CFStrings refs, Selector refs,
|
||||
// Message refs and Class refs.
|
||||
bool classref, selref, msgref, cfstring;
|
||||
uint64_t pointer_value = GuessPointerPointer(ReferenceValue, info, classref,
|
||||
selref, msgref, cfstring);
|
||||
if (classref == true && pointer_value == 0) {
|
||||
// Note the ReferenceValue is a pointer into the __objc_classrefs section.
|
||||
// And the pointer_value in that section is typically zero as it will be
|
||||
// set by dyld as part of the "bind information".
|
||||
const char *name = get_dyld_bind_info_symbolname(ReferenceValue, info);
|
||||
if (name != nullptr) {
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref;
|
||||
const char *class_name = rindex(name, '$');
|
||||
if (class_name != nullptr && class_name[1] == '_' &&
|
||||
class_name[2] != '\0') {
|
||||
info->class_name = class_name + 2;
|
||||
return name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const char *name = GuessCstringPointer(ReferenceValue, info);
|
||||
if (name) {
|
||||
// TODO: note when the code is added above for Selector refs and Message
|
||||
// refs we will need check for that here and set the ReferenceType
|
||||
// accordingly.
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr;
|
||||
if (classref == true) {
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref;
|
||||
const char *name =
|
||||
get_objc2_64bit_class_name(pointer_value, ReferenceValue, info);
|
||||
if (name != nullptr)
|
||||
info->class_name = name;
|
||||
else
|
||||
name = "bad class ref";
|
||||
return name;
|
||||
}
|
||||
|
||||
// TODO: look for an indirect symbol with this ReferenceValue which is in
|
||||
// a literal pool.
|
||||
if (cfstring == true) {
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref;
|
||||
const char *name = get_objc2_64bit_cfstring_name(ReferenceValue, info);
|
||||
return name;
|
||||
}
|
||||
|
||||
if (selref == true && pointer_value == 0)
|
||||
pointer_value = get_objc2_64bit_selref(ReferenceValue, info);
|
||||
|
||||
if (pointer_value != 0)
|
||||
ReferenceValue = pointer_value;
|
||||
|
||||
const char *name = GuessCstringPointer(ReferenceValue, info);
|
||||
if (name) {
|
||||
if (pointer_value != 0 && selref == true) {
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref;
|
||||
info->selector_name = name;
|
||||
} else if (pointer_value != 0 && msgref == true) {
|
||||
info->class_name = nullptr;
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref;
|
||||
info->selector_name = name;
|
||||
} else
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr;
|
||||
return name;
|
||||
}
|
||||
|
||||
// Lastly look for an indirect symbol with this ReferenceValue which is in
|
||||
// a literal pool. If found return that symbol name.
|
||||
name = GuessIndirectSymbol(ReferenceValue, info);
|
||||
if (name) {
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr;
|
||||
return name;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -584,7 +1045,7 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
|
|||
// Out type and the ReferenceName will also be set which is added as a comment
|
||||
// to the disassembled instruction.
|
||||
//
|
||||
// If the symbol name is a C++ mangled name then the demangled name is
|
||||
// TODO: If the symbol name is a C++ mangled name then the demangled name is
|
||||
// returned through ReferenceName and ReferenceType is set to
|
||||
// LLVMDisassembler_ReferenceType_DeMangled_Name .
|
||||
//
|
||||
|
@ -599,7 +1060,7 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
|
|||
// ReferenceType will be LLVMDisassembler_ReferenceType_In_PCrel_Load then the
|
||||
// SymbolValue is checked to be an address of literal pointer, symbol pointer,
|
||||
// or an Objective-C meta data reference. If so the output ReferenceType is
|
||||
// set to correspond to that as well as ReferenceName.
|
||||
// set to correspond to that as well as setting the ReferenceName.
|
||||
const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue,
|
||||
uint64_t *ReferenceType,
|
||||
uint64_t ReferencePC,
|
||||
|
@ -613,24 +1074,34 @@ const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue,
|
|||
}
|
||||
|
||||
const char *SymbolName = nullptr;
|
||||
StringRef name = info->AddrMap->lookup(ReferenceValue);
|
||||
if (!name.empty())
|
||||
SymbolName = name.data();
|
||||
if (ReferenceValue != 0xffffffffffffffffLLU &&
|
||||
ReferenceValue != 0xfffffffffffffffeLLU) {
|
||||
StringRef name = info->AddrMap->lookup(ReferenceValue);
|
||||
if (!name.empty())
|
||||
SymbolName = name.data();
|
||||
}
|
||||
|
||||
if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) {
|
||||
*ReferenceName = GuessIndirectSymbol(ReferenceValue, info);
|
||||
if (*ReferenceName) {
|
||||
method_reference(info, ReferenceType, ReferenceName);
|
||||
if (*ReferenceType != LLVMDisassembler_ReferenceType_Out_Objc_Message)
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
|
||||
} else
|
||||
// TODO: if SymbolName is not nullptr see if it is a C++ name
|
||||
// and demangle it.
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
|
||||
} else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
|
||||
*ReferenceName =
|
||||
GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info);
|
||||
if (*ReferenceName)
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
|
||||
method_reference(info, ReferenceType, ReferenceName);
|
||||
else
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
|
||||
}
|
||||
else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
|
||||
*ReferenceName = GuessLiteralPointer(ReferenceValue, ReferencePC,
|
||||
ReferenceType, info);
|
||||
if (*ReferenceName == nullptr)
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
|
||||
// TODO: other types of references to be added.
|
||||
} else {
|
||||
// TODO: if SymbolName is not nullptr see if it is a C++ name
|
||||
// and demangle it.
|
||||
else {
|
||||
*ReferenceName = nullptr;
|
||||
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
|
||||
}
|
||||
|
@ -652,8 +1123,8 @@ class DisasmMemoryObject : public MemoryObject {
|
|||
uint64_t Size;
|
||||
uint64_t BasePC;
|
||||
public:
|
||||
DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC) :
|
||||
Bytes(bytes), Size(size), BasePC(basePC) {}
|
||||
DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC)
|
||||
: Bytes(bytes), Size(size), BasePC(basePC) {}
|
||||
|
||||
uint64_t getBase() const override { return BasePC; }
|
||||
uint64_t getExtent() const override { return Size; }
|
||||
|
@ -917,6 +1388,11 @@ static void DisassembleInputMachO2(StringRef Filename,
|
|||
SymbolizerInfo.O = MachOOF;
|
||||
SymbolizerInfo.S = Sections[SectIdx];
|
||||
SymbolizerInfo.AddrMap = &AddrMap;
|
||||
SymbolizerInfo.Sections = &Sections;
|
||||
SymbolizerInfo.class_name = nullptr;
|
||||
SymbolizerInfo.selector_name = nullptr;
|
||||
SymbolizerInfo.method = nullptr;
|
||||
SymbolizerInfo.BindTable = nullptr;
|
||||
|
||||
// Disassemble symbol by symbol.
|
||||
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
|
||||
|
@ -962,6 +1438,9 @@ static void DisassembleInputMachO2(StringRef Filename,
|
|||
uint64_t Size;
|
||||
|
||||
symbolTableWorked = true;
|
||||
DisasmMemoryObject SectionMemoryObject((const uint8_t *)Bytes.data() +
|
||||
Start,
|
||||
End - Start, SectAddress + Start);
|
||||
|
||||
DataRefImpl Symb = Symbols[SymIdx].getRawDataRefImpl();
|
||||
bool isThumb =
|
||||
|
@ -976,7 +1455,7 @@ static void DisassembleInputMachO2(StringRef Filename,
|
|||
if (FullLeadingAddr) {
|
||||
if (MachOOF->is64Bit())
|
||||
outs() << format("%016" PRIx64, PC);
|
||||
else
|
||||
else
|
||||
outs() << format("%08" PRIx64, PC);
|
||||
} else {
|
||||
outs() << format("%8" PRIx64 ":", PC);
|
||||
|
@ -1006,10 +1485,10 @@ static void DisassembleInputMachO2(StringRef Filename,
|
|||
|
||||
bool gotInst;
|
||||
if (isThumb)
|
||||
gotInst = ThumbDisAsm->getInstruction(Inst, Size, MemoryObject, PC,
|
||||
DebugOut, Annotations);
|
||||
gotInst = ThumbDisAsm->getInstruction(Inst, Size, SectionMemoryObject,
|
||||
PC, DebugOut, Annotations);
|
||||
else
|
||||
gotInst = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
|
||||
gotInst = DisAsm->getInstruction(Inst, Size, SectionMemoryObject, PC,
|
||||
DebugOut, Annotations);
|
||||
if (gotInst) {
|
||||
if (!NoShowRawInsn) {
|
||||
|
@ -1036,9 +1515,16 @@ static void DisassembleInputMachO2(StringRef Filename,
|
|||
}
|
||||
outs() << "\n";
|
||||
} else {
|
||||
errs() << "llvm-objdump: warning: invalid instruction encoding\n";
|
||||
if (Size == 0)
|
||||
Size = 1; // skip illegible bytes
|
||||
unsigned int Arch = MachOOF->getArch();
|
||||
if (Arch == Triple::x86_64 || Arch == Triple::x86){
|
||||
outs() << format("\t.byte 0x%02x #bad opcode\n",
|
||||
*(Bytes.data() + Index) & 0xff);
|
||||
Size = 1; // skip exactly one illegible byte and move on.
|
||||
} else {
|
||||
errs() << "llvm-objdump: warning: invalid instruction encoding\n";
|
||||
if (Size == 0)
|
||||
Size = 1; // skip illegible bytes
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1051,12 +1537,12 @@ static void DisassembleInputMachO2(StringRef Filename,
|
|||
MCInst Inst;
|
||||
|
||||
uint64_t PC = SectAddress + Index;
|
||||
if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC,
|
||||
DebugOut, nulls())) {
|
||||
if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC, DebugOut,
|
||||
nulls())) {
|
||||
if (FullLeadingAddr) {
|
||||
if (MachOOF->is64Bit())
|
||||
outs() << format("%016" PRIx64, PC);
|
||||
else
|
||||
else
|
||||
outs() << format("%08" PRIx64, PC);
|
||||
} else {
|
||||
outs() << format("%8" PRIx64 ":", PC);
|
||||
|
@ -1068,12 +1554,23 @@ static void DisassembleInputMachO2(StringRef Filename,
|
|||
IP->printInst(&Inst, outs(), "");
|
||||
outs() << "\n";
|
||||
} else {
|
||||
errs() << "llvm-objdump: warning: invalid instruction encoding\n";
|
||||
if (InstSize == 0)
|
||||
InstSize = 1; // skip illegible bytes
|
||||
unsigned int Arch = MachOOF->getArch();
|
||||
if (Arch == Triple::x86_64 || Arch == Triple::x86){
|
||||
outs() << format("\t.byte 0x%02x #bad opcode\n",
|
||||
*(Bytes.data() + Index) & 0xff);
|
||||
InstSize = 1; // skip exactly one illegible byte and move on.
|
||||
} else {
|
||||
errs() << "llvm-objdump: warning: invalid instruction encoding\n";
|
||||
if (InstSize == 0)
|
||||
InstSize = 1; // skip illegible bytes
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (SymbolizerInfo.method != nullptr)
|
||||
free(SymbolizerInfo.method);
|
||||
if (SymbolizerInfo.BindTable != nullptr)
|
||||
delete SymbolizerInfo.BindTable;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2909,4 +3406,34 @@ void llvm::printMachOWeakBindTable(const object::MachOObjectFile *Obj) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// get_dyld_bind_info_symbolname() is used for disassembly and passed an
|
||||
// address, ReferenceValue, in the Mach-O file and looks in the dyld bind
|
||||
// information for that address. If the address is found its binding symbol
|
||||
// name is returned. If not nullptr is returned.
|
||||
static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
|
||||
struct DisassembleInfo *info) {
|
||||
if (info->BindTable == nullptr) {
|
||||
info->BindTable = new (BindTable);
|
||||
SegInfo sectionTable(info->O);
|
||||
for (const llvm::object::MachOBindEntry &Entry : info->O->bindTable()) {
|
||||
uint32_t SegIndex = Entry.segmentIndex();
|
||||
uint64_t OffsetInSeg = Entry.segmentOffset();
|
||||
uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg);
|
||||
const char *SymbolName = nullptr;
|
||||
StringRef name = Entry.symbolName();
|
||||
if (!name.empty())
|
||||
SymbolName = name.data();
|
||||
info->BindTable->push_back(std::make_pair(Address, SymbolName));
|
||||
}
|
||||
}
|
||||
for (bind_table_iterator BI = info->BindTable->begin(),
|
||||
BE = info->BindTable->end();
|
||||
BI != BE; ++BI) {
|
||||
uint64_t Address = BI->first;
|
||||
if (ReferenceValue == Address) {
|
||||
const char *SymbolName = BI->second;
|
||||
return SymbolName;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue