forked from OSchip/llvm-project
626 lines
16 KiB
C++
626 lines
16 KiB
C++
//===- lib/ReaderWriter/MachO/MachOFormat.hpp -----------------------------===//
|
|
//
|
|
// The LLVM Linker
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
// This file contains all the structs and constants needed to write a
|
|
// mach-o final linked image. The names of the structs and constants
|
|
// are the same as in the darwin native header <mach-o/loader.h> so
|
|
// they will be familiar to anyone who has used that header.
|
|
//
|
|
|
|
#include "llvm/Support/DataTypes.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/Memory.h"
|
|
|
|
#ifndef LLD_READER_WRITER_MACHO_FORMAT_H_
|
|
#define LLD_READER_WRITER_MACHO_FORMAT_H_
|
|
|
|
namespace lld {
|
|
namespace mach_o {
|
|
|
|
|
|
// Magic numbers stored in mach_header.magic.  mach_header::size() uses the
// 64-bit magic to select the 32-byte header layout.
enum {
  MH_MAGIC    = 0xfeedface,
  MH_MAGIC_64 = 0xfeedfacf
};
|
|
|
|
// CPU type identifiers (same names as the darwin headers).
enum {
  CPU_TYPE_ARM    = 0x0000000C,
  CPU_TYPE_I386   = 0x00000007,
  CPU_TYPE_X86_64 = 0x01000007
};
|
|
|
|
// CPU subtype identifiers (same names as the darwin headers).
enum {
  CPU_SUBTYPE_X86_ALL    = 0x00000003,
  CPU_SUBTYPE_X86_64_ALL = 0x00000003,
  CPU_SUBTYPE_ARM_V6     = 0x00000006,
  CPU_SUBTYPE_ARM_V7     = 0x00000009
};
|
|
|
|
// Mach-o file kinds (same names as the darwin headers).
enum {
  MH_OBJECT      = 0x1,
  MH_EXECUTE     = 0x2,
  MH_DYLIB       = 0x6,
  MH_DYLINKER    = 0x7,
  MH_BUNDLE      = 0x8,
  MH_DYLIB_STUB  = 0x9,
  MH_KEXT_BUNDLE = 0xB
};
|
|
|
|
|
|
//
|
|
// Every mach-o file starts with this header. The header size is
|
|
// 28 bytes for 32-bit architectures and 32 bytes for 64-bit architectures.
|
|
//
|
|
class mach_header {
public:
  uint32_t    magic;
  uint32_t    cputype;
  uint32_t    cpusubtype;
  uint32_t    filetype;
  uint32_t    ncmds;        // bumped by recordLoadCommand()
  uint32_t    sizeofcmds;   // bumped by recordLoadCommand()
  uint32_t    flags;
  uint32_t    reserved;     // only emitted when size() is 32

  // Returns the number of header bytes written by copyTo(): 32 when magic is
  // 0xfeedfacf (MH_MAGIC_64), otherwise 28.
  uint64_t size() const {
    return (magic == 0xfeedfacf) ? 32 : 28;
  }

  // Copies the first size() bytes of this header to 'to', which must have
  // room for them.  As with the load commands, byte swapping is not
  // implemented, so 'swap' must be false.
  void copyTo(uint8_t *to, bool swap=false) const {
    assert(!swap && "non-native endianness not supported yet");
    (void)swap;
    // The fields are laid out in on-disk order starting at 'magic'.
    ::memcpy(to, &magic, size());
  }

  // Accounts for one more load command in ncmds and sizeofcmds.
  void recordLoadCommand(const class load_command *lc);
};
|
|
|
|
|
|
//
|
|
// Every mach-o file has a list of load commands after the mach_header.
|
|
// Each load command starts with a type and length, so you can iterate
|
|
// through the load commands even if you don't understand the content
|
|
// of a particular type.
|
|
//
|
|
// The model for handling endianness and 32 vs 64 bitness is that the in-memory
|
|
// object is always 64-bit and the native endianness. The endianness swapping
|
|
// and pointer sizing is done when writing (copyTo method) or when reading
|
|
// (constructor that takes a buffer).
|
|
//
|
|
// The load_command subclasses are designed to mirror the traditional "C"
|
|
// structs, so you can get and set the same field names (e.g. seg->vmaddr = 0).
|
|
//
|
|
class load_command {
public:
  const uint32_t  cmd;      // type of load command
  const uint32_t  cmdsize;  // length of load command including this header

  // cmdNumber: the load command type.
  // sz:        size in bytes of the command as it will be written.
  // is64:      selects 8-byte (true) or 4-byte (false) alignment.
  // align:     when true, sz is rounded up to the alignment; when false, sz
  //            must already be aligned (checked with an assert).
  load_command(uint32_t cmdNumber, uint32_t sz, bool is64, bool align=false)
    : cmd(cmdNumber), cmdsize(pointerAlign(sz, is64, align)) {
  }

  virtual ~load_command() {
  }

  // Writes the on-disk form of this load command to 'to'.
  virtual void copyTo(uint8_t *to, bool swap=false) = 0;

private:
  // Load commands must be pointer-size aligned.  Most load commands are
  // a fixed size, so there is a runtime assert to check those.  For variable
  // length load commands, setting the align option to true will add padding
  // at the end of the load command to round up its size for proper alignment.
  // Static because it is used while the object is still being constructed
  // and reads no member state.
  static uint32_t pointerAlign(uint32_t size, bool is64, bool align) {
    const uint32_t alignment = is64 ? 8 : 4;
    if ( align )
      return (size + alignment - 1) & ~(alignment - 1);
    assert((size % alignment) == 0);
    return size;
  }
};
|
|
|
|
// Accounts for one load command in the header: increments the command count
// and adds the command's (already aligned) size to the running total.
// 'lc' must not be null.
inline void mach_header::recordLoadCommand(const load_command *lc) {
  ++ncmds;
  sizeofcmds += lc->cmdsize;
}
|
|
|
|
// Supported load command types
|
|
// Load command type values, stored in load_command.cmd.
enum {
  LC_SEGMENT        = 0x00000001,
  LC_SYMTAB         = 0x00000002,
  LC_UNIXTHREAD     = 0x00000005,
  LC_LOAD_DYLIB     = 0x0000000C,
  LC_LOAD_DYLINKER  = 0x0000000E,
  LC_SEGMENT_64     = 0x00000019,
  LC_MAIN           = 0x80000028,
  LC_DYLD_INFO_ONLY = 0x80000022
};
|
|
|
|
// Memory protection bit used in segment_command.initprot
|
|
// Bit flags; combine with bitwise OR.
enum {
  VM_PROT_NONE    = 0x0,
  VM_PROT_READ    = 0x1,
  VM_PROT_WRITE   = 0x2,
  VM_PROT_EXECUTE = 0x4
};
|
|
|
|
// Bits for the section.flags field
|
|
enum {
  // Section "type" is the low byte; SECTION_TYPE is the mask that extracts it.
  SECTION_TYPE               = 0x000000FF,
  S_REGULAR                  = 0x00000000,
  S_ZEROFILL                 = 0x00000001,
  S_CSTRING_LITERALS         = 0x00000002,
  S_NON_LAZY_SYMBOL_POINTERS = 0x00000006,
  S_LAZY_SYMBOL_POINTERS     = 0x00000007,
  S_SYMBOL_STUBS             = 0x00000008,

  // Other bits in section.flags
  S_ATTR_PURE_INSTRUCTIONS   = 0x80000000,
  S_ATTR_SOME_INSTRUCTIONS   = 0x00000400
};
|
|
|
|
|
|
// section record for 32-bit architectures
|
|
// On-disk layout of a 32-bit section record: two 16-byte names followed by
// nine 32-bit fields (68 bytes in total).
struct section {
  char      sectname[16];
  char      segname[16];
  uint32_t  addr;
  uint32_t  size;
  uint32_t  offset;
  uint32_t  align;
  uint32_t  reloff;
  uint32_t  nreloc;
  uint32_t  flags;
  uint32_t  reserved1;
  uint32_t  reserved2;
};
|
|
|
|
// section record for 64-bit architectures
|
|
// On-disk layout of a 64-bit section record (80 bytes in total).  Differs
// from the 32-bit record by the 64-bit addr/size and the extra reserved3.
struct section_64 {
  char      sectname[16];
  char      segname[16];
  uint64_t  addr;
  uint64_t  size;
  uint32_t  offset;
  uint32_t  align;
  uint32_t  reloff;
  uint32_t  nreloc;
  uint32_t  flags;
  uint32_t  reserved1;
  uint32_t  reserved2;
  uint32_t  reserved3;
};
|
|
|
|
|
|
//
|
|
// A segment load command has a fixed set of fields followed by an 'nsect'
|
|
// array of section records. The in-memory object uses a pointer to
|
|
// a dynamically allocated array of sections.
|
|
//
|
|
class segment_command : public load_command {
|
|
public:
|
|
char segname[16];
|
|
uint64_t vmaddr;
|
|
uint64_t vmsize;
|
|
uint64_t fileoff;
|
|
uint64_t filesize;
|
|
uint32_t maxprot;
|
|
uint32_t initprot;
|
|
uint32_t nsects;
|
|
uint32_t flags;
|
|
section_64 *sections;
|
|
|
|
segment_command(unsigned sectCount, bool is64)
|
|
: load_command((is64 ? LC_SEGMENT_64 : LC_SEGMENT),
|
|
(is64 ? (72 + sectCount*80) : (56 + sectCount*68)),
|
|
is64),
|
|
vmaddr(0), vmsize(0), fileoff(0), filesize(0),
|
|
maxprot(0), initprot(0), nsects(sectCount), flags(0) {
|
|
sections = new section_64[sectCount];
|
|
this->nsects = sectCount;
|
|
}
|
|
|
|
~segment_command() {
|
|
delete sections;
|
|
}
|
|
|
|
void copyTo(uint8_t *to, bool swap) {
|
|
if ( swap ) {
|
|
assert(0 && "non-native endianness not supported yet");
|
|
}
|
|
else {
|
|
if( is64() ) {
|
|
// in-memory matches on-disk, so copy segment fields followed by sections
|
|
::memcpy(to, (uint8_t*)&cmd, 72);
|
|
if ( nsects != 0 )
|
|
::memcpy(&to[72], sections, sizeof(section_64)*nsects);
|
|
}
|
|
else {
|
|
// on-disk is 32-bit struct, so copy each field
|
|
::memcpy(to, (uint8_t*)&cmd, 24);
|
|
copy32(to, 24, vmaddr);
|
|
copy32(to, 28, vmsize);
|
|
copy32(to, 32, fileoff);
|
|
copy32(to, 36, filesize);
|
|
copy32(to, 40, maxprot);
|
|
copy32(to, 44, initprot);
|
|
copy32(to, 48, nsects);
|
|
copy32(to, 52, flags);
|
|
for(uint32_t i=0; i < nsects; ++i) {
|
|
unsigned off = 56+i*68;
|
|
::memcpy(&to[off], sections[i].sectname, 32);
|
|
copy32(to, off+32, sections[i].addr);
|
|
copy32(to, off+36, sections[i].size);
|
|
copy32(to, off+40, sections[i].offset);
|
|
copy32(to, off+44, sections[i].align);
|
|
copy32(to, off+48, sections[i].reloff);
|
|
copy32(to, off+52, sections[i].nreloc);
|
|
copy32(to, off+56, sections[i].flags);
|
|
copy32(to, off+60, sections[i].reserved1);
|
|
copy32(to, off+64, sections[i].reserved2);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
private:
|
|
void copy32(uint8_t *to, unsigned offset, uint64_t value) {
|
|
uint32_t value32 = value; // FIXME: range check
|
|
::memcpy(&to[offset], &value32, sizeof(uint32_t));
|
|
}
|
|
|
|
bool is64() {
|
|
return (cmd == LC_SEGMENT_64);
|
|
}
|
|
};
|
|
|
|
|
|
|
|
//
|
|
// The dylinker_command contains the path to the dynamic loader to use
|
|
// with the program (e.g. "/usr/lib/dyld"). So, it is variable length.
|
|
// But load commands must be pointer size aligned.
|
|
//
|
|
//
|
|
class dylinker_command : public load_command {
|
|
public:
|
|
uint32_t name_offset;
|
|
private:
|
|
StringRef _name;
|
|
public:
|
|
dylinker_command(StringRef path, bool is64)
|
|
: load_command(LC_LOAD_DYLINKER,12 + path.size(), is64, true),
|
|
name_offset(12), _name(path) {
|
|
}
|
|
|
|
virtual void copyTo(uint8_t *to, bool swap=false) {
|
|
if ( swap ) {
|
|
assert(0 && "non-native endianness not supported yet");
|
|
}
|
|
else {
|
|
// in-memory matches on-disk, so copy first fields followed by path
|
|
::memcpy(to, (uint8_t*)&cmd, 12);
|
|
::memcpy(&to[12], _name.data(), _name.size());
|
|
::memset(&to[12+_name.size()], 0, cmdsize-(12+_name.size()));
|
|
}
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
//
|
|
// The symtab_command just holds the offset to the array of nlist structs
|
|
// and the offsets to the string pool for all symbol names.
|
|
//
|
|
class symtab_command : public load_command {
|
|
public:
|
|
uint32_t symoff;
|
|
uint32_t nsyms;
|
|
uint32_t stroff;
|
|
uint32_t strsize;
|
|
|
|
symtab_command(bool is64)
|
|
: load_command(LC_SYMTAB, 24, is64),
|
|
symoff(0), nsyms(0), stroff(0), strsize(0) {
|
|
}
|
|
|
|
virtual void copyTo(uint8_t *to, bool swap=false) {
|
|
if ( swap ) {
|
|
assert(0 && "non-native endianness not supported yet");
|
|
}
|
|
else {
|
|
// in-memory matches on-disk, so copy fields
|
|
::memcpy(to, (uint8_t*)&cmd, 24);
|
|
}
|
|
}
|
|
|
|
};
|
|
|
|
|
|
//
|
|
// The entry_point_command load command holds the offset to the function
|
|
// _main in a dynamic executable.
|
|
//
|
|
class entry_point_command : public load_command {
|
|
public:
|
|
uint64_t entryoff;
|
|
uint64_t stacksize;
|
|
|
|
entry_point_command(bool is64)
|
|
: load_command(LC_MAIN, 24, is64), entryoff(0), stacksize(0) {
|
|
}
|
|
|
|
virtual void copyTo(uint8_t *to, bool swap=false) {
|
|
if ( swap ) {
|
|
assert(0 && "non-native endianness not supported yet");
|
|
}
|
|
else {
|
|
// in-memory matches on-disk, so copy fields
|
|
::memcpy(to, (uint8_t*)&cmd, 24);
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
//
|
|
// The thread_command load command holds the set of initial register values
|
|
// for a dynamic executable. In reality, only the PC and SP are used.
|
|
//
|
|
class thread_command : public load_command {
|
|
public:
|
|
uint32_t fields_flavor;
|
|
uint32_t fields_count;
|
|
private:
|
|
uint32_t _cpuType;
|
|
uint8_t *_registerArray;
|
|
|
|
public:
|
|
thread_command(uint32_t cpuType, bool is64)
|
|
: load_command(LC_UNIXTHREAD, 16+registersBufferSize(cpuType), is64),
|
|
fields_count(registersBufferSize(cpuType)/4), _cpuType(cpuType) {
|
|
switch ( cpuType ) {
|
|
case CPU_TYPE_I386:
|
|
fields_flavor = 1; // i386_THREAD_STATE
|
|
break;
|
|
case CPU_TYPE_X86_64:
|
|
fields_flavor = 4; // x86_THREAD_STATE64;
|
|
break;
|
|
case CPU_TYPE_ARM:
|
|
fields_flavor = 1; // ARM_THREAD_STATE
|
|
break;
|
|
default:
|
|
assert(0 && "unsupported cpu type");
|
|
}
|
|
_registerArray = reinterpret_cast<uint8_t*>(
|
|
::calloc(registersBufferSize(cpuType), 1));
|
|
assert(_registerArray);
|
|
}
|
|
|
|
virtual void copyTo(uint8_t *to, bool swap=false) {
|
|
if ( swap ) {
|
|
assert(0 && "non-native endianness not supported yet");
|
|
}
|
|
else {
|
|
// in-memory matches on-disk, so copy fixed fields
|
|
::memcpy(to, (uint8_t*)&cmd, 16);
|
|
// that register array
|
|
::memcpy(&to[16], _registerArray, registersBufferSize(_cpuType));
|
|
}
|
|
}
|
|
|
|
void setPC(uint64_t pc) {
|
|
uint32_t *regs32 = reinterpret_cast<uint32_t*>(_registerArray);
|
|
uint64_t *regs64 = reinterpret_cast<uint64_t*>(_registerArray);
|
|
switch ( _cpuType ) {
|
|
case CPU_TYPE_I386:
|
|
regs32[10] = pc;
|
|
break;
|
|
case CPU_TYPE_X86_64:
|
|
regs64[16] = pc;
|
|
break;
|
|
case CPU_TYPE_ARM:
|
|
regs32[15] = pc;
|
|
break;
|
|
default:
|
|
assert(0 && "unsupported cpu type");
|
|
}
|
|
}
|
|
|
|
virtual ~thread_command() {
|
|
::free(_registerArray);
|
|
}
|
|
|
|
private:
|
|
uint32_t registersBufferSize(uint32_t cpuType) {
|
|
switch ( cpuType ) {
|
|
case CPU_TYPE_I386:
|
|
return 64; // i386_THREAD_STATE_COUNT * 4
|
|
case CPU_TYPE_X86_64:
|
|
return 168; // x86_THREAD_STATE64_COUNT * 4
|
|
case CPU_TYPE_ARM:
|
|
return 68; // ARM_THREAD_STATE_COUNT * 4
|
|
}
|
|
assert(0 && "unsupported cpu type");
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
// The dylib_command load command holds the name/path of a dynamic shared
|
|
// library which this mach-o image depends on.
|
|
//
|
|
struct dylib_command : public load_command {
|
|
uint32_t name_offset;
|
|
uint32_t timestamp;
|
|
uint32_t current_version;
|
|
uint32_t compatibility_version;
|
|
private:
|
|
StringRef _loadPath;
|
|
public:
|
|
|
|
dylib_command(StringRef path, bool is64)
|
|
: load_command(LC_LOAD_DYLIB, 24 + path.size(), is64, true),
|
|
name_offset(24), timestamp(0),
|
|
current_version(0x10000), compatibility_version(0x10000),
|
|
_loadPath(path) {
|
|
}
|
|
|
|
virtual void copyTo(uint8_t *to, bool swap=false) {
|
|
if ( swap ) {
|
|
assert(0 && "non-native endianness not supported yet");
|
|
}
|
|
else {
|
|
// in-memory matches on-disk, so copy first fields followed by path
|
|
::memcpy(to, (uint8_t*)&cmd, 24);
|
|
::memcpy(&to[24], _loadPath.data(), _loadPath.size());
|
|
::memset(&to[12+_loadPath.size()], 0, cmdsize-(12+_loadPath.size()));
|
|
}
|
|
}
|
|
|
|
};
|
|
|
|
|
|
//
|
|
// The dyld_info_command load command holds the offsets to various tables
|
|
// of information needed by dyld to prepare the image for execution.
|
|
//
|
|
struct dyld_info_command : public load_command {
|
|
uint32_t rebase_off;
|
|
uint32_t rebase_size;
|
|
uint32_t bind_off;
|
|
uint32_t bind_size;
|
|
uint32_t weak_bind_off;
|
|
uint32_t weak_bind_size;
|
|
uint32_t lazy_bind_off;
|
|
uint32_t lazy_bind_size;
|
|
uint32_t export_off;
|
|
uint32_t export_size;
|
|
|
|
dyld_info_command(bool is64)
|
|
: load_command(LC_DYLD_INFO_ONLY, 48, is64),
|
|
rebase_off(0), rebase_size(0),
|
|
bind_off(0), bind_size(0), weak_bind_off(0), weak_bind_size(0),
|
|
lazy_bind_off(0), lazy_bind_size(0), export_off(0), export_size(0) {
|
|
}
|
|
|
|
virtual void copyTo(uint8_t *to, bool swap=false) {
|
|
if ( swap ) {
|
|
assert(0 && "non-native endianness not supported yet");
|
|
}
|
|
else {
|
|
// in-memory matches on-disk, so copy fields
|
|
::memcpy(to, (uint8_t*)&cmd, 48);
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
// Bind types (same names as the darwin headers).
enum {
  BIND_TYPE_POINTER         = 1,
  BIND_TYPE_TEXT_ABSOLUTE32 = 2,
  BIND_TYPE_TEXT_PCREL32    = 3
};
|
|
|
|
// Special dylib ordinals (zero or negative).
enum {
  BIND_SPECIAL_DYLIB_SELF            =  0,
  BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1,
  BIND_SPECIAL_DYLIB_FLAT_LOOKUP     = -2
};
|
|
|
|
// Symbol flag bits (same names as the darwin headers).
enum {
  BIND_SYMBOL_FLAGS_WEAK_IMPORT         = 0x1,
  BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8
};
|
|
|
|
// Bind opcode bytes: the high nibble (BIND_OPCODE_MASK) selects the opcode
// and the low nibble (BIND_IMMEDIATE_MASK) is the immediate-value mask.
enum {
  BIND_OPCODE_MASK                             = 0xF0,
  BIND_IMMEDIATE_MASK                          = 0x0F,
  BIND_OPCODE_DONE                             = 0x00,
  BIND_OPCODE_SET_DYLIB_ORDINAL_IMM            = 0x10,
  BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB           = 0x20,
  BIND_OPCODE_SET_DYLIB_SPECIAL_IMM            = 0x30,
  BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM    = 0x40,
  BIND_OPCODE_SET_TYPE_IMM                     = 0x50,
  BIND_OPCODE_SET_ADDEND_SLEB                  = 0x60,
  BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB      = 0x70,
  BIND_OPCODE_ADD_ADDR_ULEB                    = 0x80,
  BIND_OPCODE_DO_BIND                          = 0x90,
  BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB            = 0xA0,
  BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED      = 0xB0,
  BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0
};
|
|
|
|
|
|
|
|
|
|
// Symbol type constants (same names as the darwin headers).
enum {
  N_UNDF = 0x00,
  N_EXT  = 0x01,
  N_PEXT = 0x10,
  N_SECT = 0x0e
};
|
|
|
|
// In-memory symbol table entry.  n_value is always held as 64 bits and is
// narrowed when a 32-bit entry is written.
class nlist {
public:
  uint32_t  n_strx;
  uint8_t   n_type;
  uint8_t   n_sect;
  uint16_t  n_desc;
  uint64_t  n_value;

  // Returns the on-disk size of one entry: 16 bytes for 64-bit, 12 for 32-bit.
  static unsigned size(bool is64) {
    return (is64 ? 16 : 12);
  }

  // Writes size(is64) bytes to 'to'.  'swap' must be false.
  void copyTo(uint8_t *to, bool is64, bool swap=false) const {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      if ( is64 ) {
        // in-memory matches on-disk, so just copy whole struct
        ::memcpy(to, (const uint8_t*)&n_strx, 16);
      }
      else {
        // on-disk uses 32-bit n_value, so special case n_value: copy the
        // first 8 bytes (n_strx..n_desc) and then the low 32 bits of n_value
        ::memcpy(to, (const uint8_t*)&n_strx, 8);
        uint32_t value32 = n_value; // FIXME: range check
        ::memcpy(&to[8], &value32, sizeof(uint32_t));
      }
    }
  }
};
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace mach_o
|
|
} // namespace lld
|
|
|
|
|
|
|
|
#endif // LLD_READER_WRITER_MACHO_FORMAT_H_
|
|
|