llvm-project/lld/lib/ReaderWriter/MachO/MachOFormat.hpp

626 lines
16 KiB
C++

//===- lib/ReaderWriter/MachO/MachOFormat.hpp -----------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains all the structs and constants needed to write a
// mach-o final linked image. The names of the structs and constants
// are the same as in the darwin native header <mach-o/loader.h> so
// they will be familiar to anyone who has used that header.
//
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Memory.h"
#ifndef LLD_READER_WRITER_MACHO_FORMAT_H_
#define LLD_READER_WRITER_MACHO_FORMAT_H_
namespace lld {
namespace mach_o {
enum {
// Values for mach_header.magic.  mach_header::size() reports a 32-byte
// header when magic is 0xfeedfacf (MH_MAGIC_64) and 28 bytes otherwise.
MH_MAGIC = 0xfeedface,
MH_MAGIC_64 = 0xfeedfacf
};
enum {
// CPU type identifiers.  thread_command switches on these to choose the
// thread-state flavor, the register buffer size and the PC slot.
// NOTE(review): presumably also the values stored in mach_header.cputype
// -- confirm against the writer.
CPU_TYPE_ARM = 0x0000000C,
CPU_TYPE_I386 = 0x00000007,
CPU_TYPE_X86_64 = 0x01000007
};
enum {
// CPU subtype identifiers; nothing else in this header references them.
// NOTE(review): presumably paired with a CPU_TYPE_* value and stored in
// mach_header.cpusubtype -- confirm against the writer.
// The two x86 "ALL" entries share the value 3.
CPU_SUBTYPE_X86_ALL = 0x00000003,
CPU_SUBTYPE_X86_64_ALL = 0x00000003,
CPU_SUBTYPE_ARM_V6 = 0x00000006,
CPU_SUBTYPE_ARM_V7 = 0x00000009
};
enum {
// File type identifiers; nothing else in this header references them.
// NOTE(review): presumably the values stored in mach_header.filetype
// (names follow <mach-o/loader.h>, per the file header) -- confirm.
MH_OBJECT = 0x1,
MH_EXECUTE = 0x2,
MH_DYLIB = 0x6,
MH_DYLINKER = 0x7,
MH_BUNDLE = 0x8,
MH_DYLIB_STUB = 0x9,
MH_KEXT_BUNDLE= 0xB
};
//
// Every mach-o file starts with this header. The header size is
// 28 bytes for 32-bit architectures and 32 bytes for 64-bit architectures.
//
// In-memory image of the header found at the start of a mach-o file.
// The members are declared in on-disk order, so copyTo() can emit the
// header with a single memcpy of the leading size() bytes.
class mach_header {
public:
  uint32_t magic;
  uint32_t cputype;
  uint32_t cpusubtype;
  uint32_t filetype;
  uint32_t ncmds;       // number of load commands recorded so far
  uint32_t sizeofcmds;  // sum of the cmdsize of every recorded load command
  uint32_t flags;
  uint32_t reserved;    // only emitted when the header is the 32-byte form

  // Returns the on-disk size of the header in bytes: 32 when magic is
  // 0xfeedfacf (the 64-bit magic), 28 for any other magic value.
  uint64_t size() const {
    return (magic == 0xfeedfacf) ? 32 : 28;
  }

  // Writes the first size() bytes of the header to 'to' in native byte
  // order.  'to' must have room for size() bytes.
  // Byte swapping is not implemented; like the other copyTo() methods in
  // this file, a request for it is flagged with an assert instead of being
  // silently ignored.  Release builds still emit the native-order bytes.
  void copyTo(uint8_t *to, bool swap=false) const {
    assert(!swap && "non-native endianness not supported yet");
    (void)swap;  // unused when asserts are compiled out
    ::memcpy(to, &magic, this->size());
  }

  // Adds one load command to the ncmds / sizeofcmds totals.
  void recordLoadCommand(const class load_command *lc);
};
//
// Every mach-o file has a list of load commands after the mach_header.
// Each load command starts with a type and length, so you can iterate
// through the load commands even if you don't understand the content
// of a particular type.
//
// The model for handling endianness and 32 vs 64 bitness is that the in-memory
// object is always 64-bit and the native endianness. The endianness swapping
// and pointer sizing is done when writing (copyTo method) or when reading
// (constructor that takes a buffer).
//
// The load_command subclasses are designed to mirror the traditional "C"
// structs, so you can get and set the same field names (e.g. seg->vmaddr = 0).
//
// Abstract base for every load command.  Holds the two fields that start
// each command on disk (cmd, cmdsize) and declares the copyTo() method that
// subclasses implement to serialize themselves.
class load_command {
public:
  const uint32_t cmd;      // type of load command
  const uint32_t cmdsize;  // length of load command including this header

  // cmdNumber: value stored in cmd.
  // sz:        size of the command in bytes before any alignment padding.
  // is64:      selects 8-byte (true) or 4-byte (false) alignment.
  // align:     when true, sz is rounded up to the alignment; when false,
  //            sz must already be aligned (checked by an assert).
  load_command(uint32_t cmdNumber, uint32_t sz, bool is64, bool align=false)
    : cmd(cmdNumber), cmdsize(pointerAlign(sz, is64, align)) {
  }

  virtual ~load_command() {
  }

  // Serializes the command to 'to'; 'swap' requests non-native byte order.
  virtual void copyTo(uint8_t *to, bool swap=false) = 0;

private:
  // Load commands must be pointer-size aligned.  Fixed-size commands are
  // expected to be aligned already, which is only asserted.  Variable-length
  // commands pass align=true to have their size rounded up to the next
  // multiple of the pointer size.
  uint32_t pointerAlign(uint32_t size, bool is64, bool align) {
    const uint32_t boundary = is64 ? 8 : 4;
    if ( !align ) {
      assert((size % boundary) == 0);
      return size;
    }
    // boundary is a power of two, so masking rounds up to a multiple of it
    return (size + (boundary - 1)) & ~(boundary - 1);
  }
};
// Accounts for one more load command in the header totals: grows
// sizeofcmds by the command's cmdsize and increments the command count.
inline void mach_header::recordLoadCommand(const load_command *lc) {
  sizeofcmds += lc->cmdsize;
  ncmds += 1;
}
// Supported load command types
enum {
// Values for load_command.cmd.  Each one is passed to the load_command
// constructor by a class in this file:
//   LC_SEGMENT / LC_SEGMENT_64   segment_command (32-bit / 64-bit form)
//   LC_SYMTAB                    symtab_command
//   LC_UNIXTHREAD                thread_command
//   LC_LOAD_DYLIB                dylib_command
//   LC_LOAD_DYLINKER             dylinker_command
//   LC_MAIN                      entry_point_command
//   LC_DYLD_INFO_ONLY            dyld_info_command
LC_SEGMENT = 0x00000001,
LC_SYMTAB = 0x00000002,
LC_UNIXTHREAD = 0x00000005,
LC_LOAD_DYLIB = 0x0000000C,
LC_LOAD_DYLINKER = 0x0000000E,
LC_SEGMENT_64 = 0x00000019,
LC_MAIN = 0x80000028,
LC_DYLD_INFO_ONLY = 0x80000022
};
// Memory protection bit used in segment_command.initprot
enum {
// Each non-zero value is a distinct bit, so values can be OR-ed together;
// VM_PROT_NONE is the empty set.
VM_PROT_NONE = 0x0,
VM_PROT_READ = 0x1,
VM_PROT_WRITE = 0x2,
VM_PROT_EXECUTE = 0x4,
};
// Bits for the section.flags field
enum {
// Section "type" is the low byte
// SECTION_TYPE is the mask that isolates that byte; the S_* values that
// follow are the type codes found in it (they all fit within 0xFF).
SECTION_TYPE = 0x000000FF,
S_REGULAR = 0x00000000,
S_ZEROFILL = 0x00000001,
S_CSTRING_LITERALS = 0x00000002,
S_NON_LAZY_SYMBOL_POINTERS= 0x00000006,
S_LAZY_SYMBOL_POINTERS = 0x00000007,
S_SYMBOL_STUBS = 0x00000008,
// Other bits in section.flags
// These lie outside the SECTION_TYPE mask, so they combine with a type
// code by bitwise OR.
S_ATTR_PURE_INSTRUCTIONS = 0x80000000,
S_ATTR_SOME_INSTRUCTIONS = 0x00000400
};
// section record for 32-bit architectures
struct section {
// Field-for-field layout of a 32-bit section record: two 16-byte names
// followed by nine 32-bit fields, 68 bytes in total.  That is the same
// per-section stride segment_command uses when sizing and writing the
// 32-bit form of a segment.
// Nothing in this header instantiates the struct: segment_command keeps its
// sections as section_64 and narrows each field on output.
char sectname[16];
char segname[16];
uint32_t addr;
uint32_t size;
uint32_t offset;
uint32_t align;
uint32_t reloff;
uint32_t nreloc;
uint32_t flags;
uint32_t reserved1;
uint32_t reserved2;
};
// section record for 64-bit architectures
struct section_64 {
// Two 16-byte names, two 64-bit fields and eight 32-bit fields: 80 bytes in
// total, which is the per-section size segment_command budgets for the
// 64-bit form.  segment_command also uses this struct as its in-memory
// section record for both widths and, for 64-bit output, memcpy's the
// array straight to the destination, so field order and widths must not
// change.
char sectname[16];
char segname[16];
uint64_t addr;
uint64_t size;
uint32_t offset;
uint32_t align;
uint32_t reloff;
uint32_t nreloc;
uint32_t flags;
uint32_t reserved1;
uint32_t reserved2;
// reserved3 exists only in the 64-bit record; the 32-bit writer skips it.
uint32_t reserved3;
};
//
// A segment load command has a fixed set of fields followed by an 'nsect'
// array of section records. The in-memory object uses a pointer to
// a dynamically allocated array of sections.
//
class segment_command : public load_command {
public:
char segname[16];
uint64_t vmaddr;
uint64_t vmsize;
uint64_t fileoff;
uint64_t filesize;
uint32_t maxprot;
uint32_t initprot;
uint32_t nsects;
uint32_t flags;
section_64 *sections;
segment_command(unsigned sectCount, bool is64)
: load_command((is64 ? LC_SEGMENT_64 : LC_SEGMENT),
(is64 ? (72 + sectCount*80) : (56 + sectCount*68)),
is64),
vmaddr(0), vmsize(0), fileoff(0), filesize(0),
maxprot(0), initprot(0), nsects(sectCount), flags(0) {
sections = new section_64[sectCount];
this->nsects = sectCount;
}
~segment_command() {
delete sections;
}
void copyTo(uint8_t *to, bool swap) {
if ( swap ) {
assert(0 && "non-native endianness not supported yet");
}
else {
if( is64() ) {
// in-memory matches on-disk, so copy segment fields followed by sections
::memcpy(to, (uint8_t*)&cmd, 72);
if ( nsects != 0 )
::memcpy(&to[72], sections, sizeof(section_64)*nsects);
}
else {
// on-disk is 32-bit struct, so copy each field
::memcpy(to, (uint8_t*)&cmd, 24);
copy32(to, 24, vmaddr);
copy32(to, 28, vmsize);
copy32(to, 32, fileoff);
copy32(to, 36, filesize);
copy32(to, 40, maxprot);
copy32(to, 44, initprot);
copy32(to, 48, nsects);
copy32(to, 52, flags);
for(uint32_t i=0; i < nsects; ++i) {
unsigned off = 56+i*68;
::memcpy(&to[off], sections[i].sectname, 32);
copy32(to, off+32, sections[i].addr);
copy32(to, off+36, sections[i].size);
copy32(to, off+40, sections[i].offset);
copy32(to, off+44, sections[i].align);
copy32(to, off+48, sections[i].reloff);
copy32(to, off+52, sections[i].nreloc);
copy32(to, off+56, sections[i].flags);
copy32(to, off+60, sections[i].reserved1);
copy32(to, off+64, sections[i].reserved2);
}
}
}
}
private:
void copy32(uint8_t *to, unsigned offset, uint64_t value) {
uint32_t value32 = value; // FIXME: range check
::memcpy(&to[offset], &value32, sizeof(uint32_t));
}
bool is64() {
return (cmd == LC_SEGMENT_64);
}
};
//
// The dylinker_command contains the path to the dynamic loader to use
// with the program (e.g. "/usr/lib/dyld"). So, it is variable length.
// But load commands must be pointer size aligned.
//
//
// In-memory form of an LC_LOAD_DYLINKER load command: a 12-byte fixed part
// (cmd, cmdsize, name_offset) followed by the NUL-terminated loader path,
// zero-padded up to cmdsize.
class dylinker_command : public load_command {
public:
  uint32_t name_offset;  // offset of the path from the start of the command
private:
  // NOTE(review): assumed to be llvm::StringRef, i.e. a non-owning view; the
  // caller must keep the path storage alive until copyTo() has run -- confirm.
  StringRef _name;
public:
  // The "+ 1" reserves the path's terminating NUL.  Without it, a path whose
  // length brings 12 + size exactly onto the alignment boundary would be
  // written with no terminator and no padding at all.
  dylinker_command(StringRef path, bool is64)
    : load_command(LC_LOAD_DYLINKER, 12 + path.size() + 1, is64, true),
      name_offset(12), _name(path) {
  }

  // Serializes the command to 'to', which must have room for cmdsize bytes.
  // Byte swapping is not implemented and only asserts.
  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy first fields followed by path
      ::memcpy(to, &cmd, 12);
      ::memcpy(&to[12], _name.data(), _name.size());
      // zero the terminator and any alignment padding (always >= 1 byte)
      ::memset(&to[12+_name.size()], 0, cmdsize-(12+_name.size()));
    }
  }
};
//
// The symtab_command just holds the offset to the array of nlist structs
// and the offsets to the string pool for all symbol names.
//
// In-memory form of an LC_SYMTAB load command: the 8-byte command header
// followed by four 32-bit fields, 24 bytes in all.  All four fields are
// zero-initialized by the constructor.
class symtab_command : public load_command {
public:
  uint32_t symoff;
  uint32_t nsyms;
  uint32_t stroff;
  uint32_t strsize;

  symtab_command(bool is64)
    : load_command(LC_SYMTAB, 24, is64),
      symoff(0),
      nsyms(0),
      stroff(0),
      strsize(0) {
  }

  // Serializes the 24-byte command to 'to'.  Byte swapping is not
  // implemented and only asserts.
  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( !swap ) {
      // in-memory matches on-disk, so copy fields
      const unsigned fixedBytes = 24;
      ::memcpy(to, (uint8_t*)&cmd, fixedBytes);
      return;
    }
    assert(0 && "non-native endianness not supported yet");
  }
};
//
// The entry_point_command load command holds the offset to the function
// _main in a dynamic executable.
//
// In-memory form of an LC_MAIN load command: the 8-byte command header
// followed by two 64-bit fields, 24 bytes in all.  Both fields are
// zero-initialized by the constructor.
class entry_point_command : public load_command {
public:
  uint64_t entryoff;
  uint64_t stacksize;

  entry_point_command(bool is64)
    : load_command(LC_MAIN, 24, is64),
      entryoff(0),
      stacksize(0) {
  }

  // Serializes the 24-byte command to 'to'.  Byte swapping is not
  // implemented and only asserts.
  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( !swap ) {
      // in-memory matches on-disk, so copy fields
      const unsigned fixedBytes = 24;
      ::memcpy(to, (uint8_t*)&cmd, fixedBytes);
      return;
    }
    assert(0 && "non-native endianness not supported yet");
  }
};
//
// The thread_command load command holds the set of initial register values
// for a dynamic executable. In reality, only the PC and SP are used.
//
// In-memory form of an LC_UNIXTHREAD load command: a 16-byte fixed part
// (cmd, cmdsize, flavor, count) followed by a CPU-specific register block.
//
// The object owns the calloc'ed register block and frees it in the
// destructor.
// NOTE: the implicitly generated copy constructor would make two objects
// free the same block, so instances must not be copied.
class thread_command : public load_command {
public:
  uint32_t fields_flavor;  // thread-state flavor selected from the CPU type
  uint32_t fields_count;   // size of the register block in 32-bit words
private:
  uint32_t _cpuType;
  uint8_t *_registerArray; // zero-filled, registersBufferSize(_cpuType) bytes
public:
  // cpuType must be one of CPU_TYPE_I386, CPU_TYPE_X86_64 or CPU_TYPE_ARM;
  // anything else trips an assert.
  // registersBufferSize() is static because it is evaluated while computing
  // the base-class initializer, i.e. before the load_command base has been
  // constructed; calling a non-static member at that point is undefined.
  thread_command(uint32_t cpuType, bool is64)
    : load_command(LC_UNIXTHREAD, 16+registersBufferSize(cpuType), is64),
      fields_flavor(0),
      fields_count(registersBufferSize(cpuType)/4), _cpuType(cpuType) {
    switch ( cpuType ) {
    case CPU_TYPE_I386:
      fields_flavor = 1; // i386_THREAD_STATE
      break;
    case CPU_TYPE_X86_64:
      fields_flavor = 4; // x86_THREAD_STATE64;
      break;
    case CPU_TYPE_ARM:
      fields_flavor = 1; // ARM_THREAD_STATE
      break;
    default:
      // fields_flavor keeps its 0 initializer when asserts are compiled out
      assert(0 && "unsupported cpu type");
    }
    _registerArray = reinterpret_cast<uint8_t*>(
                                ::calloc(registersBufferSize(cpuType), 1));
    assert(_registerArray);
  }

  // Serializes the fixed fields and the register block to 'to', which must
  // have room for cmdsize bytes.  Byte swapping is not implemented and only
  // asserts.
  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy fixed fields
      ::memcpy(to, &cmd, 16);
      // then the register array
      ::memcpy(&to[16], _registerArray, registersBufferSize(_cpuType));
    }
  }

  // Stores 'pc' in the program-counter slot of the register block.  The slot
  // index and width depend on the CPU type; for the 32-bit CPUs only the low
  // 32 bits of 'pc' are kept.
  void setPC(uint64_t pc) {
    uint32_t *regs32 = reinterpret_cast<uint32_t*>(_registerArray);
    uint64_t *regs64 = reinterpret_cast<uint64_t*>(_registerArray);
    switch ( _cpuType ) {
    case CPU_TYPE_I386:
      regs32[10] = static_cast<uint32_t>(pc);
      break;
    case CPU_TYPE_X86_64:
      regs64[16] = pc;
      break;
    case CPU_TYPE_ARM:
      regs32[15] = static_cast<uint32_t>(pc);
      break;
    default:
      assert(0 && "unsupported cpu type");
    }
  }

  virtual ~thread_command() {
    ::free(_registerArray);
  }

private:
  // Size in bytes of the register block for 'cpuType'; asserts and returns 0
  // for an unsupported type.
  static uint32_t registersBufferSize(uint32_t cpuType) {
    switch ( cpuType ) {
    case CPU_TYPE_I386:
      return 64; // i386_THREAD_STATE_COUNT * 4
    case CPU_TYPE_X86_64:
      return 168; // x86_THREAD_STATE64_COUNT * 4
    case CPU_TYPE_ARM:
      return 68; // ARM_THREAD_STATE_COUNT * 4
    }
    assert(0 && "unsupported cpu type");
    return 0;
  }
};
//
// The dylib_command load command holds the name/path of a dynamic shared
// library which this mach-o image depends on.
//
// In-memory form of an LC_LOAD_DYLIB load command: a 24-byte fixed part
// (cmd, cmdsize, name_offset, timestamp, current_version,
// compatibility_version) followed by the NUL-terminated library path,
// zero-padded up to cmdsize.
struct dylib_command : public load_command {
  uint32_t name_offset;  // offset of the path from the start of the command
  uint32_t timestamp;
  uint32_t current_version;
  uint32_t compatibility_version;
private:
  // NOTE(review): assumed to be llvm::StringRef, i.e. a non-owning view; the
  // caller must keep the path storage alive until copyTo() has run -- confirm.
  StringRef _loadPath;
public:
  // The "+ 1" reserves the path's terminating NUL.  Without it, a path whose
  // length brings 24 + size exactly onto the alignment boundary would be
  // written with no terminator and no padding at all.
  // Both version fields default to 0x10000.
  dylib_command(StringRef path, bool is64)
    : load_command(LC_LOAD_DYLIB, 24 + path.size() + 1, is64, true),
      name_offset(24), timestamp(0),
      current_version(0x10000), compatibility_version(0x10000),
      _loadPath(path) {
  }

  // Serializes the command to 'to', which must have room for cmdsize bytes.
  // Byte swapping is not implemented and only asserts.
  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy first fields followed by path
      ::memcpy(to, &cmd, 24);
      ::memcpy(&to[24], _loadPath.data(), _loadPath.size());
      // Zero the terminator and alignment padding, which start right after
      // the path at offset 24 + size.  Starting at 12 + size would wipe the
      // last 12 bytes of what was just written.
      ::memset(&to[24+_loadPath.size()], 0, cmdsize-(24+_loadPath.size()));
    }
  }
};
//
// The dyld_info_command load command holds the offsets to various tables
// of information needed by dyld to prepare the image for execution.
//
// In-memory form of an LC_DYLD_INFO_ONLY load command: the 8-byte command
// header followed by five (offset, size) pairs of 32-bit fields, 48 bytes
// in all.  Every field is zero-initialized by the constructor.
struct dyld_info_command : public load_command {
  uint32_t rebase_off;
  uint32_t rebase_size;
  uint32_t bind_off;
  uint32_t bind_size;
  uint32_t weak_bind_off;
  uint32_t weak_bind_size;
  uint32_t lazy_bind_off;
  uint32_t lazy_bind_size;
  uint32_t export_off;
  uint32_t export_size;

  dyld_info_command(bool is64)
    : load_command(LC_DYLD_INFO_ONLY, 48, is64),
      rebase_off(0),    rebase_size(0),
      bind_off(0),      bind_size(0),
      weak_bind_off(0), weak_bind_size(0),
      lazy_bind_off(0), lazy_bind_size(0),
      export_off(0),    export_size(0) {
  }

  // Serializes the 48-byte command to 'to'.  Byte swapping is not
  // implemented and only asserts.
  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( !swap ) {
      // in-memory matches on-disk, so copy fields
      const unsigned fixedBytes = 48;
      ::memcpy(to, (uint8_t*)&cmd, fixedBytes);
      return;
    }
    assert(0 && "non-native endianness not supported yet");
  }
};
enum {
// Bind type codes; nothing else in this header references them.
// NOTE(review): presumably the immediate operand of
// BIND_OPCODE_SET_TYPE_IMM in the bind tables that dyld_info_command
// points at -- confirm against the bind-info writer.
BIND_TYPE_POINTER = 1,
BIND_TYPE_TEXT_ABSOLUTE32 = 2,
BIND_TYPE_TEXT_PCREL32 = 3
};
enum {
// Special (zero or negative) dylib selectors; nothing else in this header
// references them.
// NOTE(review): presumably used in place of a dylib ordinal, e.g. with
// BIND_OPCODE_SET_DYLIB_SPECIAL_IMM -- confirm against the bind-info writer.
BIND_SPECIAL_DYLIB_SELF = 0,
BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1,
BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2
};
enum {
// Symbol flag bits (distinct bits, so they can be OR-ed together); nothing
// else in this header references them.
// NOTE(review): presumably the immediate operand of
// BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM -- confirm.
BIND_SYMBOL_FLAGS_WEAK_IMPORT = 0x1,
BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8
};
enum {
// Bind opcode byte layout: BIND_OPCODE_MASK selects the high nibble and
// BIND_IMMEDIATE_MASK the low nibble.  Every BIND_OPCODE_* value below has
// a zero low nibble, leaving it free for an immediate operand.
// Nothing else in this header references these.
// NOTE(review): presumably consumed by the code that emits the bind tables
// located by dyld_info_command -- confirm.
BIND_OPCODE_MASK = 0xF0,
BIND_IMMEDIATE_MASK = 0x0F,
BIND_OPCODE_DONE = 0x00,
BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10,
BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20,
BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30,
BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40,
BIND_OPCODE_SET_TYPE_IMM = 0x50,
BIND_OPCODE_SET_ADDEND_SLEB = 0x60,
BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70,
BIND_OPCODE_ADD_ADDR_ULEB = 0x80,
BIND_OPCODE_DO_BIND = 0x90,
BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0,
BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0,
BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0
};
enum {
// Symbol type constants; nothing else in this header references them.
// NOTE(review): presumably values/bits for nlist.n_type -- confirm against
// the symbol table writer.
N_UNDF = 0x00,
N_EXT = 0x01,
N_PEXT = 0x10,
N_SECT = 0x0e
};
// One symbol table entry.  Held in memory with a 64-bit n_value; copyTo()
// emits either the 16-byte (64-bit) or the 12-byte (32-bit) record.
class nlist {
public:
  uint32_t n_strx;
  uint8_t n_type;
  uint8_t n_sect;
  uint16_t n_desc;
  uint64_t n_value;  // narrowed to 32 bits when written for a 32-bit target

  // Size in bytes of one written entry for the requested pointer width.
  static unsigned size(bool is64) {
    if ( is64 )
      return 16;
    return 12;
  }

  // Writes this entry to 'to', which must have room for size(is64) bytes.
  // Byte swapping is not implemented and only asserts.
  void copyTo(uint8_t *to, bool is64, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
      return;
    }
    const uint8_t *raw = (const uint8_t*)&n_strx;
    if ( is64 ) {
      // in-memory matches on-disk, so just copy whole struct
      ::memcpy(to, raw, 16);
      return;
    }
    // on-disk uses 32-bit n_value, so copy the first 8 bytes unchanged and
    // special case n_value
    ::memcpy(to, raw, 8);
    const uint32_t narrowed = static_cast<uint32_t>(n_value); // FIXME: range check
    ::memcpy(to + 8, &narrowed, sizeof(narrowed));
  }
};
} // namespace mach_o
} // namespace lld
#endif // LLD_READER_WRITER_MACHO_FORMAT_H_