llvm-project/lld/lib/ReaderWriter/MachO/MachOFormat.hpp

//===- lib/ReaderWriter/MachO/MachOFormat.hpp -----------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//
// This file contains all the structs and constants needed to write a
// mach-o final linked image.  The names of the structs and constants
// are the same as in the darwin native header <mach-o/loader.h> so
// they will be familiar to anyone who has used that header.
//

#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Memory.h"

#ifndef LLD_READER_WRITER_MACHO_FORMAT_H_
#define LLD_READER_WRITER_MACHO_FORMAT_H_

namespace lld {
namespace mach_o {


enum {
  MH_MAGIC    = 0xfeedface,
  MH_MAGIC_64 = 0xfeedfacf
};

enum {
  CPU_TYPE_ARM  =   0x0000000C,
  CPU_TYPE_I386 =   0x00000007,
  CPU_TYPE_X86_64 = 0x01000007
};

enum {
  CPU_SUBTYPE_X86_ALL    = 0x00000003,
  CPU_SUBTYPE_X86_64_ALL = 0x00000003,
  CPU_SUBTYPE_ARM_V6     = 0x00000006,
  CPU_SUBTYPE_ARM_V7     = 0x00000009
};

enum {
  MH_OBJECT     = 0x1,
  MH_EXECUTE    = 0x2,
  MH_DYLIB      = 0x6,
  MH_DYLINKER   = 0x7,
  MH_BUNDLE     = 0x8,
  MH_DYLIB_STUB = 0x9,
  MH_KEXT_BUNDLE= 0xB
};


//
// Every mach-o file starts with this header.  The header size is
// 28 bytes for 32-bit architecures and 32-bytes for 64-bit architectures.
//
class mach_header {
public:
  uint32_t    magic;
  uint32_t    cputype;
  uint32_t    cpusubtype;
  uint32_t    filetype;
  uint32_t    ncmds;
  uint32_t    sizeofcmds;
  uint32_t    flags;
  uint32_t    reserved;

  uint64_t size() {
    return (magic == 0xfeedfacf) ? 32 : 28;
  }

  void copyTo(uint8_t *to, bool swap=false) {
    ::memcpy(to, (char*)&magic, this->size());
  }

  void recordLoadCommand(const class load_command *lc);
};


//
// Every mach-o file has a list of load commands after the mach_header.
// Each load command starts with a type and length, so you can iterate
// through the load commands even if you don't understand the content
// of a particular type.
//
// The model for handling endianness and 32 vs 64 bitness is that the in-memory
// object is always 64-bit and the native endianess.  The endianess swapping
// and pointer sizing is done when writing (copyTo method) or when reading
// (constructor that takes a buffer).
//
// The load_command subclasses are designed so to mirror the traditional "C"
// structs, so you can get and set the same field names (e.g. seg->vmaddr = 0).
//
class load_command {
public:
  const uint32_t  cmd;        // type of load command
  const uint32_t  cmdsize;    // length of load command including this header

  load_command(uint32_t cmdNumber, uint32_t sz, bool is64, bool align=false)
    : cmd(cmdNumber), cmdsize(pointerAlign(sz, is64, align)) {
  }

  virtual ~load_command() {
  }

  virtual void copyTo(uint8_t *to, bool swap=false) = 0;
private:
  // Load commands must be pointer-size aligned. Most load commands are
  // a fixed size, so there is a runtime assert to check those.  For variable
  // length load commands, setting the align option to true will add padding
  // at the end of the load command to round up its size for proper alignment.
  uint32_t pointerAlign(uint32_t size, bool is64, bool align) {
    if ( align ) {
       if ( is64 )
        return (size + 7) & (-8);
      else
        return (size + 3) & (-4);
    }
    else {
      if ( is64 )
        assert((size % 8) == 0);
      else
        assert((size % 4) == 0);
      return size;
    }
  }

};

inline void mach_header::recordLoadCommand(const load_command *lc) {
  ++ncmds;
  sizeofcmds += lc->cmdsize;
}

// Supported load command types
enum {
  LC_SEGMENT        = 0x00000001,
  LC_SYMTAB         = 0x00000002,
  LC_UNIXTHREAD     = 0x00000005,
  LC_LOAD_DYLIB     = 0x0000000C,
  LC_LOAD_DYLINKER  = 0x0000000E,
  LC_SEGMENT_64     = 0x00000019,
  LC_MAIN           = 0x80000028,
  LC_DYLD_INFO_ONLY = 0x80000022
};

// Memory protection bit used in segment_command.initprot
enum {
  VM_PROT_NONE    = 0x0,
  VM_PROT_READ    = 0x1,
  VM_PROT_WRITE   = 0x2,
  VM_PROT_EXECUTE = 0x4,
};

// Bits for the section.flags field
enum {
  // Section "type" is the low byte
  SECTION_TYPE              = 0x000000FF,
  S_REGULAR                 = 0x00000000,
  S_ZEROFILL                = 0x00000001,
  S_CSTRING_LITERALS        = 0x00000002,
  S_NON_LAZY_SYMBOL_POINTERS= 0x00000006,
  S_LAZY_SYMBOL_POINTERS    = 0x00000007,
  S_SYMBOL_STUBS            = 0x00000008,

  // Other bits in section.flags
  S_ATTR_PURE_INSTRUCTIONS  = 0x80000000,
  S_ATTR_SOME_INSTRUCTIONS  = 0x00000400
};


// section record for 32-bit architectures
struct section {
  char      sectname[16];
  char      segname[16];
  uint32_t  addr;
  uint32_t  size;
  uint32_t  offset;
  uint32_t  align;
  uint32_t  reloff;
  uint32_t  nreloc;
  uint32_t  flags;
  uint32_t  reserved1;
  uint32_t  reserved2;
};

// section record for 64-bit architectures
struct section_64 {
  char      sectname[16];
  char      segname[16];
  uint64_t  addr;
  uint64_t  size;
  uint32_t  offset;
  uint32_t  align;
  uint32_t  reloff;
  uint32_t  nreloc;
  uint32_t  flags;
  uint32_t  reserved1;
  uint32_t  reserved2;
  uint32_t  reserved3;
};


//
// A segment load command has a fixed set of fields followed by an 'nsect'
// array of section records.  The in-memory object uses a pointer to
// a dynamically allocated array of sections.
//
class segment_command : public load_command {
public:
  char      segname[16];
  uint64_t  vmaddr;
  uint64_t  vmsize;
  uint64_t  fileoff;
  uint64_t  filesize;
  uint32_t  maxprot;
  uint32_t  initprot;
  uint32_t  nsects;
  uint32_t  flags;
  section_64 *sections;

  segment_command(unsigned sectCount, bool is64)
    : load_command((is64 ? LC_SEGMENT_64 : LC_SEGMENT),
                   (is64 ? (72 + sectCount*80) : (56 + sectCount*68)),
                   is64),
     vmaddr(0), vmsize(0), fileoff(0), filesize(0),
      maxprot(0), initprot(0), nsects(sectCount), flags(0) {
    sections = new section_64[sectCount];
    this->nsects = sectCount;
  }

  ~segment_command() {
    delete sections;
  }

  void copyTo(uint8_t *to, bool swap) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      if( is64() ) {
        // in-memory matches on-disk, so copy segment fields followed by sections
        ::memcpy(to, (uint8_t*)&cmd, 72);
        if ( nsects != 0 )
          ::memcpy(&to[72], sections, sizeof(section_64)*nsects);
      }
      else {
        // on-disk is 32-bit struct, so copy each field
        ::memcpy(to, (uint8_t*)&cmd, 24);
        copy32(to, 24, vmaddr);
        copy32(to, 28, vmsize);
        copy32(to, 32, fileoff);
        copy32(to, 36, filesize);
        copy32(to, 40, maxprot);
        copy32(to, 44, initprot);
        copy32(to, 48, nsects);
        copy32(to, 52, flags);
        for(uint32_t i=0; i < nsects; ++i) {
          unsigned off = 56+i*68;
          ::memcpy(&to[off], sections[i].sectname, 32);
          copy32(to, off+32, sections[i].addr);
          copy32(to, off+36, sections[i].size);
          copy32(to, off+40, sections[i].offset);
          copy32(to, off+44, sections[i].align);
          copy32(to, off+48, sections[i].reloff);
          copy32(to, off+52, sections[i].nreloc);
          copy32(to, off+56, sections[i].flags);
          copy32(to, off+60, sections[i].reserved1);
          copy32(to, off+64, sections[i].reserved2);
        }
      }
    }
  }

private:
  void copy32(uint8_t *to, unsigned offset, uint64_t value) {
    uint32_t value32 = value; // FIXME: range check
    ::memcpy(&to[offset], &value32, sizeof(uint32_t));
  }

  bool is64() {
    return (cmd == LC_SEGMENT_64);
  }
};


//
// The dylinker_command contains the path to the dynamic loader to use
// with the program (e.g. "/usr/lib/dyld"). So, it is variable length.
// But load commands must be pointer size aligned.
//
//
class dylinker_command : public load_command {
public:
  uint32_t  name_offset;
private:
  StringRef _name;
public:
  dylinker_command(StringRef path, bool is64)
    : load_command(LC_LOAD_DYLINKER,12 + path.size(), is64, true),
       name_offset(12), _name(path) {
  }

  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy first fields followed by path
      ::memcpy(to, (uint8_t*)&cmd, 12);
      ::memcpy(&to[12], _name.data(), _name.size());
      ::memset(&to[12+_name.size()], 0, cmdsize-(12+_name.size()));
    }
  }

};


//
// The symtab_command just holds the offset to the array of nlist structs
// and the offsets to the string pool for all symbol names.
//
class symtab_command : public load_command {
public:
  uint32_t  symoff;
  uint32_t  nsyms;
  uint32_t  stroff;
  uint32_t  strsize;

  symtab_command(bool is64)
    : load_command(LC_SYMTAB, 24, is64),
      symoff(0), nsyms(0), stroff(0), strsize(0) {
  }

  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy fields
      ::memcpy(to, (uint8_t*)&cmd, 24);
    }
  }

};


//
// The entry_point_command load command holds the offset to the function
// _main in a dynamic executable.
//
class entry_point_command : public load_command {
public:
  uint64_t  entryoff;
  uint64_t  stacksize;

  entry_point_command(bool is64)
    : load_command(LC_MAIN, 24, is64), entryoff(0), stacksize(0) {
  }

  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy fields
      ::memcpy(to, (uint8_t*)&cmd, 24);
    }
  }
};


//
// The thread_command load command holds the set of initial register values
// for a dynamic executable.  In reality, only the PC and SP are used.
//
class thread_command : public load_command {
public:
	uint32_t	fields_flavor;
	uint32_t	fields_count;
private:
  uint32_t   _cpuType;
  uint8_t   *_registerArray;

public:
  thread_command(uint32_t cpuType, bool is64)
    : load_command(LC_UNIXTHREAD, 16+registersBufferSize(cpuType), is64),
      fields_count(registersBufferSize(cpuType)/4), _cpuType(cpuType) {
    switch ( cpuType ) {
      case CPU_TYPE_I386:
        fields_flavor = 1;  // i386_THREAD_STATE
        break;
      case CPU_TYPE_X86_64:
        fields_flavor = 4;  // x86_THREAD_STATE64;
        break;
      case CPU_TYPE_ARM:
        fields_flavor = 1;  // ARM_THREAD_STATE
        break;
      default:
        assert(0 && "unsupported cpu type");
    }
    _registerArray = reinterpret_cast<uint8_t*>(
                                    ::calloc(registersBufferSize(cpuType), 1));
    assert(_registerArray);
  }

  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy fixed fields
      ::memcpy(to, (uint8_t*)&cmd, 16);
      // that register array
      ::memcpy(&to[16], _registerArray, registersBufferSize(_cpuType));
    }
  }

  void setPC(uint64_t pc) {
    uint32_t *regs32 = reinterpret_cast<uint32_t*>(_registerArray);
    uint64_t *regs64 = reinterpret_cast<uint64_t*>(_registerArray);
    switch ( _cpuType ) {
      case CPU_TYPE_I386:
        regs32[10] = pc;
        break;
      case CPU_TYPE_X86_64:
        regs64[16] = pc;
        break;
      case CPU_TYPE_ARM:
        regs32[15] = pc;
        break;
      default:
        assert(0 && "unsupported cpu type");
    }
  }

  virtual ~thread_command() {
    ::free(_registerArray);
  }

private:
  uint32_t registersBufferSize(uint32_t cpuType) {
    switch ( cpuType ) {
      case CPU_TYPE_I386:
        return 64;        // i386_THREAD_STATE_COUNT * 4
      case CPU_TYPE_X86_64:
        return 168;       // x86_THREAD_STATE64_COUNT * 4
      case CPU_TYPE_ARM:
        return 68;        // ARM_THREAD_STATE_COUNT * 4
    }
    assert(0 && "unsupported cpu type");
    return 0;
  }


};


//
// The dylib_command load command holds the name/path of a dynamic shared
// library which this mach-o image depends on.
//
struct dylib_command : public load_command {
  uint32_t  name_offset;
  uint32_t  timestamp;
  uint32_t  current_version;
  uint32_t  compatibility_version;
private:
  StringRef _loadPath;
public:

  dylib_command(StringRef path, bool is64)
    : load_command(LC_LOAD_DYLIB, 24 + path.size(), is64, true),
      name_offset(24), timestamp(0),
      current_version(0x10000), compatibility_version(0x10000),
      _loadPath(path) {
  }

  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy first fields followed by path
      ::memcpy(to, (uint8_t*)&cmd, 24);
      ::memcpy(&to[24], _loadPath.data(), _loadPath.size());
      ::memset(&to[12+_loadPath.size()], 0, cmdsize-(12+_loadPath.size()));
    }
  }

};


//
// The dyld_info_command load command holds the offsets to various tables
// of information needed by dyld to prepare the image for execution.
//
struct dyld_info_command : public load_command {
  uint32_t   rebase_off;
  uint32_t   rebase_size;
  uint32_t   bind_off;
  uint32_t   bind_size;
  uint32_t   weak_bind_off;
  uint32_t   weak_bind_size;
  uint32_t   lazy_bind_off;
  uint32_t   lazy_bind_size;
  uint32_t   export_off;
  uint32_t   export_size;

  dyld_info_command(bool is64)
    : load_command(LC_DYLD_INFO_ONLY, 48, is64),
        rebase_off(0), rebase_size(0),
        bind_off(0), bind_size(0), weak_bind_off(0), weak_bind_size(0),
        lazy_bind_off(0), lazy_bind_size(0), export_off(0), export_size(0) {
   }

  virtual void copyTo(uint8_t *to, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      // in-memory matches on-disk, so copy fields
      ::memcpy(to, (uint8_t*)&cmd, 48);
    }
  }
};


enum {
  BIND_TYPE_POINTER               = 1,
  BIND_TYPE_TEXT_ABSOLUTE32        = 2,
  BIND_TYPE_TEXT_PCREL32          = 3
};

enum {
  BIND_SPECIAL_DYLIB_SELF             = 0,
  BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE  = -1,
  BIND_SPECIAL_DYLIB_FLAT_LOOKUP      =  -2
};

enum {
  BIND_SYMBOL_FLAGS_WEAK_IMPORT           = 0x1,
  BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION    =  0x8
};

enum {
  BIND_OPCODE_MASK                              = 0xF0,
  BIND_IMMEDIATE_MASK                           = 0x0F,
  BIND_OPCODE_DONE                              = 0x00,
  BIND_OPCODE_SET_DYLIB_ORDINAL_IMM             = 0x10,
  BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB            = 0x20,
  BIND_OPCODE_SET_DYLIB_SPECIAL_IMM             = 0x30,
  BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM     = 0x40,
  BIND_OPCODE_SET_TYPE_IMM                      = 0x50,
  BIND_OPCODE_SET_ADDEND_SLEB                   = 0x60,
  BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB       = 0x70,
  BIND_OPCODE_ADD_ADDR_ULEB                     = 0x80,
  BIND_OPCODE_DO_BIND                           = 0x90,
  BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB             = 0xA0,
  BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED       = 0xB0,
  BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB  = 0xC0
};


enum {
  N_UNDF = 0x00,
  N_EXT  = 0x01,
  N_PEXT = 0x10,
  N_SECT = 0x0e
};

class nlist {
public:
  uint32_t  n_strx;
  uint8_t   n_type;
  uint8_t   n_sect;
  uint16_t  n_desc;
  uint64_t  n_value;

  static unsigned size(bool is64) {
    return (is64 ? 16 : 12);
  }

  void copyTo(uint8_t *to, bool is64, bool swap=false) {
    if ( swap ) {
      assert(0 && "non-native endianness not supported yet");
    }
    else {
      if ( is64 ) {
        // in-memory matches on-disk, so just copy whole struct
        ::memcpy(to, (uint8_t*)&n_strx, 16);
      }
      else {
        // on-disk uses 32-bit n_value, so special case n_value
        ::memcpy(to, (uint8_t*)&n_strx, 8);
        uint32_t value32 = n_value; // FIXME: range check
        ::memcpy(&to[8], &value32, sizeof(uint32_t));
      }
    }
  }
};


} // namespace mach_o
} // namespace lld


#endif // LLD_READER_WRITER_MACHO_FORMAT_H_