2013-11-07 05:36:55 +08:00
|
|
|
//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===//
|
|
|
|
//
|
|
|
|
// The LLVM Linker
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
///
|
|
|
|
/// \file For mach-o object files, this implementation converts from
|
|
|
|
/// mach-o on-disk binary format to in-memory normalized mach-o.
|
|
|
|
///
|
|
|
|
/// +---------------+
|
|
|
|
/// | binary mach-o |
|
|
|
|
/// +---------------+
|
|
|
|
/// |
|
|
|
|
/// |
|
|
|
|
/// v
|
|
|
|
/// +------------+
|
|
|
|
/// | normalized |
|
|
|
|
/// +------------+
|
|
|
|
|
|
|
|
#include "MachONormalizedFile.h"
|
2014-07-17 03:49:02 +08:00
|
|
|
#include "ArchHandler.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "MachONormalizedFileBinaryUtils.h"
|
|
|
|
#include "lld/Core/Error.h"
|
|
|
|
#include "lld/Core/LLVM.h"
|
2014-08-14 07:55:41 +08:00
|
|
|
#include "lld/Core/SharedLibraryFile.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "llvm/ADT/SmallString.h"
|
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/ADT/StringSwitch.h"
|
|
|
|
#include "llvm/ADT/Twine.h"
|
2014-09-04 03:52:50 +08:00
|
|
|
#include "llvm/Object/MachO.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "llvm/Support/Casting.h"
|
2014-06-14 01:20:48 +08:00
|
|
|
#include "llvm/Support/Errc.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include "llvm/Support/FileOutputBuffer.h"
|
|
|
|
#include "llvm/Support/Host.h"
|
|
|
|
#include "llvm/Support/MachO.h"
|
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <functional>
|
2014-06-13 01:15:58 +08:00
|
|
|
#include <system_error>
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
using namespace llvm::MachO;
|
2014-09-04 03:52:50 +08:00
|
|
|
using llvm::object::ExportEntry;
|
|
|
|
using llvm::object::MachOObjectFile;
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
namespace lld {
|
|
|
|
namespace mach_o {
|
|
|
|
namespace normalized {
|
|
|
|
|
|
|
|
// Utility to call a lambda expression on each load command.
|
2014-06-12 22:53:47 +08:00
|
|
|
static std::error_code forEachLoadCommand(
|
|
|
|
StringRef lcRange, unsigned lcCount, bool swap, bool is64,
|
|
|
|
std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) {
|
2013-11-07 05:36:55 +08:00
|
|
|
const char* p = lcRange.begin();
|
|
|
|
for (unsigned i=0; i < lcCount; ++i) {
|
|
|
|
const load_command *lc = reinterpret_cast<const load_command*>(p);
|
|
|
|
load_command lcCopy;
|
|
|
|
const load_command *slc = lc;
|
|
|
|
if (swap) {
|
|
|
|
memcpy(&lcCopy, lc, sizeof(load_command));
|
|
|
|
swapStruct(lcCopy);
|
|
|
|
slc = &lcCopy;
|
|
|
|
}
|
|
|
|
if ( (p + slc->cmdsize) > lcRange.end() )
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-01-27 11:09:26 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
if (func(slc->cmd, slc->cmdsize, p))
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2014-01-27 11:09:26 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
p += slc->cmdsize;
|
2014-01-27 11:09:26 +08:00
|
|
|
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2013-11-07 05:36:55 +08:00
|
|
|
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
/// Appends \p nreloc unpacked relocation records, stored at byte offset
/// \p reloff inside \p buffer, to \p relocs.
///
/// \param swap      true if the file's byte order differs from the host's.
/// \param bigEndian forwarded to unpackRelocation().
/// \returns executable_format_error if the relocation array does not fit in
///          \p buffer; success otherwise.
static std::error_code appendRelocations(Relocations &relocs, StringRef buffer,
                                         bool swap, bool bigEndian,
                                         uint32_t reloff, uint32_t nreloc) {
  // Do the range arithmetic in 64 bits: both operands come straight from the
  // file, and a 32-bit sum could wrap around and slip past the check.
  const uint64_t relocsEnd =
      uint64_t(reloff) + uint64_t(nreloc) * sizeof(any_relocation_info);
  if (relocsEnd > buffer.size())
    return make_error_code(llvm::errc::executable_format_error);

  const any_relocation_info *relocsArray =
      reinterpret_cast<const any_relocation_info *>(buffer.begin() + reloff);

  for (uint32_t i = 0; i < nreloc; ++i)
    relocs.push_back(unpackRelocation(relocsArray[i], swap, bigEndian));

  return std::error_code();
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
static std::error_code
|
2014-05-28 09:16:35 +08:00
|
|
|
appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool swap,
|
|
|
|
bool bigEndian, uint32_t istOffset, uint32_t istCount,
|
|
|
|
uint32_t startIndex, uint32_t count) {
|
|
|
|
if ((istOffset + istCount*4) > buffer.size())
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-05-28 09:16:35 +08:00
|
|
|
if (startIndex+count > istCount)
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-05-28 09:16:35 +08:00
|
|
|
const uint32_t *indirectSymbolArray =
|
|
|
|
reinterpret_cast<const uint32_t*>(buffer.begin()+istOffset);
|
|
|
|
|
|
|
|
for(uint32_t i=0; i < count; ++i) {
|
|
|
|
isyms.push_back(read32(swap, indirectSymbolArray[startIndex+i]));
|
|
|
|
}
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2014-05-28 09:16:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-01-15 06:32:38 +08:00
|
|
|
template <typename T> static T readBigEndian(T t) {
|
|
|
|
if (llvm::sys::IsLittleEndianHost)
|
2014-06-14 21:26:14 +08:00
|
|
|
llvm::sys::swapByteOrder(t);
|
2014-01-15 06:32:38 +08:00
|
|
|
return t;
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
|
2014-09-05 04:08:30 +08:00
|
|
|
|
|
|
|
static bool isMachOHeader(const mach_header *mh, bool &is64, bool &swap) {
|
|
|
|
switch (mh->magic) {
|
|
|
|
case llvm::MachO::MH_MAGIC:
|
|
|
|
is64 = false;
|
|
|
|
swap = false;
|
|
|
|
return true;
|
|
|
|
case llvm::MachO::MH_MAGIC_64:
|
|
|
|
is64 = true;
|
|
|
|
swap = false;
|
|
|
|
return true;
|
|
|
|
case llvm::MachO::MH_CIGAM:
|
|
|
|
is64 = false;
|
|
|
|
swap = true;
|
|
|
|
return true;
|
|
|
|
case llvm::MachO::MH_CIGAM_64:
|
|
|
|
is64 = true;
|
|
|
|
swap = true;
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Tests whether the file at \p path is a thin (non-fat) mach-o object file.
///
/// \param path file to open and inspect.
/// \param arch on success, receives the architecture looked up from the
///             header's cputype/cpusubtype pair; untouched on failure.
/// \returns true only if the file can be read, is at least 32 bytes long,
///          starts with a mach-o magic and has file type MH_OBJECT.
bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) {
  // Open and map the file; an unreadable file is simply "not an object".
  ErrorOr<std::unique_ptr<MemoryBuffer>> mapped =
      MemoryBuffer::getFileOrSTDIN(path);
  if (mapped.getError())
    return false;

  // Anything shorter than 32 bytes is too small to be a mach-o object file.
  const StringRef contents = (*mapped)->getBuffer();
  if (contents.size() < 32)
    return false;

  // Must start with MH_MAGIC (or a variant) and be of type MH_OBJECT.
  const mach_header *header =
      reinterpret_cast<const mach_header *>(contents.begin());
  bool wide, reversed;
  if (!isMachOHeader(header, wide, reversed) ||
      read32(reversed, header->filetype) != MH_OBJECT)
    return false;

  // Translate the cpu type/subtype pair into an Arch value.
  const uint32_t cpuType = read32(reversed, header->cputype);
  const uint32_t cpuSubtype = read32(reversed, header->cpusubtype);
  arch = MachOLinkingContext::archFromCpuType(cpuType, cpuSubtype);
  return true;
}
|
|
|
|
|
2014-10-08 09:48:10 +08:00
|
|
|
|
|
|
|
bool sliceFromFatFile(const MemoryBuffer &mb, MachOLinkingContext::Arch arch,
|
|
|
|
uint32_t &offset, uint32_t &size) {
|
|
|
|
const char *start = mb.getBufferStart();
|
|
|
|
const llvm::MachO::fat_header *fh =
|
|
|
|
reinterpret_cast<const llvm::MachO::fat_header *>(start);
|
|
|
|
if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC)
|
|
|
|
return false;
|
|
|
|
uint32_t nfat_arch = readBigEndian(fh->nfat_arch);
|
|
|
|
const fat_arch *fstart =
|
|
|
|
reinterpret_cast<const fat_arch *>(start + sizeof(fat_header));
|
|
|
|
const fat_arch *fend =
|
|
|
|
reinterpret_cast<const fat_arch *>(start + sizeof(fat_header) +
|
|
|
|
sizeof(fat_arch) * nfat_arch);
|
|
|
|
const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch);
|
|
|
|
const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch);
|
|
|
|
for (const fat_arch *fa = fstart; fa < fend; ++fa) {
|
|
|
|
if ((readBigEndian(fa->cputype) == reqCpuType) &&
|
|
|
|
(readBigEndian(fa->cpusubtype) == reqCpuSubtype)) {
|
|
|
|
offset = readBigEndian(fa->offset);
|
|
|
|
size = readBigEndian(fa->size);
|
|
|
|
if ((offset + size) > mb.getBufferSize())
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
/// Reads a mach-o file and produces an in-memory normalized view.
|
2014-01-15 06:32:38 +08:00
|
|
|
ErrorOr<std::unique_ptr<NormalizedFile>>
|
|
|
|
readBinary(std::unique_ptr<MemoryBuffer> &mb,
|
|
|
|
const MachOLinkingContext::Arch arch) {
|
2013-11-07 05:36:55 +08:00
|
|
|
// Make empty NormalizedFile.
|
|
|
|
std::unique_ptr<NormalizedFile> f(new NormalizedFile());
|
|
|
|
|
2014-01-15 06:32:38 +08:00
|
|
|
const char *start = mb->getBufferStart();
|
|
|
|
size_t objSize = mb->getBufferSize();
|
|
|
|
const mach_header *mh = reinterpret_cast<const mach_header *>(start);
|
2014-10-08 09:48:10 +08:00
|
|
|
|
|
|
|
uint32_t sliceOffset;
|
|
|
|
uint32_t sliceSize;
|
|
|
|
if (sliceFromFatFile(*mb, arch, sliceOffset, sliceSize)) {
|
|
|
|
start = &start[sliceOffset];
|
|
|
|
objSize = sliceSize;
|
2014-01-15 06:32:38 +08:00
|
|
|
mh = reinterpret_cast<const mach_header *>(start);
|
|
|
|
}
|
|
|
|
|
2014-10-08 09:48:10 +08:00
|
|
|
// Determine endianness and pointer size for mach-o file.
|
2013-11-07 05:36:55 +08:00
|
|
|
bool is64, swap;
|
2014-09-05 04:08:30 +08:00
|
|
|
if (!isMachOHeader(mh, is64, swap))
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
// Endian swap header, if needed.
|
|
|
|
mach_header headerCopy;
|
|
|
|
const mach_header *smh = mh;
|
|
|
|
if (swap) {
|
|
|
|
memcpy(&headerCopy, mh, sizeof(mach_header));
|
|
|
|
swapStruct(headerCopy);
|
|
|
|
smh = &headerCopy;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Validate head and load commands fit in buffer.
|
|
|
|
const uint32_t lcCount = smh->ncmds;
|
2014-01-15 06:32:38 +08:00
|
|
|
const char *lcStart =
|
|
|
|
start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header));
|
2013-11-07 05:36:55 +08:00
|
|
|
StringRef lcRange(lcStart, smh->sizeofcmds);
|
2014-01-15 06:32:38 +08:00
|
|
|
if (lcRange.end() > (start + objSize))
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2013-11-07 05:36:55 +08:00
|
|
|
|
2014-07-01 06:57:33 +08:00
|
|
|
// Get architecture from mach_header.
|
2013-11-07 05:36:55 +08:00
|
|
|
f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype);
|
2014-07-01 06:57:33 +08:00
|
|
|
if (f->arch != arch) {
|
|
|
|
return make_dynamic_error_code(Twine("file is wrong architecture. Expected "
|
|
|
|
"(" + MachOLinkingContext::nameFromArch(arch)
|
|
|
|
+ ") found ("
|
|
|
|
+ MachOLinkingContext::nameFromArch(f->arch)
|
|
|
|
+ ")" ));
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
bool isBigEndianArch = MachOLinkingContext::isBigEndian(f->arch);
|
|
|
|
// Copy file type and flags
|
|
|
|
f->fileType = HeaderFileType(smh->filetype);
|
|
|
|
f->flags = smh->flags;
|
|
|
|
|
|
|
|
|
2014-05-28 09:16:35 +08:00
|
|
|
// Pre-scan load commands looking for indirect symbol table.
|
|
|
|
uint32_t indirectSymbolTableOffset = 0;
|
|
|
|
uint32_t indirectSymbolTableCount = 0;
|
2014-06-12 22:53:47 +08:00
|
|
|
std::error_code ec = forEachLoadCommand(lcRange, lcCount, swap, is64,
|
|
|
|
[&](uint32_t cmd, uint32_t size,
|
|
|
|
const char *lc) -> bool {
|
2014-05-28 09:16:35 +08:00
|
|
|
if (cmd == LC_DYSYMTAB) {
|
|
|
|
const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc);
|
|
|
|
indirectSymbolTableOffset = read32(swap, d->indirectsymoff);
|
|
|
|
indirectSymbolTableCount = read32(swap, d->nindirectsyms);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
});
|
|
|
|
if (ec)
|
|
|
|
return ec;
|
|
|
|
|
|
|
|
// Walk load commands looking for segments/sections and the symbol table.
|
2014-07-25 07:06:56 +08:00
|
|
|
const data_in_code_entry *dataInCode = nullptr;
|
2014-09-04 03:52:50 +08:00
|
|
|
const dyld_info_command *dyldInfo = nullptr;
|
2014-07-25 07:06:56 +08:00
|
|
|
uint32_t dataInCodeSize = 0;
|
2014-05-28 09:16:35 +08:00
|
|
|
ec = forEachLoadCommand(lcRange, lcCount, swap, is64,
|
|
|
|
[&] (uint32_t cmd, uint32_t size, const char* lc) -> bool {
|
2014-08-14 07:55:41 +08:00
|
|
|
switch(cmd) {
|
|
|
|
case LC_SEGMENT_64:
|
|
|
|
if (is64) {
|
2014-01-27 11:09:26 +08:00
|
|
|
const segment_command_64 *seg =
|
2013-11-07 05:36:55 +08:00
|
|
|
reinterpret_cast<const segment_command_64*>(lc);
|
2014-06-14 21:26:14 +08:00
|
|
|
const unsigned sectionCount = (swap
|
|
|
|
? llvm::sys::getSwappedBytes(seg->nsects)
|
|
|
|
: seg->nsects);
|
2013-11-07 05:36:55 +08:00
|
|
|
const section_64 *sects = reinterpret_cast<const section_64*>
|
|
|
|
(lc + sizeof(segment_command_64));
|
2014-01-27 11:09:26 +08:00
|
|
|
const unsigned lcSize = sizeof(segment_command_64)
|
2013-11-07 05:36:55 +08:00
|
|
|
+ sectionCount*sizeof(section_64);
|
|
|
|
// Verify sections don't extend beyond end of segment load command.
|
2014-01-27 11:09:26 +08:00
|
|
|
if (lcSize > size)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
for (unsigned i=0; i < sectionCount; ++i) {
|
|
|
|
const section_64 *sect = §s[i];
|
|
|
|
Section section;
|
|
|
|
section.segmentName = getString16(sect->segname);
|
|
|
|
section.sectionName = getString16(sect->sectname);
|
2014-01-27 11:09:26 +08:00
|
|
|
section.type = (SectionType)(read32(swap, sect->flags)
|
2013-11-07 05:36:55 +08:00
|
|
|
& SECTION_TYPE);
|
|
|
|
section.attributes = read32(swap, sect->flags) & SECTION_ATTRIBUTES;
|
|
|
|
section.alignment = read32(swap, sect->align);
|
|
|
|
section.address = read64(swap, sect->addr);
|
2014-01-15 06:32:38 +08:00
|
|
|
const uint8_t *content =
|
|
|
|
(uint8_t *)start + read32(swap, sect->offset);
|
2013-11-07 05:36:55 +08:00
|
|
|
size_t contentSize = read64(swap, sect->size);
|
|
|
|
// Note: this assign() is copying the content bytes. Ideally,
|
|
|
|
// we can use a custom allocator for vector to avoid the copy.
|
2014-01-11 09:07:43 +08:00
|
|
|
section.content = llvm::makeArrayRef(content, contentSize);
|
2014-01-27 11:09:26 +08:00
|
|
|
appendRelocations(section.relocations, mb->getBuffer(),
|
|
|
|
swap, isBigEndianArch, read32(swap, sect->reloff),
|
2013-11-07 05:36:55 +08:00
|
|
|
read32(swap, sect->nreloc));
|
2014-05-28 09:16:35 +08:00
|
|
|
if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
|
|
|
|
appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(),
|
|
|
|
swap, isBigEndianArch,
|
|
|
|
indirectSymbolTableOffset,
|
|
|
|
indirectSymbolTableCount,
|
|
|
|
read32(swap, sect->reserved1), contentSize/4);
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
f->sections.push_back(section);
|
|
|
|
}
|
|
|
|
}
|
2014-08-14 07:55:41 +08:00
|
|
|
break;
|
|
|
|
case LC_SEGMENT:
|
|
|
|
if (!is64) {
|
2014-01-27 11:09:26 +08:00
|
|
|
const segment_command *seg =
|
2013-11-07 05:36:55 +08:00
|
|
|
reinterpret_cast<const segment_command*>(lc);
|
2014-06-14 21:26:14 +08:00
|
|
|
const unsigned sectionCount = (swap
|
|
|
|
? llvm::sys::getSwappedBytes(seg->nsects)
|
|
|
|
: seg->nsects);
|
2013-11-07 05:36:55 +08:00
|
|
|
const section *sects = reinterpret_cast<const section*>
|
|
|
|
(lc + sizeof(segment_command));
|
2014-01-27 11:09:26 +08:00
|
|
|
const unsigned lcSize = sizeof(segment_command)
|
2013-11-07 05:36:55 +08:00
|
|
|
+ sectionCount*sizeof(section);
|
|
|
|
// Verify sections don't extend beyond end of segment load command.
|
2014-01-27 11:09:26 +08:00
|
|
|
if (lcSize > size)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
for (unsigned i=0; i < sectionCount; ++i) {
|
|
|
|
const section *sect = §s[i];
|
|
|
|
Section section;
|
|
|
|
section.segmentName = getString16(sect->segname);
|
|
|
|
section.sectionName = getString16(sect->sectname);
|
2014-01-27 11:09:26 +08:00
|
|
|
section.type = (SectionType)(read32(swap, sect->flags)
|
2013-11-07 05:36:55 +08:00
|
|
|
& SECTION_TYPE);
|
|
|
|
section.attributes = read32(swap, sect->flags) & SECTION_ATTRIBUTES;
|
|
|
|
section.alignment = read32(swap, sect->align);
|
|
|
|
section.address = read32(swap, sect->addr);
|
2014-01-15 06:32:38 +08:00
|
|
|
const uint8_t *content =
|
|
|
|
(uint8_t *)start + read32(swap, sect->offset);
|
2013-11-07 05:36:55 +08:00
|
|
|
size_t contentSize = read32(swap, sect->size);
|
|
|
|
// Note: this assign() is copying the content bytes. Ideally,
|
|
|
|
// we can use a custom allocator for vector to avoid the copy.
|
2014-01-11 09:07:43 +08:00
|
|
|
section.content = llvm::makeArrayRef(content, contentSize);
|
2014-01-27 11:09:26 +08:00
|
|
|
appendRelocations(section.relocations, mb->getBuffer(),
|
|
|
|
swap, isBigEndianArch, read32(swap, sect->reloff),
|
2013-11-07 05:36:55 +08:00
|
|
|
read32(swap, sect->nreloc));
|
2014-05-28 09:16:35 +08:00
|
|
|
if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
|
|
|
|
appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(),
|
|
|
|
swap, isBigEndianArch,
|
|
|
|
indirectSymbolTableOffset,
|
|
|
|
indirectSymbolTableCount,
|
|
|
|
read32(swap, sect->reserved1), contentSize/4);
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
f->sections.push_back(section);
|
|
|
|
}
|
|
|
|
}
|
2014-08-14 07:55:41 +08:00
|
|
|
break;
|
|
|
|
case LC_SYMTAB: {
|
2013-11-07 05:36:55 +08:00
|
|
|
const symtab_command *st = reinterpret_cast<const symtab_command*>(lc);
|
2014-01-15 06:32:38 +08:00
|
|
|
const char *strings = start + read32(swap, st->stroff);
|
2013-11-07 05:36:55 +08:00
|
|
|
const uint32_t strSize = read32(swap, st->strsize);
|
|
|
|
// Validate string pool and symbol table all in buffer.
|
2014-01-27 11:09:26 +08:00
|
|
|
if ( read32(swap, st->stroff)+read32(swap, st->strsize)
|
2014-01-15 06:32:38 +08:00
|
|
|
> objSize )
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
if (is64) {
|
|
|
|
const uint32_t symOffset = read32(swap, st->symoff);
|
|
|
|
const uint32_t symCount = read32(swap, st->nsyms);
|
2014-01-15 06:32:38 +08:00
|
|
|
if ( symOffset+(symCount*sizeof(nlist_64)) > objSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2014-01-15 06:32:38 +08:00
|
|
|
const nlist_64 *symbols =
|
|
|
|
reinterpret_cast<const nlist_64 *>(start + symOffset);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Convert each nlist_64 to a lld::mach_o::normalized::Symbol.
|
|
|
|
for(uint32_t i=0; i < symCount; ++i) {
|
|
|
|
const nlist_64 *sin = &symbols[i];
|
|
|
|
nlist_64 tempSym;
|
|
|
|
if (swap) {
|
|
|
|
tempSym = *sin; swapStruct(tempSym); sin = &tempSym;
|
|
|
|
}
|
|
|
|
Symbol sout;
|
2014-01-27 11:09:26 +08:00
|
|
|
if (sin->n_strx > strSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
sout.name = &strings[sin->n_strx];
|
|
|
|
sout.type = (NListType)(sin->n_type & N_TYPE);
|
|
|
|
sout.scope = (sin->n_type & (N_PEXT|N_EXT));
|
|
|
|
sout.sect = sin->n_sect;
|
|
|
|
sout.desc = sin->n_desc;
|
|
|
|
sout.value = sin->n_value;
|
|
|
|
if (sout.type == N_UNDF)
|
|
|
|
f->undefinedSymbols.push_back(sout);
|
2014-06-28 02:25:01 +08:00
|
|
|
else if (sin->n_type & N_EXT)
|
2013-11-07 05:36:55 +08:00
|
|
|
f->globalSymbols.push_back(sout);
|
|
|
|
else
|
|
|
|
f->localSymbols.push_back(sout);
|
|
|
|
}
|
2014-01-27 11:09:26 +08:00
|
|
|
} else {
|
2013-11-07 05:36:55 +08:00
|
|
|
const uint32_t symOffset = read32(swap, st->symoff);
|
|
|
|
const uint32_t symCount = read32(swap, st->nsyms);
|
2014-01-15 06:32:38 +08:00
|
|
|
if ( symOffset+(symCount*sizeof(nlist)) > objSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2014-01-15 06:32:38 +08:00
|
|
|
const nlist *symbols =
|
|
|
|
reinterpret_cast<const nlist *>(start + symOffset);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Convert each nlist to a lld::mach_o::normalized::Symbol.
|
|
|
|
for(uint32_t i=0; i < symCount; ++i) {
|
|
|
|
const nlist *sin = &symbols[i];
|
|
|
|
nlist tempSym;
|
|
|
|
if (swap) {
|
|
|
|
tempSym = *sin; swapStruct(tempSym); sin = &tempSym;
|
|
|
|
}
|
|
|
|
Symbol sout;
|
2014-01-27 11:09:26 +08:00
|
|
|
if (sin->n_strx > strSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
sout.name = &strings[sin->n_strx];
|
|
|
|
sout.type = (NListType)(sin->n_type & N_TYPE);
|
|
|
|
sout.scope = (sin->n_type & (N_PEXT|N_EXT));
|
|
|
|
sout.sect = sin->n_sect;
|
|
|
|
sout.desc = sin->n_desc;
|
|
|
|
sout.value = sin->n_value;
|
|
|
|
if (sout.type == N_UNDF)
|
|
|
|
f->undefinedSymbols.push_back(sout);
|
|
|
|
else if (sout.scope == (SymbolScope)N_EXT)
|
|
|
|
f->globalSymbols.push_back(sout);
|
|
|
|
else
|
|
|
|
f->localSymbols.push_back(sout);
|
|
|
|
}
|
|
|
|
}
|
2014-08-14 07:55:41 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case LC_ID_DYLIB: {
|
2014-07-01 16:15:41 +08:00
|
|
|
const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
|
2014-07-25 07:06:56 +08:00
|
|
|
f->installName = lc + read32(swap, dl->dylib.name);
|
2014-08-14 07:55:41 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case LC_DATA_IN_CODE: {
|
2014-07-25 07:06:56 +08:00
|
|
|
const linkedit_data_command *ldc =
|
|
|
|
reinterpret_cast<const linkedit_data_command*>(lc);
|
|
|
|
dataInCode = reinterpret_cast<const data_in_code_entry*>(
|
|
|
|
start + read32(swap, ldc->dataoff));
|
|
|
|
dataInCodeSize = read32(swap, ldc->datasize);
|
2014-08-14 07:55:41 +08:00
|
|
|
}
|
2014-09-04 03:52:50 +08:00
|
|
|
break;
|
2014-08-14 07:55:41 +08:00
|
|
|
case LC_LOAD_DYLIB:
|
|
|
|
case LC_LOAD_WEAK_DYLIB:
|
|
|
|
case LC_REEXPORT_DYLIB:
|
|
|
|
case LC_LOAD_UPWARD_DYLIB: {
|
|
|
|
const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
|
|
|
|
DependentDylib entry;
|
|
|
|
entry.path = lc + read32(swap, dl->dylib.name);
|
|
|
|
entry.kind = LoadCommandType(cmd);
|
|
|
|
f->dependentDylibs.push_back(entry);
|
|
|
|
}
|
|
|
|
break;
|
2014-09-04 03:52:50 +08:00
|
|
|
case LC_DYLD_INFO:
|
|
|
|
case LC_DYLD_INFO_ONLY:
|
|
|
|
dyldInfo = reinterpret_cast<const dyld_info_command*>(lc);
|
|
|
|
break;
|
2014-07-01 16:15:41 +08:00
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
return false;
|
|
|
|
});
|
2014-01-27 11:09:26 +08:00
|
|
|
if (ec)
|
2013-11-07 05:36:55 +08:00
|
|
|
return ec;
|
|
|
|
|
2014-07-25 07:06:56 +08:00
|
|
|
if (dataInCode) {
|
|
|
|
// Convert on-disk data_in_code_entry array to DataInCode vector.
|
|
|
|
for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) {
|
|
|
|
DataInCode entry;
|
|
|
|
entry.offset = read32(swap, dataInCode[i].offset);
|
|
|
|
entry.length = read16(swap, dataInCode[i].length);
|
|
|
|
entry.kind = (DataRegionType)read16(swap, dataInCode[i].kind);
|
|
|
|
f->dataInCode.push_back(entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-04 03:52:50 +08:00
|
|
|
if (dyldInfo) {
|
|
|
|
// If any exports, extract and add to normalized exportInfo vector.
|
|
|
|
if (dyldInfo->export_size) {
|
|
|
|
const uint8_t *trieStart = reinterpret_cast<const uint8_t*>(start +
|
|
|
|
dyldInfo->export_off);
|
|
|
|
ArrayRef<uint8_t> trie(trieStart, dyldInfo->export_size);
|
|
|
|
for (const ExportEntry &trieExport : MachOObjectFile::exports(trie)) {
|
|
|
|
Export normExport;
|
|
|
|
normExport.name = trieExport.name().copy(f->ownedAllocations);
|
|
|
|
normExport.offset = trieExport.address();
|
|
|
|
normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK);
|
|
|
|
normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK;
|
|
|
|
normExport.otherOffset = trieExport.other();
|
|
|
|
if (!trieExport.otherName().empty())
|
|
|
|
normExport.otherName = trieExport.otherName().copy(f->ownedAllocations);
|
|
|
|
f->exportInfo.push_back(normExport);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
return std::move(f);
|
|
|
|
}
|
|
|
|
|
2014-05-30 09:13:49 +08:00
|
|
|
class MachOReader : public Reader {
|
|
|
|
public:
|
2014-08-14 07:55:41 +08:00
|
|
|
MachOReader(MachOLinkingContext &ctx) : _ctx(ctx) {}
|
2014-05-30 09:13:49 +08:00
|
|
|
|
|
|
|
bool canParse(file_magic magic, StringRef ext,
|
|
|
|
const MemoryBuffer &mb) const override {
|
2014-10-08 09:48:10 +08:00
|
|
|
switch (magic) {
|
|
|
|
case llvm::sys::fs::file_magic::macho_object:
|
|
|
|
case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib:
|
|
|
|
case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
|
|
|
|
return (mb.getBufferSize() > 32);
|
|
|
|
default:
|
2014-05-30 09:13:49 +08:00
|
|
|
return false;
|
2014-10-08 09:48:10 +08:00
|
|
|
}
|
2014-05-30 09:13:49 +08:00
|
|
|
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
std::error_code
|
2014-05-30 09:13:49 +08:00
|
|
|
parseFile(std::unique_ptr<MemoryBuffer> &mb, const Registry ®istry,
|
2014-06-12 22:53:47 +08:00
|
|
|
std::vector<std::unique_ptr<File>> &result) const override {
|
2014-05-30 09:13:49 +08:00
|
|
|
// Convert binary file to normalized mach-o.
|
2014-08-14 07:55:41 +08:00
|
|
|
auto normFile = readBinary(mb, _ctx.arch());
|
2014-06-12 22:53:47 +08:00
|
|
|
if (std::error_code ec = normFile.getError())
|
2014-05-30 09:13:49 +08:00
|
|
|
return ec;
|
|
|
|
// Convert normalized mach-o to atoms.
|
|
|
|
auto file = normalizedToAtoms(**normFile, mb->getBufferIdentifier(), false);
|
2014-06-12 22:53:47 +08:00
|
|
|
if (std::error_code ec = file.getError())
|
2014-05-30 09:13:49 +08:00
|
|
|
return ec;
|
|
|
|
|
|
|
|
result.push_back(std::move(*file));
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2014-05-30 09:13:49 +08:00
|
|
|
}
|
|
|
|
private:
|
2014-08-14 07:55:41 +08:00
|
|
|
MachOLinkingContext &_ctx;
|
2014-05-30 09:13:49 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
} // namespace normalized
|
|
|
|
} // namespace mach_o
|
[lld] Introduce registry and Reference kind tuple
The main changes are in:
include/lld/Core/Reference.h
include/lld/ReaderWriter/Reader.h
Everything else is details to support the main change.
1) Registration based Readers
Previously, lld had a tangled interdependency with all the Readers. It would
have been impossible to make a streamlined linker (say for a JIT) which
just supported one file format and one architecture (no yaml, no archives, etc).
The old model also required a LinkingContext to read an object file, which
would have made .o inspection tools awkward.
The new model is that there is a global Registry object. You programmatically
register the Readers you want with the registry object. Whenever you need to
read/parse a file, you ask the registry to do it, and the registry tries each
registered reader.
For ease of use with the existing lld code base, there is one Registry
object inside the LinkingContext object.
2) Changing kind value to be a tuple
Beside Readers, the registry also keeps track of the mapping for Reference
Kind values to and from strings. Along with that, this patch also fixes
an ambiguity with the previous Reference::Kind values. The problem was that
we wanted to reuse existing relocation type values as Reference::Kind values.
But then how can the YAML write know how to convert a value to a string? The
fix is to change the 32-bit Reference::Kind into a tuple with an 8-bit namespace
(e.g. ELF, COFFF, etc), an 8-bit architecture (e.g. x86_64, PowerPC, etc), and
a 16-bit value. This tuple system allows conversion to and from strings with
no ambiguities.
llvm-svn: 197727
2013-12-20 05:58:00 +08:00
|
|
|
|
2014-08-14 07:55:41 +08:00
|
|
|
/// Registers everything needed to read mach-o input with this registry: the
/// binary mach-o reader, the reference-kind string table of the context's
/// arch handler, and the tagged-document handler for mach-o YAML.
void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) {
  const MachOLinkingContext::Arch targetArch = ctx.arch();

  // Binary mach-o reader bound to the linking context.
  std::unique_ptr<Reader> binaryReader(
      new mach_o::normalized::MachOReader(ctx));
  add(std::move(binaryReader));

  // Kind value <-> string mapping for the context's architecture.
  addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(),
               ctx.archHandler().kindStrings());

  // YAML tagged-document handler for the same architecture.
  std::unique_ptr<YamlIOTaggedDocumentHandler> yamlHandler(
      new mach_o::MachOYamlIOTaggedDocumentHandler(targetArch));
  add(std::move(yamlHandler));
}
|
|
|
|
|
2014-10-08 09:48:10 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
} // namespace lld
|
|
|
|
|