2013-11-07 05:36:55 +08:00
|
|
|
//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===//
|
|
|
|
//
|
|
|
|
// The LLVM Linker
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
///
|
|
|
|
/// \file For mach-o object files, this implementation converts from
|
|
|
|
/// mach-o on-disk binary format to in-memory normalized mach-o.
|
|
|
|
///
|
|
|
|
/// +---------------+
|
|
|
|
/// | binary mach-o |
|
|
|
|
/// +---------------+
|
|
|
|
/// |
|
|
|
|
/// |
|
|
|
|
/// v
|
|
|
|
/// +------------+
|
|
|
|
/// | normalized |
|
|
|
|
/// +------------+
|
|
|
|
|
|
|
|
#include "MachONormalizedFile.h"
|
|
|
|
#include "MachONormalizedFileBinaryUtils.h"
|
[lld] Introduce registry and Reference kind tuple
The main changes are in:
include/lld/Core/Reference.h
include/lld/ReaderWriter/Reader.h
Everything else is details to support the main change.
1) Registration based Readers
Previously, lld had a tangled interdependency with all the Readers. It would
have been impossible to make a streamlined linker (say for a JIT) which
just supported one file format and one architecture (no yaml, no archives, etc).
The old model also required a LinkingContext to read an object file, which
would have made .o inspection tools awkward.
The new model is that there is a global Registry object. You programmatically
register the Readers you want with the registry object. Whenever you need to
read/parse a file, you ask the registry to do it, and the registry tries each
registered reader.
For ease of use with the existing lld code base, there is one Registry
object inside the LinkingContext object.
2) Changing kind value to be a tuple
Beside Readers, the registry also keeps track of the mapping for Reference
Kind values to and from strings. Along with that, this patch also fixes
an ambiguity with the previous Reference::Kind values. The problem was that
we wanted to reuse existing relocation type values as Reference::Kind values.
But then how can the YAML write know how to convert a value to a string? The
fix is to change the 32-bit Reference::Kind into a tuple with an 8-bit namespace
(e.g. ELF, COFFF, etc), an 8-bit architecture (e.g. x86_64, PowerPC, etc), and
a 16-bit value. This tuple system allows conversion to and from strings with
no ambiguities.
llvm-svn: 197727
2013-12-20 05:58:00 +08:00
|
|
|
#include "ReferenceKinds.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "lld/Core/Error.h"
|
|
|
|
#include "lld/Core/LLVM.h"
|
|
|
|
#include "llvm/ADT/SmallString.h"
|
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/ADT/StringSwitch.h"
|
|
|
|
#include "llvm/ADT/Twine.h"
|
|
|
|
#include "llvm/Support/Casting.h"
|
2014-06-14 01:20:48 +08:00
|
|
|
#include "llvm/Support/Errc.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include "llvm/Support/FileOutputBuffer.h"
|
|
|
|
#include "llvm/Support/Host.h"
|
|
|
|
#include "llvm/Support/MachO.h"
|
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <functional>
|
2014-06-13 01:15:58 +08:00
|
|
|
#include <system_error>
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
using namespace llvm::MachO;
|
|
|
|
|
|
|
|
namespace lld {
|
|
|
|
namespace mach_o {
|
|
|
|
namespace normalized {
|
|
|
|
|
|
|
|
// Utility to call a lambda expression on each load command.
|
2014-06-12 22:53:47 +08:00
|
|
|
static std::error_code forEachLoadCommand(
|
|
|
|
StringRef lcRange, unsigned lcCount, bool swap, bool is64,
|
|
|
|
std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) {
|
2013-11-07 05:36:55 +08:00
|
|
|
const char* p = lcRange.begin();
|
|
|
|
for (unsigned i=0; i < lcCount; ++i) {
|
|
|
|
const load_command *lc = reinterpret_cast<const load_command*>(p);
|
|
|
|
load_command lcCopy;
|
|
|
|
const load_command *slc = lc;
|
|
|
|
if (swap) {
|
|
|
|
memcpy(&lcCopy, lc, sizeof(load_command));
|
|
|
|
swapStruct(lcCopy);
|
|
|
|
slc = &lcCopy;
|
|
|
|
}
|
|
|
|
if ( (p + slc->cmdsize) > lcRange.end() )
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-01-27 11:09:26 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
if (func(slc->cmd, slc->cmdsize, p))
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2014-01-27 11:09:26 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
p += slc->cmdsize;
|
2014-01-27 11:09:26 +08:00
|
|
|
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2013-11-07 05:36:55 +08:00
|
|
|
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
/// Appends \p nreloc relocations, found \p reloff bytes into \p buffer, to
/// \p relocs.
///
/// \param relocs     container that receives the unpacked relocations.
/// \param buffer     bytes that \p reloff is an offset into.
/// \param swap       forwarded to unpackRelocation().
/// \param bigEndian  forwarded to unpackRelocation().
/// \param reloff     byte offset of the first relocation entry.
/// \param nreloc     number of relocation entries.
/// \returns executable_format_error if the relocation entries do not lie
///          completely inside \p buffer, otherwise success.
static std::error_code appendRelocations(Relocations &relocs, StringRef buffer,
                                         bool swap, bool bigEndian,
                                         uint32_t reloff, uint32_t nreloc) {
  // Do the arithmetic in 64 bits: with 32-bit math a large nreloc wraps
  // around and slips past the bounds check.
  const uint64_t relocsEnd =
      uint64_t(reloff) + uint64_t(nreloc) * sizeof(any_relocation_info);
  if (relocsEnd > buffer.size())
    return make_error_code(llvm::errc::executable_format_error);

  const any_relocation_info *relocsArray =
      reinterpret_cast<const any_relocation_info *>(buffer.begin() + reloff);

  for (uint32_t i = 0; i < nreloc; ++i)
    relocs.push_back(unpackRelocation(relocsArray[i], swap, bigEndian));

  return std::error_code();
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
static std::error_code
|
2014-05-28 09:16:35 +08:00
|
|
|
appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool swap,
|
|
|
|
bool bigEndian, uint32_t istOffset, uint32_t istCount,
|
|
|
|
uint32_t startIndex, uint32_t count) {
|
|
|
|
if ((istOffset + istCount*4) > buffer.size())
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-05-28 09:16:35 +08:00
|
|
|
if (startIndex+count > istCount)
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-05-28 09:16:35 +08:00
|
|
|
const uint32_t *indirectSymbolArray =
|
|
|
|
reinterpret_cast<const uint32_t*>(buffer.begin()+istOffset);
|
|
|
|
|
|
|
|
for(uint32_t i=0; i < count; ++i) {
|
|
|
|
isyms.push_back(read32(swap, indirectSymbolArray[startIndex+i]));
|
|
|
|
}
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2014-05-28 09:16:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-01-15 06:32:38 +08:00
|
|
|
template <typename T> static T readBigEndian(T t) {
|
|
|
|
if (llvm::sys::IsLittleEndianHost)
|
2014-06-14 20:40:04 +08:00
|
|
|
return llvm::sys::getSwappedBytes(t);
|
2014-01-15 06:32:38 +08:00
|
|
|
return t;
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
/// Reads a mach-o file and produces an in-memory normalized view.
|
2014-01-15 06:32:38 +08:00
|
|
|
ErrorOr<std::unique_ptr<NormalizedFile>>
|
|
|
|
readBinary(std::unique_ptr<MemoryBuffer> &mb,
|
|
|
|
const MachOLinkingContext::Arch arch) {
|
2013-11-07 05:36:55 +08:00
|
|
|
// Make empty NormalizedFile.
|
|
|
|
std::unique_ptr<NormalizedFile> f(new NormalizedFile());
|
|
|
|
|
2014-01-15 06:32:38 +08:00
|
|
|
const char *start = mb->getBufferStart();
|
|
|
|
size_t objSize = mb->getBufferSize();
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
// Determine endianness and pointer size for mach-o file.
|
2014-01-15 06:32:38 +08:00
|
|
|
const mach_header *mh = reinterpret_cast<const mach_header *>(start);
|
|
|
|
bool isFat = mh->magic == llvm::MachO::FAT_CIGAM ||
|
|
|
|
mh->magic == llvm::MachO::FAT_MAGIC;
|
|
|
|
if (isFat) {
|
|
|
|
uint32_t cputype = MachOLinkingContext::cpuTypeFromArch(arch);
|
|
|
|
uint32_t cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(arch);
|
|
|
|
const fat_header *fh = reinterpret_cast<const fat_header *>(start);
|
|
|
|
uint32_t nfat_arch = readBigEndian(fh->nfat_arch);
|
|
|
|
const fat_arch *fa =
|
|
|
|
reinterpret_cast<const fat_arch *>(start + sizeof(fat_header));
|
|
|
|
bool foundArch = false;
|
|
|
|
while (nfat_arch-- > 0) {
|
|
|
|
if (readBigEndian(fa->cputype) == cputype &&
|
|
|
|
readBigEndian(fa->cpusubtype) == cpusubtype) {
|
|
|
|
foundArch = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
fa++;
|
|
|
|
}
|
|
|
|
if (!foundArch) {
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-01-15 06:32:38 +08:00
|
|
|
}
|
|
|
|
objSize = readBigEndian(fa->size);
|
|
|
|
uint32_t offset = readBigEndian(fa->offset);
|
|
|
|
if ((offset + objSize) > mb->getBufferSize())
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-01-15 06:32:38 +08:00
|
|
|
start += offset;
|
|
|
|
mh = reinterpret_cast<const mach_header *>(start);
|
|
|
|
}
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
bool is64, swap;
|
|
|
|
switch (mh->magic) {
|
|
|
|
case llvm::MachO::MH_MAGIC:
|
|
|
|
is64 = false;
|
|
|
|
swap = false;
|
|
|
|
break;
|
|
|
|
case llvm::MachO::MH_MAGIC_64:
|
|
|
|
is64 = true;
|
|
|
|
swap = false;
|
|
|
|
break;
|
|
|
|
case llvm::MachO::MH_CIGAM:
|
|
|
|
is64 = false;
|
|
|
|
swap = true;
|
|
|
|
break;
|
|
|
|
case llvm::MachO::MH_CIGAM_64:
|
|
|
|
is64 = true;
|
|
|
|
swap = true;
|
|
|
|
break;
|
|
|
|
default:
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2013-11-07 05:36:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Endian swap header, if needed.
|
|
|
|
mach_header headerCopy;
|
|
|
|
const mach_header *smh = mh;
|
|
|
|
if (swap) {
|
|
|
|
memcpy(&headerCopy, mh, sizeof(mach_header));
|
|
|
|
swapStruct(headerCopy);
|
|
|
|
smh = &headerCopy;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Validate head and load commands fit in buffer.
|
|
|
|
const uint32_t lcCount = smh->ncmds;
|
2014-01-15 06:32:38 +08:00
|
|
|
const char *lcStart =
|
|
|
|
start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header));
|
2013-11-07 05:36:55 +08:00
|
|
|
StringRef lcRange(lcStart, smh->sizeofcmds);
|
2014-01-15 06:32:38 +08:00
|
|
|
if (lcRange.end() > (start + objSize))
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
// Normalize architecture
|
|
|
|
f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype);
|
|
|
|
bool isBigEndianArch = MachOLinkingContext::isBigEndian(f->arch);
|
|
|
|
// Copy file type and flags
|
|
|
|
f->fileType = HeaderFileType(smh->filetype);
|
|
|
|
f->flags = smh->flags;
|
|
|
|
|
|
|
|
|
2014-05-28 09:16:35 +08:00
|
|
|
// Pre-scan load commands looking for indirect symbol table.
|
|
|
|
uint32_t indirectSymbolTableOffset = 0;
|
|
|
|
uint32_t indirectSymbolTableCount = 0;
|
2014-06-12 22:53:47 +08:00
|
|
|
std::error_code ec = forEachLoadCommand(lcRange, lcCount, swap, is64,
|
|
|
|
[&](uint32_t cmd, uint32_t size,
|
|
|
|
const char *lc) -> bool {
|
2014-05-28 09:16:35 +08:00
|
|
|
if (cmd == LC_DYSYMTAB) {
|
|
|
|
const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc);
|
|
|
|
indirectSymbolTableOffset = read32(swap, d->indirectsymoff);
|
|
|
|
indirectSymbolTableCount = read32(swap, d->nindirectsyms);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
});
|
|
|
|
if (ec)
|
|
|
|
return ec;
|
|
|
|
|
|
|
|
// Walk load commands looking for segments/sections and the symbol table.
|
|
|
|
ec = forEachLoadCommand(lcRange, lcCount, swap, is64,
|
|
|
|
[&] (uint32_t cmd, uint32_t size, const char* lc) -> bool {
|
2013-11-07 05:36:55 +08:00
|
|
|
if (is64) {
|
|
|
|
if (cmd == LC_SEGMENT_64) {
|
2014-01-27 11:09:26 +08:00
|
|
|
const segment_command_64 *seg =
|
2013-11-07 05:36:55 +08:00
|
|
|
reinterpret_cast<const segment_command_64*>(lc);
|
2014-06-14 20:40:04 +08:00
|
|
|
const unsigned sectionCount = (swap ? llvm::sys::getSwappedBytes(seg->nsects)
|
2013-11-07 05:36:55 +08:00
|
|
|
: seg->nsects);
|
|
|
|
const section_64 *sects = reinterpret_cast<const section_64*>
|
|
|
|
(lc + sizeof(segment_command_64));
|
2014-01-27 11:09:26 +08:00
|
|
|
const unsigned lcSize = sizeof(segment_command_64)
|
2013-11-07 05:36:55 +08:00
|
|
|
+ sectionCount*sizeof(section_64);
|
|
|
|
// Verify sections don't extend beyond end of segment load command.
|
2014-01-27 11:09:26 +08:00
|
|
|
if (lcSize > size)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
for (unsigned i=0; i < sectionCount; ++i) {
|
|
|
|
const section_64 *sect = §s[i];
|
|
|
|
Section section;
|
|
|
|
section.segmentName = getString16(sect->segname);
|
|
|
|
section.sectionName = getString16(sect->sectname);
|
2014-01-27 11:09:26 +08:00
|
|
|
section.type = (SectionType)(read32(swap, sect->flags)
|
2013-11-07 05:36:55 +08:00
|
|
|
& SECTION_TYPE);
|
|
|
|
section.attributes = read32(swap, sect->flags) & SECTION_ATTRIBUTES;
|
|
|
|
section.alignment = read32(swap, sect->align);
|
|
|
|
section.address = read64(swap, sect->addr);
|
2014-01-15 06:32:38 +08:00
|
|
|
const uint8_t *content =
|
|
|
|
(uint8_t *)start + read32(swap, sect->offset);
|
2013-11-07 05:36:55 +08:00
|
|
|
size_t contentSize = read64(swap, sect->size);
|
|
|
|
// Note: this assign() is copying the content bytes. Ideally,
|
|
|
|
// we can use a custom allocator for vector to avoid the copy.
|
2014-01-11 09:07:43 +08:00
|
|
|
section.content = llvm::makeArrayRef(content, contentSize);
|
2014-01-27 11:09:26 +08:00
|
|
|
appendRelocations(section.relocations, mb->getBuffer(),
|
|
|
|
swap, isBigEndianArch, read32(swap, sect->reloff),
|
2013-11-07 05:36:55 +08:00
|
|
|
read32(swap, sect->nreloc));
|
2014-05-28 09:16:35 +08:00
|
|
|
if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
|
|
|
|
appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(),
|
|
|
|
swap, isBigEndianArch,
|
|
|
|
indirectSymbolTableOffset,
|
|
|
|
indirectSymbolTableCount,
|
|
|
|
read32(swap, sect->reserved1), contentSize/4);
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
f->sections.push_back(section);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (cmd == LC_SEGMENT) {
|
2014-01-27 11:09:26 +08:00
|
|
|
const segment_command *seg =
|
2013-11-07 05:36:55 +08:00
|
|
|
reinterpret_cast<const segment_command*>(lc);
|
2014-06-14 20:40:04 +08:00
|
|
|
const unsigned sectionCount = (swap ? llvm::sys::getSwappedBytes(seg->nsects)
|
2013-11-07 05:36:55 +08:00
|
|
|
: seg->nsects);
|
|
|
|
const section *sects = reinterpret_cast<const section*>
|
|
|
|
(lc + sizeof(segment_command));
|
2014-01-27 11:09:26 +08:00
|
|
|
const unsigned lcSize = sizeof(segment_command)
|
2013-11-07 05:36:55 +08:00
|
|
|
+ sectionCount*sizeof(section);
|
|
|
|
// Verify sections don't extend beyond end of segment load command.
|
2014-01-27 11:09:26 +08:00
|
|
|
if (lcSize > size)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
for (unsigned i=0; i < sectionCount; ++i) {
|
|
|
|
const section *sect = §s[i];
|
|
|
|
Section section;
|
|
|
|
section.segmentName = getString16(sect->segname);
|
|
|
|
section.sectionName = getString16(sect->sectname);
|
2014-01-27 11:09:26 +08:00
|
|
|
section.type = (SectionType)(read32(swap, sect->flags)
|
2013-11-07 05:36:55 +08:00
|
|
|
& SECTION_TYPE);
|
|
|
|
section.attributes = read32(swap, sect->flags) & SECTION_ATTRIBUTES;
|
|
|
|
section.alignment = read32(swap, sect->align);
|
|
|
|
section.address = read32(swap, sect->addr);
|
2014-01-15 06:32:38 +08:00
|
|
|
const uint8_t *content =
|
|
|
|
(uint8_t *)start + read32(swap, sect->offset);
|
2013-11-07 05:36:55 +08:00
|
|
|
size_t contentSize = read32(swap, sect->size);
|
|
|
|
// Note: this assign() is copying the content bytes. Ideally,
|
|
|
|
// we can use a custom allocator for vector to avoid the copy.
|
2014-01-11 09:07:43 +08:00
|
|
|
section.content = llvm::makeArrayRef(content, contentSize);
|
2014-01-27 11:09:26 +08:00
|
|
|
appendRelocations(section.relocations, mb->getBuffer(),
|
|
|
|
swap, isBigEndianArch, read32(swap, sect->reloff),
|
2013-11-07 05:36:55 +08:00
|
|
|
read32(swap, sect->nreloc));
|
2014-05-28 09:16:35 +08:00
|
|
|
if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
|
|
|
|
appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(),
|
|
|
|
swap, isBigEndianArch,
|
|
|
|
indirectSymbolTableOffset,
|
|
|
|
indirectSymbolTableCount,
|
|
|
|
read32(swap, sect->reserved1), contentSize/4);
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
f->sections.push_back(section);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (cmd == LC_SYMTAB) {
|
|
|
|
const symtab_command *st = reinterpret_cast<const symtab_command*>(lc);
|
2014-01-15 06:32:38 +08:00
|
|
|
const char *strings = start + read32(swap, st->stroff);
|
2013-11-07 05:36:55 +08:00
|
|
|
const uint32_t strSize = read32(swap, st->strsize);
|
|
|
|
// Validate string pool and symbol table all in buffer.
|
2014-01-27 11:09:26 +08:00
|
|
|
if ( read32(swap, st->stroff)+read32(swap, st->strsize)
|
2014-01-15 06:32:38 +08:00
|
|
|
> objSize )
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
if (is64) {
|
|
|
|
const uint32_t symOffset = read32(swap, st->symoff);
|
|
|
|
const uint32_t symCount = read32(swap, st->nsyms);
|
2014-01-15 06:32:38 +08:00
|
|
|
if ( symOffset+(symCount*sizeof(nlist_64)) > objSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2014-01-15 06:32:38 +08:00
|
|
|
const nlist_64 *symbols =
|
|
|
|
reinterpret_cast<const nlist_64 *>(start + symOffset);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Convert each nlist_64 to a lld::mach_o::normalized::Symbol.
|
|
|
|
for(uint32_t i=0; i < symCount; ++i) {
|
|
|
|
const nlist_64 *sin = &symbols[i];
|
|
|
|
nlist_64 tempSym;
|
|
|
|
if (swap) {
|
|
|
|
tempSym = *sin; swapStruct(tempSym); sin = &tempSym;
|
|
|
|
}
|
|
|
|
Symbol sout;
|
2014-01-27 11:09:26 +08:00
|
|
|
if (sin->n_strx > strSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
sout.name = &strings[sin->n_strx];
|
|
|
|
sout.type = (NListType)(sin->n_type & N_TYPE);
|
|
|
|
sout.scope = (sin->n_type & (N_PEXT|N_EXT));
|
|
|
|
sout.sect = sin->n_sect;
|
|
|
|
sout.desc = sin->n_desc;
|
|
|
|
sout.value = sin->n_value;
|
|
|
|
if (sout.type == N_UNDF)
|
|
|
|
f->undefinedSymbols.push_back(sout);
|
|
|
|
else if (sout.scope == (SymbolScope)N_EXT)
|
|
|
|
f->globalSymbols.push_back(sout);
|
|
|
|
else
|
|
|
|
f->localSymbols.push_back(sout);
|
|
|
|
}
|
2014-01-27 11:09:26 +08:00
|
|
|
} else {
|
2013-11-07 05:36:55 +08:00
|
|
|
const uint32_t symOffset = read32(swap, st->symoff);
|
|
|
|
const uint32_t symCount = read32(swap, st->nsyms);
|
2014-01-15 06:32:38 +08:00
|
|
|
if ( symOffset+(symCount*sizeof(nlist)) > objSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2014-01-15 06:32:38 +08:00
|
|
|
const nlist *symbols =
|
|
|
|
reinterpret_cast<const nlist *>(start + symOffset);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Convert each nlist to a lld::mach_o::normalized::Symbol.
|
|
|
|
for(uint32_t i=0; i < symCount; ++i) {
|
|
|
|
const nlist *sin = &symbols[i];
|
|
|
|
nlist tempSym;
|
|
|
|
if (swap) {
|
|
|
|
tempSym = *sin; swapStruct(tempSym); sin = &tempSym;
|
|
|
|
}
|
|
|
|
Symbol sout;
|
2014-01-27 11:09:26 +08:00
|
|
|
if (sin->n_strx > strSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
sout.name = &strings[sin->n_strx];
|
|
|
|
sout.type = (NListType)(sin->n_type & N_TYPE);
|
|
|
|
sout.scope = (sin->n_type & (N_PEXT|N_EXT));
|
|
|
|
sout.sect = sin->n_sect;
|
|
|
|
sout.desc = sin->n_desc;
|
|
|
|
sout.value = sin->n_value;
|
|
|
|
if (sout.type == N_UNDF)
|
|
|
|
f->undefinedSymbols.push_back(sout);
|
|
|
|
else if (sout.scope == (SymbolScope)N_EXT)
|
|
|
|
f->globalSymbols.push_back(sout);
|
|
|
|
else
|
|
|
|
f->localSymbols.push_back(sout);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
});
|
2014-01-27 11:09:26 +08:00
|
|
|
if (ec)
|
2013-11-07 05:36:55 +08:00
|
|
|
return ec;
|
|
|
|
|
|
|
|
return std::move(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-05-30 09:13:49 +08:00
|
|
|
|
|
|
|
class MachOReader : public Reader {
|
|
|
|
public:
|
|
|
|
MachOReader(MachOLinkingContext::Arch arch) : _arch(arch) {}
|
|
|
|
|
|
|
|
bool canParse(file_magic magic, StringRef ext,
|
|
|
|
const MemoryBuffer &mb) const override {
|
|
|
|
if (magic != llvm::sys::fs::file_magic::macho_object)
|
|
|
|
return false;
|
|
|
|
if (mb.getBufferSize() < 32)
|
|
|
|
return false;
|
|
|
|
const char *start = mb.getBufferStart();
|
|
|
|
const mach_header *mh = reinterpret_cast<const mach_header *>(start);
|
|
|
|
const bool swap = (mh->magic == llvm::MachO::MH_CIGAM) ||
|
|
|
|
(mh->magic == llvm::MachO::MH_CIGAM_64);
|
|
|
|
const uint32_t filesCpuType = read32(swap, mh->cputype);
|
|
|
|
const uint32_t filesCpuSubtype = read32(swap, mh->cpusubtype);
|
|
|
|
if (filesCpuType != MachOLinkingContext::cpuTypeFromArch(_arch))
|
|
|
|
return false;
|
|
|
|
if (filesCpuSubtype != MachOLinkingContext::cpuSubtypeFromArch(_arch))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Is mach-o file with correct cpu type/subtype.
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
std::error_code
|
2014-05-30 09:13:49 +08:00
|
|
|
parseFile(std::unique_ptr<MemoryBuffer> &mb, const Registry ®istry,
|
2014-06-12 22:53:47 +08:00
|
|
|
std::vector<std::unique_ptr<File>> &result) const override {
|
2014-05-30 09:13:49 +08:00
|
|
|
// Convert binary file to normalized mach-o.
|
|
|
|
auto normFile = readBinary(mb, _arch);
|
2014-06-12 22:53:47 +08:00
|
|
|
if (std::error_code ec = normFile.getError())
|
2014-05-30 09:13:49 +08:00
|
|
|
return ec;
|
|
|
|
// Convert normalized mach-o to atoms.
|
|
|
|
auto file = normalizedToAtoms(**normFile, mb->getBufferIdentifier(), false);
|
2014-06-12 22:53:47 +08:00
|
|
|
if (std::error_code ec = file.getError())
|
2014-05-30 09:13:49 +08:00
|
|
|
return ec;
|
|
|
|
|
|
|
|
result.push_back(std::move(*file));
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2014-05-30 09:13:49 +08:00
|
|
|
}
|
|
|
|
private:
|
|
|
|
MachOLinkingContext::Arch _arch;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
} // namespace normalized
|
|
|
|
} // namespace mach_o
|
[lld] Introduce registry and Reference kind tuple
The main changes are in:
include/lld/Core/Reference.h
include/lld/ReaderWriter/Reader.h
Everything else is details to support the main change.
1) Registration based Readers
Previously, lld had a tangled interdependency with all the Readers. It would
have been impossible to make a streamlined linker (say for a JIT) which
just supported one file format and one architecture (no yaml, no archives, etc).
The old model also required a LinkingContext to read an object file, which
would have made .o inspection tools awkward.
The new model is that there is a global Registry object. You programmatically
register the Readers you want with the registry object. Whenever you need to
read/parse a file, you ask the registry to do it, and the registry tries each
registered reader.
For ease of use with the existing lld code base, there is one Registry
object inside the LinkingContext object.
2) Changing kind value to be a tuple
Beside Readers, the registry also keeps track of the mapping for Reference
Kind values to and from strings. Along with that, this patch also fixes
an ambiguity with the previous Reference::Kind values. The problem was that
we wanted to reuse existing relocation type values as Reference::Kind values.
But then how can the YAML write know how to convert a value to a string? The
fix is to change the 32-bit Reference::Kind into a tuple with an 8-bit namespace
(e.g. ELF, COFFF, etc), an 8-bit architecture (e.g. x86_64, PowerPC, etc), and
a 16-bit value. This tuple system allows conversion to and from strings with
no ambiguities.
llvm-svn: 197727
2013-12-20 05:58:00 +08:00
|
|
|
|
|
|
|
/// Registers what is needed to handle mach-o object files for \p archName:
/// a binary mach-o reader, the reference kind name table for that
/// architecture, and the mach-o YAML tagged-document handler.
void Registry::addSupportMachOObjects(StringRef archName) {
  const MachOLinkingContext::Arch arch =
      MachOLinkingContext::archFromName(archName);

  // Binary reader for this architecture.
  std::unique_ptr<Reader> reader(new mach_o::normalized::MachOReader(arch));
  add(std::move(reader));

  // Reference kind <-> string table for the architecture's relocations.
  const Reference::KindNamespace machoNamespace =
      Reference::KindNamespace::mach_o;
  switch (arch) {
  case MachOLinkingContext::arch_x86_64:
    addKindTable(machoNamespace, Reference::KindArch::x86_64,
                 mach_o::KindHandler_x86_64::kindStrings);
    break;
  case MachOLinkingContext::arch_x86:
    addKindTable(machoNamespace, Reference::KindArch::x86,
                 mach_o::KindHandler_x86::kindStrings);
    break;
  case MachOLinkingContext::arch_armv6:
  case MachOLinkingContext::arch_armv7:
  case MachOLinkingContext::arch_armv7s:
    // All supported ARM variants share a single kind table.
    addKindTable(machoNamespace, Reference::KindArch::ARM,
                 mach_o::KindHandler_arm::kindStrings);
    break;
  default:
    llvm_unreachable("mach-o arch not supported");
  }

  // Handler for mach-o tagged YAML documents.
  std::unique_ptr<YamlIOTaggedDocumentHandler> yamlHandler(
      new mach_o::MachOYamlIOTaggedDocumentHandler());
  add(std::move(yamlHandler));
}
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
} // namespace lld
|
|
|
|
|