2013-11-07 05:36:55 +08:00
|
|
|
//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2013-11-07 05:36:55 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
///
|
|
|
|
/// \file For mach-o object files, this implementation converts from
|
|
|
|
/// mach-o on-disk binary format to in-memory normalized mach-o.
|
|
|
|
///
|
|
|
|
/// +---------------+
|
|
|
|
/// | binary mach-o |
|
|
|
|
/// +---------------+
|
|
|
|
/// |
|
|
|
|
/// |
|
|
|
|
/// v
|
|
|
|
/// +------------+
|
|
|
|
/// | normalized |
|
|
|
|
/// +------------+
|
|
|
|
|
2014-07-17 03:49:02 +08:00
|
|
|
#include "ArchHandler.h"
|
2017-06-07 11:48:56 +08:00
|
|
|
#include "MachONormalizedFile.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "MachONormalizedFileBinaryUtils.h"
|
2017-10-03 05:00:41 +08:00
|
|
|
#include "lld/Common/LLVM.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "lld/Core/Error.h"
|
2014-08-14 07:55:41 +08:00
|
|
|
#include "lld/Core/SharedLibraryFile.h"
|
2017-06-07 11:48:56 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "llvm/ADT/SmallString.h"
|
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/ADT/StringSwitch.h"
|
|
|
|
#include "llvm/ADT/Twine.h"
|
2017-06-07 11:48:56 +08:00
|
|
|
#include "llvm/BinaryFormat/MachO.h"
|
|
|
|
#include "llvm/BinaryFormat/Magic.h"
|
2014-09-04 03:52:50 +08:00
|
|
|
#include "llvm/Object/MachO.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "llvm/Support/Casting.h"
|
2014-06-14 01:20:48 +08:00
|
|
|
#include "llvm/Support/Errc.h"
|
2013-11-07 05:36:55 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include "llvm/Support/FileOutputBuffer.h"
|
|
|
|
#include "llvm/Support/Host.h"
|
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <functional>
|
2014-06-13 01:15:58 +08:00
|
|
|
#include <system_error>
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
using namespace llvm::MachO;
|
2014-09-04 03:52:50 +08:00
|
|
|
using llvm::object::ExportEntry;
|
2017-06-07 11:48:56 +08:00
|
|
|
using llvm::file_magic;
|
2014-09-04 03:52:50 +08:00
|
|
|
using llvm::object::MachOObjectFile;
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
namespace lld {
|
|
|
|
namespace mach_o {
|
|
|
|
namespace normalized {
|
|
|
|
|
|
|
|
// Utility to call a lambda expression on each load command.
|
2016-03-31 08:08:16 +08:00
|
|
|
static llvm::Error forEachLoadCommand(
|
2014-10-28 06:48:35 +08:00
|
|
|
StringRef lcRange, unsigned lcCount, bool isBig, bool is64,
|
2014-06-12 22:53:47 +08:00
|
|
|
std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) {
|
2013-11-07 05:36:55 +08:00
|
|
|
const char* p = lcRange.begin();
|
|
|
|
for (unsigned i=0; i < lcCount; ++i) {
|
|
|
|
const load_command *lc = reinterpret_cast<const load_command*>(p);
|
|
|
|
load_command lcCopy;
|
|
|
|
const load_command *slc = lc;
|
2014-10-28 06:48:35 +08:00
|
|
|
if (isBig != llvm::sys::IsBigEndianHost) {
|
2013-11-07 05:36:55 +08:00
|
|
|
memcpy(&lcCopy, lc, sizeof(load_command));
|
|
|
|
swapStruct(lcCopy);
|
|
|
|
slc = &lcCopy;
|
|
|
|
}
|
|
|
|
if ( (p + slc->cmdsize) > lcRange.end() )
|
2016-03-31 08:08:16 +08:00
|
|
|
return llvm::make_error<GenericError>("Load command exceeds range");
|
2014-01-27 11:09:26 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
if (func(slc->cmd, slc->cmdsize, p))
|
2016-11-11 12:29:25 +08:00
|
|
|
return llvm::Error::success();
|
2014-01-27 11:09:26 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
p += slc->cmdsize;
|
2014-01-27 11:09:26 +08:00
|
|
|
}
|
|
|
|
|
2016-11-11 12:29:25 +08:00
|
|
|
return llvm::Error::success();
|
2013-11-07 05:36:55 +08:00
|
|
|
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
static std::error_code appendRelocations(Relocations &relocs, StringRef buffer,
|
2014-10-28 06:48:35 +08:00
|
|
|
bool bigEndian,
|
2014-06-12 22:53:47 +08:00
|
|
|
uint32_t reloff, uint32_t nreloc) {
|
2013-11-07 05:36:55 +08:00
|
|
|
if ((reloff + nreloc*8) > buffer.size())
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-01-27 11:09:26 +08:00
|
|
|
const any_relocation_info* relocsArray =
|
2014-01-15 06:32:38 +08:00
|
|
|
reinterpret_cast<const any_relocation_info*>(buffer.begin()+reloff);
|
2014-01-27 11:09:26 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
for(uint32_t i=0; i < nreloc; ++i) {
|
2014-10-28 06:48:35 +08:00
|
|
|
relocs.push_back(unpackRelocation(relocsArray[i], bigEndian));
|
2013-11-07 05:36:55 +08:00
|
|
|
}
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2013-11-07 05:36:55 +08:00
|
|
|
}
|
|
|
|
|
2014-06-12 22:53:47 +08:00
|
|
|
static std::error_code
|
2014-10-28 06:48:35 +08:00
|
|
|
appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig,
|
|
|
|
uint32_t istOffset, uint32_t istCount,
|
2014-05-28 09:16:35 +08:00
|
|
|
uint32_t startIndex, uint32_t count) {
|
|
|
|
if ((istOffset + istCount*4) > buffer.size())
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-05-28 09:16:35 +08:00
|
|
|
if (startIndex+count > istCount)
|
2014-06-14 01:20:48 +08:00
|
|
|
return make_error_code(llvm::errc::executable_format_error);
|
2014-10-28 06:48:35 +08:00
|
|
|
const uint8_t *indirectSymbolArray = (const uint8_t *)buffer.data();
|
2014-05-28 09:16:35 +08:00
|
|
|
|
|
|
|
for(uint32_t i=0; i < count; ++i) {
|
2014-10-28 06:48:35 +08:00
|
|
|
isyms.push_back(read32(
|
|
|
|
indirectSymbolArray + (startIndex + i) * sizeof(uint32_t), isBig));
|
2014-05-28 09:16:35 +08:00
|
|
|
}
|
2014-06-12 22:53:47 +08:00
|
|
|
return std::error_code();
|
2014-05-28 09:16:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-01-15 06:32:38 +08:00
|
|
|
template <typename T> static T readBigEndian(T t) {
|
|
|
|
if (llvm::sys::IsLittleEndianHost)
|
2014-06-14 21:26:14 +08:00
|
|
|
llvm::sys::swapByteOrder(t);
|
2014-01-15 06:32:38 +08:00
|
|
|
return t;
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
|
2014-09-05 04:08:30 +08:00
|
|
|
|
2014-10-28 06:48:35 +08:00
|
|
|
static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) {
|
|
|
|
switch (read32(&mh->magic, false)) {
|
2014-09-05 04:08:30 +08:00
|
|
|
case llvm::MachO::MH_MAGIC:
|
|
|
|
is64 = false;
|
2014-10-28 06:48:35 +08:00
|
|
|
isBig = false;
|
2014-09-05 04:08:30 +08:00
|
|
|
return true;
|
|
|
|
case llvm::MachO::MH_MAGIC_64:
|
|
|
|
is64 = true;
|
2014-10-28 06:48:35 +08:00
|
|
|
isBig = false;
|
2014-09-05 04:08:30 +08:00
|
|
|
return true;
|
|
|
|
case llvm::MachO::MH_CIGAM:
|
|
|
|
is64 = false;
|
2014-10-28 06:48:35 +08:00
|
|
|
isBig = true;
|
2014-09-05 04:08:30 +08:00
|
|
|
return true;
|
|
|
|
case llvm::MachO::MH_CIGAM_64:
|
|
|
|
is64 = true;
|
2014-10-28 06:48:35 +08:00
|
|
|
isBig = true;
|
2014-09-05 04:08:30 +08:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns true if the file at \p path is a thin (non-fat) mach-o object file
/// (MH_OBJECT), and if so stores its architecture in \p arch.
///
/// \p arch is only written when the function returns true.
bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) {
  // A file that cannot be opened and mapped is not an object file.
  ErrorOr<std::unique_ptr<MemoryBuffer>> mapped =
      MemoryBuffer::getFileOrSTDIN(path);
  if (!mapped)
    return false;

  // Anything shorter than 32 bytes is too small to be a mach-o object file.
  const StringRef contents = (*mapped)->getBuffer();
  if (contents.size() < 32)
    return false;

  // The buffer must start with MH_MAGIC (or one of its variants).
  const auto *header = reinterpret_cast<const mach_header *>(contents.data());
  bool is64, bigEndian;
  if (!isMachOHeader(header, is64, bigEndian))
    return false;

  // Only MH_OBJECT files qualify.
  const bool isObject = read32(&header->filetype, bigEndian) == MH_OBJECT;
  if (!isObject)
    return false;

  // Look up arch from cpu/subtype pair.
  const auto cpuType = read32(&header->cputype, bigEndian);
  const auto cpuSubtype = read32(&header->cpusubtype, bigEndian);
  arch = MachOLinkingContext::archFromCpuType(cpuType, cpuSubtype);
  return true;
}
|
|
|
|
|
2015-04-28 06:48:51 +08:00
|
|
|
/// Looks for the slice of a fat (universal) file that matches \p arch.
///
/// \param mb     the whole file.
/// \param arch   architecture whose cpu type and subtype must match.
/// \param offset receives the slice's byte offset within \p mb.
/// \param size   receives the slice's size in bytes.
/// \returns true if \p mb starts with FAT_MAGIC, contains an entry for
///          \p arch, and that entry's slice lies inside the buffer. \p offset
///          and \p size are written as soon as a matching entry is found,
///          even if the slice then fails the range check.
bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
                      uint32_t &offset, uint32_t &size) {
  const char *start = mb.getBufferStart();
  const uint64_t bufferSize = mb.getBufferSize();

  // A buffer too small to hold a fat_header cannot be a fat file; bail out
  // before reading the magic.
  if (bufferSize < sizeof(fat_header))
    return false;

  const llvm::MachO::fat_header *fh =
      reinterpret_cast<const llvm::MachO::fat_header *>(start);
  if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC)
    return false;

  // Never walk past the end of the buffer: visit at most as many fat_arch
  // entries as actually fit after the header, whatever nfat_arch claims.
  const uint64_t maxArchs =
      (bufferSize - sizeof(fat_header)) / sizeof(fat_arch);
  uint32_t nfat_arch = readBigEndian(fh->nfat_arch);
  if (nfat_arch > maxArchs)
    nfat_arch = static_cast<uint32_t>(maxArchs);

  const fat_arch *fstart =
      reinterpret_cast<const fat_arch *>(start + sizeof(fat_header));
  const fat_arch *fend = fstart + nfat_arch;

  const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch);
  const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch);
  for (const fat_arch *fa = fstart; fa < fend; ++fa) {
    if ((readBigEndian(fa->cputype) == reqCpuType) &&
        (readBigEndian(fa->cpusubtype) == reqCpuSubtype)) {
      offset = readBigEndian(fa->offset);
      size = readBigEndian(fa->size);
      // Sum in 64 bits so that offset + size cannot wrap and pass the check.
      return (uint64_t(offset) + size) <= bufferSize;
    }
  }
  return false;
}
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
/// Reads a mach-o file and produces an in-memory normalized view.
|
2016-03-31 07:58:24 +08:00
|
|
|
llvm::Expected<std::unique_ptr<NormalizedFile>>
|
2014-01-15 06:32:38 +08:00
|
|
|
readBinary(std::unique_ptr<MemoryBuffer> &mb,
|
|
|
|
const MachOLinkingContext::Arch arch) {
|
2013-11-07 05:36:55 +08:00
|
|
|
// Make empty NormalizedFile.
|
|
|
|
std::unique_ptr<NormalizedFile> f(new NormalizedFile());
|
|
|
|
|
2014-01-15 06:32:38 +08:00
|
|
|
const char *start = mb->getBufferStart();
|
|
|
|
size_t objSize = mb->getBufferSize();
|
|
|
|
const mach_header *mh = reinterpret_cast<const mach_header *>(start);
|
2014-10-08 09:48:10 +08:00
|
|
|
|
|
|
|
uint32_t sliceOffset;
|
|
|
|
uint32_t sliceSize;
|
2015-04-28 06:48:51 +08:00
|
|
|
if (sliceFromFatFile(mb->getMemBufferRef(), arch, sliceOffset, sliceSize)) {
|
2014-10-08 09:48:10 +08:00
|
|
|
start = &start[sliceOffset];
|
|
|
|
objSize = sliceSize;
|
2014-01-15 06:32:38 +08:00
|
|
|
mh = reinterpret_cast<const mach_header *>(start);
|
|
|
|
}
|
|
|
|
|
2014-10-08 09:48:10 +08:00
|
|
|
// Determine endianness and pointer size for mach-o file.
|
2014-10-28 06:48:35 +08:00
|
|
|
bool is64, isBig;
|
|
|
|
if (!isMachOHeader(mh, is64, isBig))
|
2016-03-31 07:58:24 +08:00
|
|
|
return llvm::make_error<GenericError>("File is not a mach-o");
|
2013-11-07 05:36:55 +08:00
|
|
|
|
|
|
|
// Endian swap header, if needed.
|
|
|
|
mach_header headerCopy;
|
|
|
|
const mach_header *smh = mh;
|
2014-10-28 06:48:35 +08:00
|
|
|
if (isBig != llvm::sys::IsBigEndianHost) {
|
2013-11-07 05:36:55 +08:00
|
|
|
memcpy(&headerCopy, mh, sizeof(mach_header));
|
|
|
|
swapStruct(headerCopy);
|
|
|
|
smh = &headerCopy;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Validate head and load commands fit in buffer.
|
|
|
|
const uint32_t lcCount = smh->ncmds;
|
2014-01-15 06:32:38 +08:00
|
|
|
const char *lcStart =
|
|
|
|
start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header));
|
2013-11-07 05:36:55 +08:00
|
|
|
StringRef lcRange(lcStart, smh->sizeofcmds);
|
2014-01-15 06:32:38 +08:00
|
|
|
if (lcRange.end() > (start + objSize))
|
2016-03-31 07:58:24 +08:00
|
|
|
return llvm::make_error<GenericError>("Load commands exceed file size");
|
2013-11-07 05:36:55 +08:00
|
|
|
|
2014-07-01 06:57:33 +08:00
|
|
|
// Get architecture from mach_header.
|
2013-11-07 05:36:55 +08:00
|
|
|
f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype);
|
2014-07-01 06:57:33 +08:00
|
|
|
if (f->arch != arch) {
|
2016-03-31 07:58:24 +08:00
|
|
|
return llvm::make_error<GenericError>(
|
|
|
|
Twine("file is wrong architecture. Expected "
|
2014-07-01 06:57:33 +08:00
|
|
|
"(" + MachOLinkingContext::nameFromArch(arch)
|
|
|
|
+ ") found ("
|
|
|
|
+ MachOLinkingContext::nameFromArch(f->arch)
|
|
|
|
+ ")" ));
|
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
// Copy file type and flags
|
|
|
|
f->fileType = HeaderFileType(smh->filetype);
|
|
|
|
f->flags = smh->flags;
|
|
|
|
|
|
|
|
|
2014-05-28 09:16:35 +08:00
|
|
|
// Pre-scan load commands looking for indirect symbol table.
|
|
|
|
uint32_t indirectSymbolTableOffset = 0;
|
|
|
|
uint32_t indirectSymbolTableCount = 0;
|
2016-03-31 08:08:16 +08:00
|
|
|
auto ec = forEachLoadCommand(lcRange, lcCount, isBig, is64,
|
|
|
|
[&](uint32_t cmd, uint32_t size,
|
|
|
|
const char *lc) -> bool {
|
2014-05-28 09:16:35 +08:00
|
|
|
if (cmd == LC_DYSYMTAB) {
|
|
|
|
const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc);
|
2014-10-28 06:48:35 +08:00
|
|
|
indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig);
|
|
|
|
indirectSymbolTableCount = read32(&d->nindirectsyms, isBig);
|
2014-05-28 09:16:35 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
});
|
|
|
|
if (ec)
|
2016-03-31 08:08:16 +08:00
|
|
|
return std::move(ec);
|
2014-05-28 09:16:35 +08:00
|
|
|
|
|
|
|
// Walk load commands looking for segments/sections and the symbol table.
|
2014-07-25 07:06:56 +08:00
|
|
|
const data_in_code_entry *dataInCode = nullptr;
|
2014-09-04 03:52:50 +08:00
|
|
|
const dyld_info_command *dyldInfo = nullptr;
|
2014-07-25 07:06:56 +08:00
|
|
|
uint32_t dataInCodeSize = 0;
|
2014-10-28 06:48:35 +08:00
|
|
|
ec = forEachLoadCommand(lcRange, lcCount, isBig, is64,
|
2014-05-28 09:16:35 +08:00
|
|
|
[&] (uint32_t cmd, uint32_t size, const char* lc) -> bool {
|
2014-08-14 07:55:41 +08:00
|
|
|
switch(cmd) {
|
|
|
|
case LC_SEGMENT_64:
|
|
|
|
if (is64) {
|
2014-01-27 11:09:26 +08:00
|
|
|
const segment_command_64 *seg =
|
2013-11-07 05:36:55 +08:00
|
|
|
reinterpret_cast<const segment_command_64*>(lc);
|
2014-10-28 06:48:35 +08:00
|
|
|
const unsigned sectionCount = read32(&seg->nsects, isBig);
|
2013-11-07 05:36:55 +08:00
|
|
|
const section_64 *sects = reinterpret_cast<const section_64*>
|
|
|
|
(lc + sizeof(segment_command_64));
|
2014-01-27 11:09:26 +08:00
|
|
|
const unsigned lcSize = sizeof(segment_command_64)
|
2013-11-07 05:36:55 +08:00
|
|
|
+ sectionCount*sizeof(section_64);
|
|
|
|
// Verify sections don't extend beyond end of segment load command.
|
2014-01-27 11:09:26 +08:00
|
|
|
if (lcSize > size)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
for (unsigned i=0; i < sectionCount; ++i) {
|
|
|
|
const section_64 *sect = §s[i];
|
|
|
|
Section section;
|
|
|
|
section.segmentName = getString16(sect->segname);
|
|
|
|
section.sectionName = getString16(sect->sectname);
|
2014-10-28 06:48:35 +08:00
|
|
|
section.type = (SectionType)(read32(§->flags, isBig) &
|
|
|
|
SECTION_TYPE);
|
|
|
|
section.attributes = read32(§->flags, isBig) & SECTION_ATTRIBUTES;
|
2015-03-26 09:44:01 +08:00
|
|
|
section.alignment = 1 << read32(§->align, isBig);
|
2014-10-28 06:48:35 +08:00
|
|
|
section.address = read64(§->addr, isBig);
|
2014-01-15 06:32:38 +08:00
|
|
|
const uint8_t *content =
|
2014-11-14 15:15:43 +08:00
|
|
|
(const uint8_t *)start + read32(§->offset, isBig);
|
2014-10-28 06:48:35 +08:00
|
|
|
size_t contentSize = read64(§->size, isBig);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Note: this assign() is copying the content bytes. Ideally,
|
|
|
|
// we can use a custom allocator for vector to avoid the copy.
|
2014-01-11 09:07:43 +08:00
|
|
|
section.content = llvm::makeArrayRef(content, contentSize);
|
2014-10-28 06:48:35 +08:00
|
|
|
appendRelocations(section.relocations, mb->getBuffer(), isBig,
|
|
|
|
read32(§->reloff, isBig),
|
|
|
|
read32(§->nreloc, isBig));
|
2014-05-28 09:16:35 +08:00
|
|
|
if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
|
|
|
|
appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(),
|
2014-10-28 06:48:35 +08:00
|
|
|
isBig,
|
2014-05-28 09:16:35 +08:00
|
|
|
indirectSymbolTableOffset,
|
|
|
|
indirectSymbolTableCount,
|
2014-10-28 06:48:35 +08:00
|
|
|
read32(§->reserved1, isBig),
|
|
|
|
contentSize/4);
|
2014-05-28 09:16:35 +08:00
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
f->sections.push_back(section);
|
|
|
|
}
|
|
|
|
}
|
2014-08-14 07:55:41 +08:00
|
|
|
break;
|
|
|
|
case LC_SEGMENT:
|
|
|
|
if (!is64) {
|
2014-01-27 11:09:26 +08:00
|
|
|
const segment_command *seg =
|
2013-11-07 05:36:55 +08:00
|
|
|
reinterpret_cast<const segment_command*>(lc);
|
2014-10-28 06:48:35 +08:00
|
|
|
const unsigned sectionCount = read32(&seg->nsects, isBig);
|
2013-11-07 05:36:55 +08:00
|
|
|
const section *sects = reinterpret_cast<const section*>
|
|
|
|
(lc + sizeof(segment_command));
|
2014-01-27 11:09:26 +08:00
|
|
|
const unsigned lcSize = sizeof(segment_command)
|
2013-11-07 05:36:55 +08:00
|
|
|
+ sectionCount*sizeof(section);
|
|
|
|
// Verify sections don't extend beyond end of segment load command.
|
2014-01-27 11:09:26 +08:00
|
|
|
if (lcSize > size)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
for (unsigned i=0; i < sectionCount; ++i) {
|
|
|
|
const section *sect = §s[i];
|
|
|
|
Section section;
|
|
|
|
section.segmentName = getString16(sect->segname);
|
|
|
|
section.sectionName = getString16(sect->sectname);
|
2014-10-28 06:48:35 +08:00
|
|
|
section.type = (SectionType)(read32(§->flags, isBig) &
|
|
|
|
SECTION_TYPE);
|
|
|
|
section.attributes =
|
2014-11-14 15:15:43 +08:00
|
|
|
read32((const uint8_t *)§->flags, isBig) & SECTION_ATTRIBUTES;
|
2015-03-26 09:44:01 +08:00
|
|
|
section.alignment = 1 << read32(§->align, isBig);
|
2014-10-28 06:48:35 +08:00
|
|
|
section.address = read32(§->addr, isBig);
|
2014-01-15 06:32:38 +08:00
|
|
|
const uint8_t *content =
|
2014-11-14 15:15:43 +08:00
|
|
|
(const uint8_t *)start + read32(§->offset, isBig);
|
2014-10-28 06:48:35 +08:00
|
|
|
size_t contentSize = read32(§->size, isBig);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Note: this assign() is copying the content bytes. Ideally,
|
|
|
|
// we can use a custom allocator for vector to avoid the copy.
|
2014-01-11 09:07:43 +08:00
|
|
|
section.content = llvm::makeArrayRef(content, contentSize);
|
2014-10-28 06:48:35 +08:00
|
|
|
appendRelocations(section.relocations, mb->getBuffer(), isBig,
|
|
|
|
read32(§->reloff, isBig),
|
|
|
|
read32(§->nreloc, isBig));
|
2014-05-28 09:16:35 +08:00
|
|
|
if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
|
2014-10-28 06:48:35 +08:00
|
|
|
appendIndirectSymbols(
|
|
|
|
section.indirectSymbols, mb->getBuffer(), isBig,
|
|
|
|
indirectSymbolTableOffset, indirectSymbolTableCount,
|
|
|
|
read32(§->reserved1, isBig), contentSize / 4);
|
2014-05-28 09:16:35 +08:00
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
f->sections.push_back(section);
|
|
|
|
}
|
|
|
|
}
|
2014-08-14 07:55:41 +08:00
|
|
|
break;
|
|
|
|
case LC_SYMTAB: {
|
2013-11-07 05:36:55 +08:00
|
|
|
const symtab_command *st = reinterpret_cast<const symtab_command*>(lc);
|
2014-10-28 06:48:35 +08:00
|
|
|
const char *strings = start + read32(&st->stroff, isBig);
|
|
|
|
const uint32_t strSize = read32(&st->strsize, isBig);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Validate string pool and symbol table all in buffer.
|
2014-11-14 15:15:43 +08:00
|
|
|
if (read32((const uint8_t *)&st->stroff, isBig) +
|
|
|
|
read32((const uint8_t *)&st->strsize, isBig) >
|
2014-10-28 06:48:35 +08:00
|
|
|
objSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
if (is64) {
|
2014-10-28 06:48:35 +08:00
|
|
|
const uint32_t symOffset = read32(&st->symoff, isBig);
|
|
|
|
const uint32_t symCount = read32(&st->nsyms, isBig);
|
2014-01-15 06:32:38 +08:00
|
|
|
if ( symOffset+(symCount*sizeof(nlist_64)) > objSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2014-01-15 06:32:38 +08:00
|
|
|
const nlist_64 *symbols =
|
|
|
|
reinterpret_cast<const nlist_64 *>(start + symOffset);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Convert each nlist_64 to a lld::mach_o::normalized::Symbol.
|
|
|
|
for(uint32_t i=0; i < symCount; ++i) {
|
|
|
|
nlist_64 tempSym;
|
2016-03-24 02:00:10 +08:00
|
|
|
memcpy(&tempSym, &symbols[i], sizeof(nlist_64));
|
|
|
|
const nlist_64 *sin = &tempSym;
|
|
|
|
if (isBig != llvm::sys::IsBigEndianHost)
|
|
|
|
swapStruct(tempSym);
|
2013-11-07 05:36:55 +08:00
|
|
|
Symbol sout;
|
2014-01-27 11:09:26 +08:00
|
|
|
if (sin->n_strx > strSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
sout.name = &strings[sin->n_strx];
|
2016-07-28 06:55:30 +08:00
|
|
|
sout.type = static_cast<NListType>(sin->n_type & (N_STAB|N_TYPE));
|
2013-11-07 05:36:55 +08:00
|
|
|
sout.scope = (sin->n_type & (N_PEXT|N_EXT));
|
|
|
|
sout.sect = sin->n_sect;
|
|
|
|
sout.desc = sin->n_desc;
|
|
|
|
sout.value = sin->n_value;
|
2016-07-28 06:55:30 +08:00
|
|
|
if (sin->n_type & N_STAB)
|
|
|
|
f->stabsSymbols.push_back(sout);
|
|
|
|
else if (sout.type == N_UNDF)
|
2013-11-07 05:36:55 +08:00
|
|
|
f->undefinedSymbols.push_back(sout);
|
2014-06-28 02:25:01 +08:00
|
|
|
else if (sin->n_type & N_EXT)
|
2013-11-07 05:36:55 +08:00
|
|
|
f->globalSymbols.push_back(sout);
|
|
|
|
else
|
|
|
|
f->localSymbols.push_back(sout);
|
|
|
|
}
|
2014-01-27 11:09:26 +08:00
|
|
|
} else {
|
2014-10-28 06:48:35 +08:00
|
|
|
const uint32_t symOffset = read32(&st->symoff, isBig);
|
|
|
|
const uint32_t symCount = read32(&st->nsyms, isBig);
|
2014-01-15 06:32:38 +08:00
|
|
|
if ( symOffset+(symCount*sizeof(nlist)) > objSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2014-01-15 06:32:38 +08:00
|
|
|
const nlist *symbols =
|
|
|
|
reinterpret_cast<const nlist *>(start + symOffset);
|
2013-11-07 05:36:55 +08:00
|
|
|
// Convert each nlist to a lld::mach_o::normalized::Symbol.
|
|
|
|
for(uint32_t i=0; i < symCount; ++i) {
|
|
|
|
const nlist *sin = &symbols[i];
|
|
|
|
nlist tempSym;
|
2014-10-28 06:48:35 +08:00
|
|
|
if (isBig != llvm::sys::IsBigEndianHost) {
|
2013-11-07 05:36:55 +08:00
|
|
|
tempSym = *sin; swapStruct(tempSym); sin = &tempSym;
|
|
|
|
}
|
|
|
|
Symbol sout;
|
2014-01-27 11:09:26 +08:00
|
|
|
if (sin->n_strx > strSize)
|
2014-06-03 12:41:30 +08:00
|
|
|
return true;
|
2013-11-07 05:36:55 +08:00
|
|
|
sout.name = &strings[sin->n_strx];
|
|
|
|
sout.type = (NListType)(sin->n_type & N_TYPE);
|
|
|
|
sout.scope = (sin->n_type & (N_PEXT|N_EXT));
|
|
|
|
sout.sect = sin->n_sect;
|
|
|
|
sout.desc = sin->n_desc;
|
|
|
|
sout.value = sin->n_value;
|
|
|
|
if (sout.type == N_UNDF)
|
|
|
|
f->undefinedSymbols.push_back(sout);
|
|
|
|
else if (sout.scope == (SymbolScope)N_EXT)
|
|
|
|
f->globalSymbols.push_back(sout);
|
2016-07-28 06:55:30 +08:00
|
|
|
else if (sin->n_type & N_STAB)
|
|
|
|
f->stabsSymbols.push_back(sout);
|
2013-11-07 05:36:55 +08:00
|
|
|
else
|
|
|
|
f->localSymbols.push_back(sout);
|
|
|
|
}
|
|
|
|
}
|
2014-08-14 07:55:41 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case LC_ID_DYLIB: {
|
2014-07-01 16:15:41 +08:00
|
|
|
const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
|
2014-10-28 06:48:35 +08:00
|
|
|
f->installName = lc + read32(&dl->dylib.name, isBig);
|
2014-12-20 17:22:56 +08:00
|
|
|
f->currentVersion = read32(&dl->dylib.current_version, isBig);
|
|
|
|
f->compatVersion = read32(&dl->dylib.compatibility_version, isBig);
|
2014-08-14 07:55:41 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case LC_DATA_IN_CODE: {
|
2014-07-25 07:06:56 +08:00
|
|
|
const linkedit_data_command *ldc =
|
|
|
|
reinterpret_cast<const linkedit_data_command*>(lc);
|
2014-10-28 06:48:35 +08:00
|
|
|
dataInCode = reinterpret_cast<const data_in_code_entry *>(
|
|
|
|
start + read32(&ldc->dataoff, isBig));
|
|
|
|
dataInCodeSize = read32(&ldc->datasize, isBig);
|
2014-08-14 07:55:41 +08:00
|
|
|
}
|
2014-09-04 03:52:50 +08:00
|
|
|
break;
|
2014-08-14 07:55:41 +08:00
|
|
|
case LC_LOAD_DYLIB:
|
|
|
|
case LC_LOAD_WEAK_DYLIB:
|
|
|
|
case LC_REEXPORT_DYLIB:
|
|
|
|
case LC_LOAD_UPWARD_DYLIB: {
|
|
|
|
const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
|
|
|
|
DependentDylib entry;
|
2014-10-28 06:48:35 +08:00
|
|
|
entry.path = lc + read32(&dl->dylib.name, isBig);
|
2014-08-14 07:55:41 +08:00
|
|
|
entry.kind = LoadCommandType(cmd);
|
2014-11-19 10:21:53 +08:00
|
|
|
entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig);
|
|
|
|
entry.currentVersion = read32(&dl->dylib.current_version, isBig);
|
2014-08-14 07:55:41 +08:00
|
|
|
f->dependentDylibs.push_back(entry);
|
2014-11-19 10:21:53 +08:00
|
|
|
}
|
2014-08-14 07:55:41 +08:00
|
|
|
break;
|
2014-12-19 05:33:38 +08:00
|
|
|
case LC_RPATH: {
|
|
|
|
const rpath_command *rpc = reinterpret_cast<const rpath_command *>(lc);
|
|
|
|
f->rpaths.push_back(lc + read32(&rpc->path, isBig));
|
|
|
|
}
|
|
|
|
break;
|
2014-09-04 03:52:50 +08:00
|
|
|
case LC_DYLD_INFO:
|
|
|
|
case LC_DYLD_INFO_ONLY:
|
|
|
|
dyldInfo = reinterpret_cast<const dyld_info_command*>(lc);
|
|
|
|
break;
|
2016-02-04 10:16:08 +08:00
|
|
|
case LC_VERSION_MIN_MACOSX:
|
|
|
|
case LC_VERSION_MIN_IPHONEOS:
|
|
|
|
case LC_VERSION_MIN_WATCHOS:
|
|
|
|
case LC_VERSION_MIN_TVOS:
|
|
|
|
// If we are emitting an object file, then we may take the load command
|
|
|
|
// kind from these commands and pass it on to the output
|
|
|
|
// file.
|
|
|
|
f->minOSVersionKind = (LoadCommandType)cmd;
|
|
|
|
break;
|
2014-07-01 16:15:41 +08:00
|
|
|
}
|
2013-11-07 05:36:55 +08:00
|
|
|
return false;
|
|
|
|
});
|
2014-01-27 11:09:26 +08:00
|
|
|
if (ec)
|
2016-03-31 08:08:16 +08:00
|
|
|
return std::move(ec);
|
2013-11-07 05:36:55 +08:00
|
|
|
|
2014-07-25 07:06:56 +08:00
|
|
|
if (dataInCode) {
|
|
|
|
// Convert on-disk data_in_code_entry array to DataInCode vector.
|
|
|
|
for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) {
|
|
|
|
DataInCode entry;
|
2014-10-28 06:48:35 +08:00
|
|
|
entry.offset = read32(&dataInCode[i].offset, isBig);
|
|
|
|
entry.length = read16(&dataInCode[i].length, isBig);
|
|
|
|
entry.kind =
|
2014-11-14 15:15:43 +08:00
|
|
|
(DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig);
|
2014-07-25 07:06:56 +08:00
|
|
|
f->dataInCode.push_back(entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-04 03:52:50 +08:00
|
|
|
if (dyldInfo) {
|
|
|
|
// If any exports, extract and add to normalized exportInfo vector.
|
|
|
|
if (dyldInfo->export_size) {
|
2017-07-07 23:20:17 +08:00
|
|
|
const uint8_t *trieStart = reinterpret_cast<const uint8_t *>(
|
|
|
|
start + read32(&dyldInfo->export_off, isBig));
|
|
|
|
ArrayRef<uint8_t> trie(trieStart, read32(&dyldInfo->export_size, isBig));
|
2017-07-21 07:09:19 +08:00
|
|
|
Error Err = Error::success();
|
|
|
|
for (const ExportEntry &trieExport : MachOObjectFile::exports(Err, trie)) {
|
2014-09-04 03:52:50 +08:00
|
|
|
Export normExport;
|
|
|
|
normExport.name = trieExport.name().copy(f->ownedAllocations);
|
|
|
|
normExport.offset = trieExport.address();
|
|
|
|
normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK);
|
|
|
|
normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK;
|
|
|
|
normExport.otherOffset = trieExport.other();
|
|
|
|
if (!trieExport.otherName().empty())
|
|
|
|
normExport.otherName = trieExport.otherName().copy(f->ownedAllocations);
|
|
|
|
f->exportInfo.push_back(normExport);
|
|
|
|
}
|
2017-07-21 07:09:19 +08:00
|
|
|
if (Err)
|
|
|
|
return std::move(Err);
|
2014-09-04 03:52:50 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
return std::move(f);
|
|
|
|
}
|
|
|
|
|
Separate file parsing from File's constructors.
This is a second patch for InputGraph cleanup.
Sorry about the size of the patch, but what I did in this
patch is basically moving code from constructor to a new
method, parse(), so the amount of new code is small.
This has no change in functionality.
We've discussed the issue that we have too many classes
to represent a concept of "file". We have File subclasses
that represent files read from disk. In addition to that,
we have bunch of InputElement subclasses (that are part
of InputGraph) that represent command line arguments for
input file names. InputElement is a wrapper for File.
InputElement has parseFile method. The method instantiates
a File. The File's constructor reads a file from disk and
parses that.
Because parseFile method is called from multiple worker
threads, file parsing is processed in parallel. In other
words, one reason why we needed the wrapper classes is
because a File would start reading a file as soon as it
is instantiated.
So, the reason why we have too many classes here is at
least partly because of the design flaw of File class.
Just like threads in a good threading library, we need
to separate instantiation from "start" method, so that
we can instantiate File objects when we need them (which
should be very fast because it involves only one mmap()
and no real file IO) and use them directly instead of
the wrapper classes. Later, we call parse() on each
file in parallel to let them do actual file IO.
In this design, we can eliminate a reason to have the
wrapper classes.
In order to minimize the size of the patch, I didn't go so
far as to replace the wrapper classes with File classes.
The wrapper classes are still there.
In this patch, we call parse() immediately after
instantiating a File, so this really has no change in
functionality. Eventually the call of parse() should be
moved to Driver::link(). That'll be done in another patch.
llvm-svn: 224102
2014-12-12 15:31:09 +08:00
|
|
|
class MachOObjectReader : public Reader {
|
2014-05-30 09:13:49 +08:00
|
|
|
public:
|
Separate file parsing from File's constructors.
This is a second patch for InputGraph cleanup.
Sorry about the size of the patch, but what I did in this
patch is basically moving code from constructor to a new
method, parse(), so the amount of new code is small.
This has no change in functionality.
We've discussed the issue that we have too many classes
to represent a concept of "file". We have File subclasses
that represent files read from disk. In addition to that,
we have bunch of InputElement subclasses (that are part
of InputGraph) that represent command line arguments for
input file names. InputElement is a wrapper for File.
InputElement has parseFile method. The method instantiates
a File. The File's constructor reads a file from disk and
parses that.
Because parseFile method is called from multiple worker
threads, file parsing is processed in parallel. In other
words, one reason why we needed the wrapper classes is
because a File would start reading a file as soon as it
is instantiated.
So, the reason why we have too many classes here is at
least partly because of the design flaw of File class.
Just like threads in a good threading library, we need
to separate instantiation from "start" method, so that
we can instantiate File objects when we need them (which
should be very fast because it involves only one mmap()
and no real file IO) and use them directly instead of
the wrapper classes. Later, we call parse() on each
file in parallel to let them do actual file IO.
In this design, we can eliminate a reason to have the
wrapper classes.
In order to minimize the size of the patch, I didn't go so
far as to replace the wrapper classes with File classes.
The wrapper classes are still there.
In this patch, we call parse() immediately after
instantiating a File, so this really has no change in
functionality. Eventually the call of parse() should be
moved to Driver::link(). That'll be done in another patch.
llvm-svn: 224102
2014-12-12 15:31:09 +08:00
|
|
|
/// Creates a reader bound to \p ctx; the context is stored by reference in
/// _ctx.
MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {}
|
2014-05-30 09:13:49 +08:00
|
|
|
|
2015-04-25 05:10:50 +08:00
|
|
|
/// Reports whether this reader accepts the buffer \p mb whose detected magic
/// is \p magic: the magic must be file_magic::macho_object and the buffer
/// must be larger than 32 bytes.
bool canParse(file_magic magic, MemoryBufferRef mb) const override {
  // Anything that is not a mach-o object file is rejected without looking
  // at the buffer.
  if (magic != file_magic::macho_object)
    return false;
  // NOTE(review): 32 is presumably a lower bound derived from the mach-o
  // header size -- confirm before changing it.
  return mb.getBufferSize() > 32;
}
|
|
|
|
|
2015-04-25 02:33:50 +08:00
|
|
|
/// Creates a MachOFile that takes ownership of \p mb and is given a pointer
/// to this reader's linking context.
///
/// \param mb        buffer holding the file contents; ownership moves into
///                  the new MachOFile.
/// \param registry  not referenced by this implementation.
/// \returns the new file as a std::unique_ptr<File>. No error value is
///          produced by this method itself.
ErrorOr<std::unique_ptr<File>>
loadFile(std::unique_ptr<MemoryBuffer> mb,
         const Registry &registry) const override {
  std::unique_ptr<File> ret =
      std::make_unique<MachOFile>(std::move(mb), &_ctx);
  // ret is a unique_ptr<File>, not the ErrorOr return type, so it is moved
  // explicitly into the returned value.
  return std::move(ret);
}
|
2014-05-30 09:13:49 +08:00
|
|
|
|
Separate file parsing from File's constructors.
This is a second patch for InputGraph cleanup.
Sorry about the size of the patch, but what I did in this
patch is basically moving code from constructor to a new
method, parse(), so the amount of new code is small.
This has no change in functionality.
We've discussed the issue that we have too many classes
to represent a concept of "file". We have File subclasses
that represent files read from disk. In addition to that,
we have a bunch of InputElement subclasses (that are part
of InputGraph) that represent command line arguments for
input file names. InputElement is a wrapper for File.
InputElement has parseFile method. The method instantiates
a File. The File's constructor reads a file from disk and
parses that.
Because parseFile method is called from multiple worker
threads, file parsing is processed in parallel. In other
words, one reason why we needed the wrapper classes is
because a File would start reading a file as soon as it
is instantiated.
So, the reason why we have too many classes here is at
least partly because of the design flaw of File class.
Just like threads in a good threading library, we need
to separate instantiation from "start" method, so that
we can instantiate File objects when we need them (which
should be very fast because it involves only one mmap()
and no real file IO) and use them directly instead of
the wrapper classes. Later, we call parse() on each
file in parallel to let them do actual file IO.
In this design, we can eliminate a reason to have the
wrapper classes.
In order to minimize the size of the patch, I didn't go so
far as to replace the wrapper classes with File classes.
The wrapper classes are still there.
In this patch, we call parse() immediately after
instantiating a File, so this really has no change in
functionality. Eventually the call of parse() should be
moved to Driver::link(). That'll be done in another patch.
llvm-svn: 224102
2014-12-12 15:31:09 +08:00
|
|
|
private:
|
|
|
|
/// Linking context supplied at construction; loadFile() passes its address
/// to every MachOFile it creates.
MachOLinkingContext &_ctx;
|
|
|
|
};
|
2014-05-30 09:13:49 +08:00
|
|
|
|
Separate file parsing from File's constructors.
This is a second patch for InputGraph cleanup.
Sorry about the size of the patch, but what I did in this
patch is basically moving code from constructor to a new
method, parse(), so the amount of new code is small.
This has no change in functionality.
We've discussed the issue that we have too many classes
to represent a concept of "file". We have File subclasses
that represent files read from disk. In addition to that,
we have a bunch of InputElement subclasses (that are part
of InputGraph) that represent command line arguments for
input file names. InputElement is a wrapper for File.
InputElement has parseFile method. The method instantiates
a File. The File's constructor reads a file from disk and
parses that.
Because parseFile method is called from multiple worker
threads, file parsing is processed in parallel. In other
words, one reason why we needed the wrapper classes is
because a File would start reading a file as soon as it
is instantiated.
So, the reason why we have too many classes here is at
least partly because of the design flaw of File class.
Just like threads in a good threading library, we need
to separate instantiation from "start" method, so that
we can instantiate File objects when we need them (which
should be very fast because it involves only one mmap()
and no real file IO) and use them directly instead of
the wrapper classes. Later, we call parse() on each
file in parallel to let them do actual file IO.
In this design, we can eliminate a reason to have the
wrapper classes.
In order to minimize the size of the patch, I didn't go so
far as to replace the wrapper classes with File classes.
The wrapper classes are still there.
In this patch, we call parse() immediately after
instantiating a File, so this really has no change in
functionality. Eventually the call of parse() should be
moved to Driver::link(). That'll be done in another patch.
llvm-svn: 224102
2014-12-12 15:31:09 +08:00
|
|
|
class MachODylibReader : public Reader {
|
|
|
|
public:
|
|
|
|
MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {}
|
|
|
|
|
2015-04-25 05:10:50 +08:00
|
|
|
bool canParse(file_magic magic, MemoryBufferRef mb) const override {
|
Separate file parsing from File's constructors.
This is a second patch for InputGraph cleanup.
Sorry about the size of the patch, but what I did in this
patch is basically moving code from constructor to a new
method, parse(), so the amount of new code is small.
This has no change in functionality.
We've discussed the issue that we have too many classes
to represent a concept of "file". We have File subclasses
that represent files read from disk. In addition to that,
we have bunch of InputElement subclasses (that are part
of InputGraph) that represent command line arguments for
input file names. InputElement is a wrapper for File.
InputElement has parseFile method. The method instantiates
a File. The File's constructor reads a file from disk and
parses that.
Because parseFile method is called from multiple worker
threads, file parsing is processed in parallel. In other
words, one reason why we needed the wrapper classes is
because a File would start reading a file as soon as it
is instantiated.
So, the reason why we have too many classes here is at
least partly because of the design flaw of File class.
Just like threads in a good threading library, we need
to separate instantiation from "start" method, so that
we can instantiate File objects when we need them (which
should be very fast because it involves only one mmap()
and no real file IO) and use them directly instead of
the wrapper classes. Later, we call parse() on each
file in parallel to let them do actual file IO.
In this design, we can eliminate a reason to have the
wrapper classes.
In order to minimize the size of the patch, I didn't go so
far as to replace the wrapper classes with File classes.
The wrapper classes are still there.
In this patch, we call parse() immediately after
instantiating a File, so this really has no change in
functionality. Eventually the call of parse() should be
moved to Driver::link(). That'll be done in another patch.
llvm-svn: 224102
2014-12-12 15:31:09 +08:00
|
|
|
switch (magic) {
|
2017-06-07 11:48:56 +08:00
|
|
|
case file_magic::macho_dynamically_linked_shared_lib:
|
|
|
|
case file_magic::macho_dynamically_linked_shared_lib_stub:
|
2015-04-04 10:44:36 +08:00
|
|
|
return mb.getBufferSize() > 32;
|
Separate file parsing from File's constructors.
This is a second patch for InputGraph cleanup.
Sorry about the size of the patch, but what I did in this
patch is basically moving code from constructor to a new
method, parse(), so the amount of new code is small.
This has no change in functionality.
We've discussed the issue that we have too many classes
to represent a concept of "file". We have File subclasses
that represent files read from disk. In addition to that,
we have bunch of InputElement subclasses (that are part
of InputGraph) that represent command line arguments for
input file names. InputElement is a wrapper for File.
InputElement has parseFile method. The method instantiates
a File. The File's constructor reads a file from disk and
parses that.
Because parseFile method is called from multiple worker
threads, file parsing is processed in parallel. In other
words, one reason why we needed the wrapper classes is
because a File would start reading a file as soon as it
is instantiated.
So, the reason why we have too many classes here is at
least partly because of the design flaw of File class.
Just like threads in a good threading library, we need
to separate instantiation from "start" method, so that
we can instantiate File objects when we need them (which
should be very fast because it involves only one mmap()
and no real file IO) and use them directly instead of
the wrapper classes. Later, we call parse() on each
file in parallel to let them do actual file IO.
In this design, we can eliminate a reason to have the
wrapper classes.
In order to minimize the size of the patch, I didn't go so
far as to replace the wrapper classes with File classes.
The wrapper classes are still there.
In this patch, we call parse() immediately after
instantiating a File, so this really has no change in
functionality. Eventually the call of parse() should be
moved to Driver::link(). That'll be done in another patch.
llvm-svn: 224102
2014-12-12 15:31:09 +08:00
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-25 02:33:50 +08:00
|
|
|
ErrorOr<std::unique_ptr<File>>
|
|
|
|
loadFile(std::unique_ptr<MemoryBuffer> mb,
|
|
|
|
const Registry ®istry) const override {
|
|
|
|
std::unique_ptr<File> ret =
|
2019-08-15 06:28:17 +08:00
|
|
|
std::make_unique<MachODylibFile>(std::move(mb), &_ctx);
|
2015-04-25 02:33:50 +08:00
|
|
|
return std::move(ret);
|
2014-05-30 09:13:49 +08:00
|
|
|
}
|
Separate file parsing from File's constructors.
This is a second patch for InputGraph cleanup.
Sorry about the size of the patch, but what I did in this
patch is basically moving code from constructor to a new
method, parse(), so the amount of new code is small.
This has no change in functionality.
We've discussed the issue that we have too many classes
to represent a concept of "file". We have File subclasses
that represent files read from disk. In addition to that,
we have bunch of InputElement subclasses (that are part
of InputGraph) that represent command line arguments for
input file names. InputElement is a wrapper for File.
InputElement has parseFile method. The method instantiates
a File. The File's constructor reads a file from disk and
parses that.
Because parseFile method is called from multiple worker
threads, file parsing is processed in parallel. In other
words, one reason why we needed the wrapper classes is
because a File would start reading a file as soon as it
is instantiated.
So, the reason why we have too many classes here is at
least partly because of the design flaw of File class.
Just like threads in a good threading library, we need
to separate instantiation from "start" method, so that
we can instantiate File objects when we need them (which
should be very fast because it involves only one mmap()
and no real file IO) and use them directly instead of
the wrapper classes. Later, we call parse() on each
file in parallel to let them do actual file IO.
In this design, we can eliminate a reason to have the
wrapper classes.
In order to minimize the size of the patch, I didn't go so
far as to replace the wrapper classes with File classes.
The wrapper classes are still there.
In this patch, we call parse() immediately after
instantiating a File, so this really has no change in
functionality. Eventually the call of parse() should be
moved to Driver::link(). That'll be done in another patch.
llvm-svn: 224102
2014-12-12 15:31:09 +08:00
|
|
|
|
2014-05-30 09:13:49 +08:00
|
|
|
private:
|
2014-08-14 07:55:41 +08:00
|
|
|
MachOLinkingContext &_ctx;
|
2014-05-30 09:13:49 +08:00
|
|
|
};
|
|
|
|
|
2020-07-02 12:02:09 +08:00
|
|
|
class MachOTAPIReader : public Reader {
|
|
|
|
public:
|
|
|
|
MachOTAPIReader(MachOLinkingContext &ctx) : _ctx(ctx) {}
|
|
|
|
|
|
|
|
bool canParse(file_magic magic, MemoryBufferRef mb) const override {
|
|
|
|
return magic == file_magic::tapi_file;
|
|
|
|
}
|
|
|
|
|
|
|
|
ErrorOr<std::unique_ptr<File>>
|
|
|
|
loadFile(std::unique_ptr<MemoryBuffer> mb,
|
|
|
|
const Registry ®istry) const override {
|
|
|
|
std::unique_ptr<File> ret =
|
|
|
|
std::make_unique<TAPIFile>(std::move(mb), &_ctx);
|
|
|
|
return std::move(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
MachOLinkingContext &_ctx;
|
|
|
|
};
|
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
} // namespace normalized
|
|
|
|
} // namespace mach_o
|
[lld] Introduce registry and Reference kind tuple
The main changes are in:
include/lld/Core/Reference.h
include/lld/ReaderWriter/Reader.h
Everything else is details to support the main change.
1) Registration based Readers
Previously, lld had a tangled interdependency with all the Readers. It would
have been impossible to make a streamlined linker (say for a JIT) which
just supported one file format and one architecture (no yaml, no archives, etc).
The old model also required a LinkingContext to read an object file, which
would have made .o inspection tools awkward.
The new model is that there is a global Registry object. You programmatically
register the Readers you want with the registry object. Whenever you need to
read/parse a file, you ask the registry to do it, and the registry tries each
registered reader.
For ease of use with the existing lld code base, there is one Registry
object inside the LinkingContext object.
2) Changing kind value to be a tuple
Besides Readers, the registry also keeps track of the mapping for Reference
Kind values to and from strings. Along with that, this patch also fixes
an ambiguity with the previous Reference::Kind values. The problem was that
we wanted to reuse existing relocation type values as Reference::Kind values.
But then how can the YAML writer know how to convert a value to a string? The
fix is to change the 32-bit Reference::Kind into a tuple with an 8-bit namespace
(e.g. ELF, COFF, etc.), an 8-bit architecture (e.g. x86_64, PowerPC, etc.), and
a 16-bit value. This tuple system allows conversion to and from strings with
no ambiguities.
llvm-svn: 197727
2013-12-20 05:58:00 +08:00
|
|
|
|
2014-08-14 07:55:41 +08:00
|
|
|
/// Registers mach-o support with this Registry for the linking context
/// \p ctx. In order, it adds:
///   1. a MachOObjectReader, a MachODylibReader and a MachOTAPIReader, each
///      constructed with \p ctx;
///   2. the kind table for the mach_o reference namespace, using the kind
///      arch and kind strings reported by ctx.archHandler();
///   3. a MachOYamlIOTaggedDocumentHandler constructed with ctx.arch().
void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) {
  MachOLinkingContext::Arch arch = ctx.arch();

  // Each reader is built with std::make_unique and held in an explicitly
  // typed unique_ptr<Reader> so the same add() overload is selected as when
  // the pointer was wrapped by hand.
  std::unique_ptr<Reader> objectReader =
      std::make_unique<mach_o::normalized::MachOObjectReader>(ctx);
  add(std::move(objectReader));

  std::unique_ptr<Reader> dylibReader =
      std::make_unique<mach_o::normalized::MachODylibReader>(ctx);
  add(std::move(dylibReader));

  std::unique_ptr<Reader> tapiReader =
      std::make_unique<mach_o::normalized::MachOTAPIReader>(ctx);
  add(std::move(tapiReader));

  addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(),
               ctx.archHandler().kindStrings());

  std::unique_ptr<YamlIOTaggedDocumentHandler> yamlHandler =
      std::make_unique<mach_o::MachOYamlIOTaggedDocumentHandler>(arch);
  add(std::move(yamlHandler));
}
|
|
|
|
|
2014-10-08 09:48:10 +08:00
|
|
|
|
2013-11-07 05:36:55 +08:00
|
|
|
} // namespace lld
|