llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryRe...

577 lines
21 KiB
C++
Raw Normal View History

//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file For mach-o object files, this implementation converts from
/// mach-o on-disk binary format to in-memory normalized mach-o.
///
/// +---------------+
/// | binary mach-o |
/// +---------------+
/// |
/// |
/// v
/// +------------+
/// | normalized |
/// +------------+
#include "MachONormalizedFile.h"
#include "ArchHandler.h"
#include "MachONormalizedFileBinaryUtils.h"
#include "lld/Core/Error.h"
#include "lld/Core/LLVM.h"
#include "lld/Core/SharedLibraryFile.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
#include <system_error>
using namespace llvm::MachO;
using llvm::object::ExportEntry;
using llvm::object::MachOObjectFile;
namespace lld {
namespace mach_o {
namespace normalized {
/// Utility to call a lambda expression on each load command.
///
/// Walks up to \p lcCount load commands stored back to back in \p lcRange and
/// hands each one to \p func as (cmd, cmdsize, pointer to the raw command).
/// The cmd/cmdsize values passed to \p func are already in host byte order;
/// the pointed-to bytes are left untouched in file byte order.
///
/// \param lcRange  bytes that hold the load commands.
/// \param lcCount  number of load commands the header claims are present.
/// \param isBig    true if the file is big-endian.
/// \param is64     true for a 64-bit mach-o (not needed for the walk itself).
/// \param func     callback; returning true stops the walk early.
/// \returns success if the walk finished or \p func stopped it, or
///          executable_format_error if a command header does not fit in
///          \p lcRange, claims a size smaller than a load_command header, or
///          extends past the end of \p lcRange.
static std::error_code forEachLoadCommand(
    StringRef lcRange, unsigned lcCount, bool isBig, bool is64,
    std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) {
  const char *p = lcRange.begin();
  const char *const rangeEnd = lcRange.end();
  for (unsigned i = 0; i < lcCount; ++i) {
    // The fixed-size header must lie inside the range before it is read.
    const size_t remaining = static_cast<size_t>(rangeEnd - p);
    if (remaining < sizeof(load_command))
      return make_error_code(llvm::errc::executable_format_error);

    // Always work on a copy: the bytes in the file may need swapping and are
    // not guaranteed to be suitably aligned for a direct struct access.
    load_command lcCopy;
    memcpy(&lcCopy, p, sizeof(load_command));
    if (isBig != llvm::sys::IsBigEndianHost)
      swapStruct(lcCopy);

    // Compare sizes rather than pointers so a huge cmdsize cannot wrap, and
    // reject commands too small to contain their own header (a zero cmdsize
    // would otherwise present the same bytes to func over and over).
    if (lcCopy.cmdsize < sizeof(load_command) || lcCopy.cmdsize > remaining)
      return make_error_code(llvm::errc::executable_format_error);

    if (func(lcCopy.cmd, lcCopy.cmdsize, p))
      return std::error_code();
    p += lcCopy.cmdsize;
  }
  return std::error_code();
}
/// Appends \p nreloc relocation records, found \p reloff bytes into
/// \p buffer, to \p relocs.
///
/// \param relocs     destination list; unpacked records are pushed onto it.
/// \param buffer     bytes that \p reloff is relative to.
/// \param bigEndian  true if the records are stored big-endian.
/// \param reloff     byte offset of the first record within \p buffer.
/// \param nreloc     number of records to read.
/// \returns executable_format_error if the records do not fit in \p buffer,
///          success otherwise.
static std::error_code appendRelocations(Relocations &relocs, StringRef buffer,
                                         bool bigEndian,
                                         uint32_t reloff, uint32_t nreloc) {
  // Each on-disk relocation record occupies 8 bytes.
  const uint64_t relocRecordSize = 8;
  // Do the range math in 64 bits: reloff and nreloc come straight from the
  // file and a 32-bit sum could wrap around and slip past the check.
  const uint64_t relocsEnd =
      static_cast<uint64_t>(reloff) +
      static_cast<uint64_t>(nreloc) * relocRecordSize;
  if (relocsEnd > buffer.size())
    return make_error_code(llvm::errc::executable_format_error);
  const any_relocation_info *relocsArray =
      reinterpret_cast<const any_relocation_info *>(buffer.begin() + reloff);
  for (uint32_t i = 0; i < nreloc; ++i) {
    relocs.push_back(unpackRelocation(relocsArray[i], bigEndian));
  }
  return std::error_code();
}
/// Appends \p count entries of the indirect symbol table to \p isyms.
///
/// The table is an array of 32-bit values that starts \p istOffset bytes into
/// \p buffer and holds \p istCount entries; the entries copied are
/// [\p startIndex, \p startIndex + \p count).
///
/// \param isyms       destination list; each entry read is pushed onto it.
/// \param buffer      bytes that \p istOffset is relative to.
/// \param isBig       true if the table is stored big-endian.
/// \param istOffset   byte offset of the table within \p buffer.
/// \param istCount    total number of entries in the table.
/// \param startIndex  index of the first entry to copy.
/// \param count       number of entries to copy.
/// \returns executable_format_error if the table does not fit in \p buffer
///          or the requested entries are not all inside the table, success
///          otherwise.  Nothing is appended on error.
static std::error_code
appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig,
                      uint32_t istOffset, uint32_t istCount,
                      uint32_t startIndex, uint32_t count) {
  // All inputs come from the file; use 64-bit math so the sums cannot wrap.
  const uint64_t tableEnd =
      static_cast<uint64_t>(istOffset) +
      static_cast<uint64_t>(istCount) * sizeof(uint32_t);
  if (tableEnd > buffer.size())
    return make_error_code(llvm::errc::executable_format_error);
  if (static_cast<uint64_t>(startIndex) + count > istCount)
    return make_error_code(llvm::errc::executable_format_error);
  // The table lives istOffset bytes into the buffer, not at its start.
  // Entries are read through a byte pointer so no alignment is assumed.
  const uint8_t *indirectSymbolArray =
      (const uint8_t *)buffer.data() + istOffset;
  for (uint32_t i = 0; i < count; ++i) {
    const size_t entryOffset =
        (static_cast<size_t>(startIndex) + i) * sizeof(uint32_t);
    isyms.push_back(read32(indirectSymbolArray + entryOffset, isBig));
  }
  return std::error_code();
}
/// Converts a value stored big-endian into host byte order.
///
/// \param t  the value exactly as loaded from big-endian storage.
/// \returns \p t unchanged on a big-endian host, byte-swapped otherwise.
template <typename T> static T readBigEndian(T t) {
  // Nothing to do when the host already uses the storage byte order.
  if (!llvm::sys::IsLittleEndianHost)
    return t;
  llvm::sys::swapByteOrder(t);
  return t;
}
/// Classifies the magic number at the start of \p mh.
///
/// The magic is read as a little-endian value, so the byte-swapped ("CIGAM")
/// constants identify a big-endian file.
///
/// \param mh     start of the candidate mach-o header.
/// \param is64   set to true for the 64-bit magics; only written when the
///               function returns true.
/// \param isBig  set to true for the byte-swapped magics; only written when
///               the function returns true.
/// \returns true if the magic is one of the four thin mach-o magics.
static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) {
  const uint32_t magic = read32(&mh->magic, false);
  const bool native32 = (magic == llvm::MachO::MH_MAGIC);
  const bool native64 = (magic == llvm::MachO::MH_MAGIC_64);
  const bool swapped32 = (magic == llvm::MachO::MH_CIGAM);
  const bool swapped64 = (magic == llvm::MachO::MH_CIGAM_64);

  // Leave the out-parameters alone unless the magic is recognized.
  if (!(native32 || native64 || swapped32 || swapped64))
    return false;

  is64 = native64 || swapped64;
  isBig = swapped32 || swapped64;
  return true;
}
/// Tests whether the file at \p path is a thin (non-fat) mach-o object file.
///
/// \param path  file to open and inspect.
/// \param arch  on success, receives the architecture looked up from the
///              header's cpu type/subtype pair; untouched otherwise.
/// \returns true only if the file can be read, is at least 32 bytes long,
///          starts with a mach-o magic and has file type MH_OBJECT.
bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) {
  // Try opening and mapping the file; an unreadable file is not an object.
  ErrorOr<std::unique_ptr<MemoryBuffer>> mapped =
      MemoryBuffer::getFileOrSTDIN(path);
  if (mapped.getError())
    return false;

  // Anything shorter than 32 bytes is too small to be a mach-o object file.
  StringRef contents = mapped->get()->getBuffer();
  if (contents.size() < 32)
    return false;

  // The buffer must start with MH_MAGIC or one of its variants.
  const auto *header = reinterpret_cast<const mach_header *>(contents.begin());
  bool is64, isBig;
  if (!isMachOHeader(header, is64, isBig))
    return false;

  // Only MH_OBJECT files count as object files.
  const uint32_t fileType = read32(&header->filetype, isBig);
  if (fileType != MH_OBJECT)
    return false;

  // Look up the arch from the cpu type/subtype pair.
  const uint32_t cpuType = read32(&header->cputype, isBig);
  const uint32_t cpuSubtype = read32(&header->cpusubtype, isBig);
  arch = MachOLinkingContext::archFromCpuType(cpuType, cpuSubtype);
  return true;
}
bool sliceFromFatFile(const MemoryBuffer &mb, MachOLinkingContext::Arch arch,
uint32_t &offset, uint32_t &size) {
const char *start = mb.getBufferStart();
const llvm::MachO::fat_header *fh =
reinterpret_cast<const llvm::MachO::fat_header *>(start);
if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC)
return false;
uint32_t nfat_arch = readBigEndian(fh->nfat_arch);
const fat_arch *fstart =
reinterpret_cast<const fat_arch *>(start + sizeof(fat_header));
const fat_arch *fend =
reinterpret_cast<const fat_arch *>(start + sizeof(fat_header) +
sizeof(fat_arch) * nfat_arch);
const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch);
const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch);
for (const fat_arch *fa = fstart; fa < fend; ++fa) {
if ((readBigEndian(fa->cputype) == reqCpuType) &&
(readBigEndian(fa->cpusubtype) == reqCpuSubtype)) {
offset = readBigEndian(fa->offset);
size = readBigEndian(fa->size);
if ((offset + size) > mb.getBufferSize())
return false;
return true;
}
}
return false;
}
/// Reads a mach-o file and produces an in-memory normalized view.
///
/// If \p mb is a fat file that contains a slice for \p arch, only that slice
/// is parsed; otherwise the whole buffer is parsed as one mach-o image.  All
/// file offsets found in the image are resolved relative to the start of the
/// image being parsed (the slice, for a fat file).
///
/// \param mb    buffer holding the file contents.  NOTE(review): values such
///              as symbol names are taken directly from this buffer, so the
///              result presumably must not outlive it -- confirm with the
///              NormalizedFile field types.
/// \param arch  architecture the caller expects the file to contain.
/// \returns the normalized file, or an error: executable_format_error if the
///          header, a load command, or a table it references is malformed or
///          does not fit in the image, and a dynamic error if the file's
///          architecture is not \p arch.
ErrorOr<std::unique_ptr<NormalizedFile>>
readBinary(std::unique_ptr<MemoryBuffer> &mb,
           const MachOLinkingContext::Arch arch) {
  // Make empty NormalizedFile.
  std::unique_ptr<NormalizedFile> f(new NormalizedFile());

  const char *start = mb->getBufferStart();
  size_t objSize = mb->getBufferSize();
  const mach_header *mh = reinterpret_cast<const mach_header *>(start);

  // For a fat file, narrow the view to the slice for the requested arch.
  uint32_t sliceOffset;
  uint32_t sliceSize;
  if (sliceFromFatFile(*mb, arch, sliceOffset, sliceSize)) {
    start = &start[sliceOffset];
    objSize = sliceSize;
    mh = reinterpret_cast<const mach_header *>(start);
  }

  // The image must hold at least a mach_header before any field is read.
  if (objSize < sizeof(mach_header))
    return make_error_code(llvm::errc::executable_format_error);

  // View of exactly the image being parsed.  Offsets stored in the image are
  // relative to its start, which differs from the start of the buffer when
  // the image is a slice of a fat file.
  const StringRef image(start, objSize);

  // Determine endianness and pointer size for mach-o file.
  bool is64, isBig;
  if (!isMachOHeader(mh, is64, isBig))
    return make_error_code(llvm::errc::executable_format_error);

  // Endian swap header, if needed.
  mach_header headerCopy;
  const mach_header *smh = mh;
  if (isBig != llvm::sys::IsBigEndianHost) {
    memcpy(&headerCopy, mh, sizeof(mach_header));
    swapStruct(headerCopy);
    smh = &headerCopy;
  }

  // Validate header and load commands fit in the image.  Sizes are compared
  // instead of pointers so a huge sizeofcmds cannot overflow the arithmetic.
  const uint32_t lcCount = smh->ncmds;
  const size_t headerSize =
      is64 ? sizeof(mach_header_64) : sizeof(mach_header);
  if (headerSize > objSize || smh->sizeofcmds > objSize - headerSize)
    return make_error_code(llvm::errc::executable_format_error);
  const char *lcStart = start + headerSize;
  StringRef lcRange(lcStart, smh->sizeofcmds);

  // Get architecture from mach_header.
  f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype);
  if (f->arch != arch) {
    return make_dynamic_error_code(Twine("file is wrong architecture. Expected "
                                  "(" + MachOLinkingContext::nameFromArch(arch)
                                  + ") found ("
                                  + MachOLinkingContext::nameFromArch(f->arch)
                                  + ")" ));
  }

  // Copy file type and flags
  f->fileType = HeaderFileType(smh->filetype);
  f->flags = smh->flags;

  // forEachLoadCommand treats a callback returning true as a successful early
  // stop, so callbacks record the reason here when they stop because the
  // file is malformed; it is checked after every walk.
  std::error_code lcError;
  auto malformed = [&lcError]() -> bool {
    lcError = make_error_code(llvm::errc::executable_format_error);
    return true; // Stop walking load commands.
  };

  // Pre-scan load commands looking for indirect symbol table.
  uint32_t indirectSymbolTableOffset = 0;
  uint32_t indirectSymbolTableCount = 0;
  std::error_code ec = forEachLoadCommand(lcRange, lcCount, isBig, is64,
      [&](uint32_t cmd, uint32_t size, const char *lc) -> bool {
    if (cmd == LC_DYSYMTAB) {
      if (size < sizeof(dysymtab_command))
        return malformed();
      const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc);
      indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig);
      indirectSymbolTableCount = read32(&d->nindirectsyms, isBig);
      return true;
    }
    return false;
  });
  if (ec)
    return ec;
  if (lcError)
    return lcError;

  // Walk load commands looking for segments/sections and the symbol table.
  const data_in_code_entry *dataInCode = nullptr;
  const dyld_info_command *dyldInfo = nullptr;
  uint32_t dataInCodeSize = 0;
  ec = forEachLoadCommand(lcRange, lcCount, isBig, is64,
      [&](uint32_t cmd, uint32_t size, const char *lc) -> bool {
    switch (cmd) {
    case LC_SEGMENT_64:
      if (is64) {
        if (size < sizeof(segment_command_64))
          return malformed();
        const segment_command_64 *seg =
            reinterpret_cast<const segment_command_64*>(lc);
        const unsigned sectionCount = read32(&seg->nsects, isBig);
        const section_64 *sects = reinterpret_cast<const section_64*>
                                  (lc + sizeof(segment_command_64));
        // Verify sections don't extend beyond end of segment load command.
        const uint64_t lcSize =
            sizeof(segment_command_64) +
            static_cast<uint64_t>(sectionCount) * sizeof(section_64);
        if (lcSize > size)
          return malformed();
        for (unsigned i = 0; i < sectionCount; ++i) {
          const section_64 *sect = &sects[i];
          const uint32_t sectFlags = read32(&sect->flags, isBig);
          Section section;
          section.segmentName = getString16(sect->segname);
          section.sectionName = getString16(sect->sectname);
          section.type = (SectionType)(sectFlags & SECTION_TYPE);
          section.attributes = sectFlags & SECTION_ATTRIBUTES;
          section.alignment = read32(&sect->align, isBig);
          section.address = read64(&sect->addr, isBig);
          const uint8_t *content =
              (const uint8_t *)start + read32(&sect->offset, isBig);
          size_t contentSize = read64(&sect->size, isBig);
          // Note: this assign() is copying the content bytes.  Ideally,
          // we can use a custom allocator for vector to avoid the copy.
          // The range is deliberately not bounds-checked here: sections
          // without file content (e.g. zero-fill) report a size that has
          // no backing bytes in the file.
          section.content = llvm::makeArrayRef(content, contentSize);
          if (std::error_code relocErr = appendRelocations(
                  section.relocations, image, isBig,
                  read32(&sect->reloff, isBig),
                  read32(&sect->nreloc, isBig))) {
            lcError = relocErr;
            return true;
          }
          if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
            // Pointers are 8 bytes wide in a 64-bit image, so the section
            // holds contentSize/8 entries.
            if (std::error_code isymErr = appendIndirectSymbols(
                    section.indirectSymbols, image, isBig,
                    indirectSymbolTableOffset, indirectSymbolTableCount,
                    read32(&sect->reserved1, isBig), contentSize / 8)) {
              lcError = isymErr;
              return true;
            }
          }
          f->sections.push_back(section);
        }
      }
      break;
    case LC_SEGMENT:
      if (!is64) {
        if (size < sizeof(segment_command))
          return malformed();
        const segment_command *seg =
            reinterpret_cast<const segment_command*>(lc);
        const unsigned sectionCount = read32(&seg->nsects, isBig);
        const section *sects = reinterpret_cast<const section*>
                               (lc + sizeof(segment_command));
        // Verify sections don't extend beyond end of segment load command.
        const uint64_t lcSize =
            sizeof(segment_command) +
            static_cast<uint64_t>(sectionCount) * sizeof(section);
        if (lcSize > size)
          return malformed();
        for (unsigned i = 0; i < sectionCount; ++i) {
          const section *sect = &sects[i];
          const uint32_t sectFlags = read32(&sect->flags, isBig);
          Section section;
          section.segmentName = getString16(sect->segname);
          section.sectionName = getString16(sect->sectname);
          section.type = (SectionType)(sectFlags & SECTION_TYPE);
          section.attributes = sectFlags & SECTION_ATTRIBUTES;
          section.alignment = read32(&sect->align, isBig);
          section.address = read32(&sect->addr, isBig);
          const uint8_t *content =
              (const uint8_t *)start + read32(&sect->offset, isBig);
          size_t contentSize = read32(&sect->size, isBig);
          // Note: this assign() is copying the content bytes.  Ideally,
          // we can use a custom allocator for vector to avoid the copy.
          // (Not bounds-checked, for the same reason as the 64-bit case.)
          section.content = llvm::makeArrayRef(content, contentSize);
          if (std::error_code relocErr = appendRelocations(
                  section.relocations, image, isBig,
                  read32(&sect->reloff, isBig),
                  read32(&sect->nreloc, isBig))) {
            lcError = relocErr;
            return true;
          }
          if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
            // Pointers are 4 bytes wide in a 32-bit image.
            if (std::error_code isymErr = appendIndirectSymbols(
                    section.indirectSymbols, image, isBig,
                    indirectSymbolTableOffset, indirectSymbolTableCount,
                    read32(&sect->reserved1, isBig), contentSize / 4)) {
              lcError = isymErr;
              return true;
            }
          }
          f->sections.push_back(section);
        }
      }
      break;
    case LC_SYMTAB: {
      if (size < sizeof(symtab_command))
        return malformed();
      const symtab_command *st = reinterpret_cast<const symtab_command*>(lc);
      const uint32_t strOffset = read32(&st->stroff, isBig);
      const uint32_t strSize = read32(&st->strsize, isBig);
      const uint32_t symOffset = read32(&st->symoff, isBig);
      const uint32_t symCount = read32(&st->nsyms, isBig);
      // Validate string pool and symbol table all in buffer.
      if (static_cast<uint64_t>(strOffset) + strSize > objSize)
        return malformed();
      const char *strings = start + strOffset;
      if (is64) {
        if (static_cast<uint64_t>(symOffset) +
                static_cast<uint64_t>(symCount) * sizeof(nlist_64) > objSize)
          return malformed();
        const nlist_64 *symbols =
            reinterpret_cast<const nlist_64 *>(start + symOffset);
        // Convert each nlist_64 to a lld::mach_o::normalized::Symbol.
        for (uint32_t i = 0; i < symCount; ++i) {
          const nlist_64 *sin = &symbols[i];
          nlist_64 tempSym;
          if (isBig != llvm::sys::IsBigEndianHost) {
            tempSym = *sin; swapStruct(tempSym); sin = &tempSym;
          }
          // A name must start inside the string pool; an index equal to the
          // pool size is already one past its last byte.
          if (sin->n_strx >= strSize)
            return malformed();
          Symbol sout;
          sout.name  = &strings[sin->n_strx];
          sout.type  = (NListType)(sin->n_type & N_TYPE);
          sout.scope = (sin->n_type & (N_PEXT|N_EXT));
          sout.sect  = sin->n_sect;
          sout.desc  = sin->n_desc;
          sout.value = sin->n_value;
          if (sout.type == N_UNDF)
            f->undefinedSymbols.push_back(sout);
          else if (sin->n_type & N_EXT)
            f->globalSymbols.push_back(sout);
          else
            f->localSymbols.push_back(sout);
        }
      } else {
        if (static_cast<uint64_t>(symOffset) +
                static_cast<uint64_t>(symCount) * sizeof(nlist) > objSize)
          return malformed();
        const nlist *symbols =
            reinterpret_cast<const nlist *>(start + symOffset);
        // Convert each nlist to a lld::mach_o::normalized::Symbol.
        for (uint32_t i = 0; i < symCount; ++i) {
          const nlist *sin = &symbols[i];
          nlist tempSym;
          if (isBig != llvm::sys::IsBigEndianHost) {
            tempSym = *sin; swapStruct(tempSym); sin = &tempSym;
          }
          if (sin->n_strx >= strSize)
            return malformed();
          Symbol sout;
          sout.name  = &strings[sin->n_strx];
          sout.type  = (NListType)(sin->n_type & N_TYPE);
          sout.scope = (sin->n_type & (N_PEXT|N_EXT));
          sout.sect  = sin->n_sect;
          sout.desc  = sin->n_desc;
          sout.value = sin->n_value;
          // NOTE(review): this test differs from the 64-bit branch above: a
          // symbol with both N_PEXT and N_EXT set is global there but local
          // here.  Left as-is; confirm which one is intended.
          if (sout.type == N_UNDF)
            f->undefinedSymbols.push_back(sout);
          else if (sout.scope == (SymbolScope)N_EXT)
            f->globalSymbols.push_back(sout);
          else
            f->localSymbols.push_back(sout);
        }
      }
      }
      break;
    case LC_ID_DYLIB: {
      if (size < sizeof(dylib_command))
        return malformed();
      const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
      // The name is stored inside the load command, at an offset from its
      // start; that offset must land inside the command.
      const uint32_t nameOffset = read32(&dl->dylib.name, isBig);
      if (nameOffset >= size)
        return malformed();
      f->installName = lc + nameOffset;
      }
      break;
    case LC_DATA_IN_CODE: {
      if (size < sizeof(linkedit_data_command))
        return malformed();
      const linkedit_data_command *ldc =
          reinterpret_cast<const linkedit_data_command*>(lc);
      const uint32_t dataOffset = read32(&ldc->dataoff, isBig);
      const uint32_t dataSize = read32(&ldc->datasize, isBig);
      // The entries are decoded after the walk; make sure they are all
      // inside the image first.
      if (static_cast<uint64_t>(dataOffset) + dataSize > objSize)
        return malformed();
      dataInCode = reinterpret_cast<const data_in_code_entry *>(
          start + dataOffset);
      dataInCodeSize = dataSize;
      }
      break;
    case LC_LOAD_DYLIB:
    case LC_LOAD_WEAK_DYLIB:
    case LC_REEXPORT_DYLIB:
    case LC_LOAD_UPWARD_DYLIB: {
      if (size < sizeof(dylib_command))
        return malformed();
      const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
      const uint32_t nameOffset = read32(&dl->dylib.name, isBig);
      if (nameOffset >= size)
        return malformed();
      DependentDylib entry;
      entry.path = lc + nameOffset;
      entry.kind = LoadCommandType(cmd);
      entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig);
      entry.currentVersion = read32(&dl->dylib.current_version, isBig);
      f->dependentDylibs.push_back(entry);
      }
      break;
    case LC_DYLD_INFO:
    case LC_DYLD_INFO_ONLY:
      if (size < sizeof(dyld_info_command))
        return malformed();
      dyldInfo = reinterpret_cast<const dyld_info_command*>(lc);
      break;
    }
    return false;
  });
  if (ec)
    return ec;
  if (lcError)
    return lcError;

  if (dataInCode) {
    // Convert on-disk data_in_code_entry array to DataInCode vector.
    for (unsigned i = 0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) {
      DataInCode entry;
      entry.offset = read32(&dataInCode[i].offset, isBig);
      entry.length = read16(&dataInCode[i].length, isBig);
      entry.kind =
          (DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig);
      f->dataInCode.push_back(entry);
    }
  }

  if (dyldInfo) {
    // dyldInfo points at raw file bytes, so its fields must be converted
    // from file byte order just like every other load command field.
    const uint32_t exportOffset = read32(&dyldInfo->export_off, isBig);
    const uint32_t exportSize = read32(&dyldInfo->export_size, isBig);
    // If any exports, extract and add to normalized exportInfo vector.
    if (exportSize) {
      if (static_cast<uint64_t>(exportOffset) + exportSize > objSize)
        return make_error_code(llvm::errc::executable_format_error);
      const uint8_t *trieStart =
          reinterpret_cast<const uint8_t*>(start + exportOffset);
      ArrayRef<uint8_t> trie(trieStart, exportSize);
      for (const ExportEntry &trieExport : MachOObjectFile::exports(trie)) {
        Export normExport;
        normExport.name = trieExport.name().copy(f->ownedAllocations);
        normExport.offset = trieExport.address();
        normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK);
        normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK;
        normExport.otherOffset = trieExport.other();
        if (!trieExport.otherName().empty())
          normExport.otherName = trieExport.otherName().copy(f->ownedAllocations);
        f->exportInfo.push_back(normExport);
      }
    }
  }

  return std::move(f);
}
Separate file parsing from File's constructors. This is a second patch for InputGraph cleanup. Sorry about the size of the patch, but what I did in this patch is basically moving code from constructor to a new method, parse(), so the amount of new code is small. This has no change in functionality. We've discussed the issue that we have too many classes to represent a concept of "file". We have File subclasses that represent files read from disk. In addition to that, we have bunch of InputElement subclasses (that are part of InputGraph) that represent command line arguments for input file names. InputElement is a wrapper for File. InputElement has parseFile method. The method instantiates a File. The File's constructor reads a file from disk and parses that. Because parseFile method is called from multiple worker threads, file parsing is processed in parallel. In other words, one reason why we needed the wrapper classes is because a File would start reading a file as soon as it is instantiated. So, the reason why we have too many classes here is at least partly because of the design flaw of File class. Just like threads in a good threading library, we need to separate instantiation from "start" method, so that we can instantiate File objects when we need them (which should be very fast because it involves only one mmap() and no real file IO) and use them directly instead of the wrapper classes. Later, we call parse() on each file in parallel to let them do actual file IO. In this design, we can eliminate a reason to have the wrapper classes. In order to minimize the size of the patch, I didn't go so far as to replace the wrapper classes with File classes. The wrapper classes are still there. In this patch, we call parse() immediately after instantiating a File, so this really has no change in functionality. Eventually the call of parse() should be moved to Driver::link(). That'll be done in another patch. llvm-svn: 224102
2014-12-12 15:31:09 +08:00
class MachOObjectReader : public Reader {
public:
Separate file parsing from File's constructors. This is a second patch for InputGraph cleanup. Sorry about the size of the patch, but what I did in this patch is basically moving code from constructor to a new method, parse(), so the amount of new code is small. This has no change in functionality. We've discussed the issue that we have too many classes to represent a concept of "file". We have File subclasses that represent files read from disk. In addition to that, we have bunch of InputElement subclasses (that are part of InputGraph) that represent command line arguments for input file names. InputElement is a wrapper for File. InputElement has parseFile method. The method instantiates a File. The File's constructor reads a file from disk and parses that. Because parseFile method is called from multiple worker threads, file parsing is processed in parallel. In other words, one reason why we needed the wrapper classes is because a File would start reading a file as soon as it is instantiated. So, the reason why we have too many classes here is at least partly because of the design flaw of File class. Just like threads in a good threading library, we need to separate instantiation from "start" method, so that we can instantiate File objects when we need them (which should be very fast because it involves only one mmap() and no real file IO) and use them directly instead of the wrapper classes. Later, we call parse() on each file in parallel to let them do actual file IO. In this design, we can eliminate a reason to have the wrapper classes. In order to minimize the size of the patch, I didn't go so far as to replace the wrapper classes with File classes. The wrapper classes are still there. In this patch, we call parse() immediately after instantiating a File, so this really has no change in functionality. Eventually the call of parse() should be moved to Driver::link(). That'll be done in another patch. llvm-svn: 224102
2014-12-12 15:31:09 +08:00
MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {}
bool canParse(file_magic magic, StringRef ext,
const MemoryBuffer &mb) const override {
switch (magic) {
case llvm::sys::fs::file_magic::macho_object:
return (mb.getBufferSize() > 32);
default:
return false;
}
}
std::error_code
parseFile(std::unique_ptr<MemoryBuffer> &mb, const Registry &registry,
std::vector<std::unique_ptr<File>> &result) const override {
Separate file parsing from File's constructors. This is a second patch for InputGraph cleanup. Sorry about the size of the patch, but what I did in this patch is basically moving code from constructor to a new method, parse(), so the amount of new code is small. This has no change in functionality. We've discussed the issue that we have too many classes to represent a concept of "file". We have File subclasses that represent files read from disk. In addition to that, we have bunch of InputElement subclasses (that are part of InputGraph) that represent command line arguments for input file names. InputElement is a wrapper for File. InputElement has parseFile method. The method instantiates a File. The File's constructor reads a file from disk and parses that. Because parseFile method is called from multiple worker threads, file parsing is processed in parallel. In other words, one reason why we needed the wrapper classes is because a File would start reading a file as soon as it is instantiated. So, the reason why we have too many classes here is at least partly because of the design flaw of File class. Just like threads in a good threading library, we need to separate instantiation from "start" method, so that we can instantiate File objects when we need them (which should be very fast because it involves only one mmap() and no real file IO) and use them directly instead of the wrapper classes. Later, we call parse() on each file in parallel to let them do actual file IO. In this design, we can eliminate a reason to have the wrapper classes. In order to minimize the size of the patch, I didn't go so far as to replace the wrapper classes with File classes. The wrapper classes are still there. In this patch, we call parse() immediately after instantiating a File, so this really has no change in functionality. Eventually the call of parse() should be moved to Driver::link(). That'll be done in another patch. llvm-svn: 224102
2014-12-12 15:31:09 +08:00
auto *file = new MachOFile(mb.get(), &_ctx);
result.push_back(std::unique_ptr<MachOFile>(file));
return std::error_code();
}
Separate file parsing from File's constructors. This is a second patch for InputGraph cleanup. Sorry about the size of the patch, but what I did in this patch is basically moving code from constructor to a new method, parse(), so the amount of new code is small. This has no change in functionality. We've discussed the issue that we have too many classes to represent a concept of "file". We have File subclasses that represent files read from disk. In addition to that, we have bunch of InputElement subclasses (that are part of InputGraph) that represent command line arguments for input file names. InputElement is a wrapper for File. InputElement has parseFile method. The method instantiates a File. The File's constructor reads a file from disk and parses that. Because parseFile method is called from multiple worker threads, file parsing is processed in parallel. In other words, one reason why we needed the wrapper classes is because a File would start reading a file as soon as it is instantiated. So, the reason why we have too many classes here is at least partly because of the design flaw of File class. Just like threads in a good threading library, we need to separate instantiation from "start" method, so that we can instantiate File objects when we need them (which should be very fast because it involves only one mmap() and no real file IO) and use them directly instead of the wrapper classes. Later, we call parse() on each file in parallel to let them do actual file IO. In this design, we can eliminate a reason to have the wrapper classes. In order to minimize the size of the patch, I didn't go so far as to replace the wrapper classes with File classes. The wrapper classes are still there. In this patch, we call parse() immediately after instantiating a File, so this really has no change in functionality. Eventually the call of parse() should be moved to Driver::link(). That'll be done in another patch. llvm-svn: 224102
2014-12-12 15:31:09 +08:00
private:
MachOLinkingContext &_ctx;
};
Separate file parsing from File's constructors. This is a second patch for InputGraph cleanup. Sorry about the size of the patch, but what I did in this patch is basically moving code from constructor to a new method, parse(), so the amount of new code is small. This has no change in functionality. We've discussed the issue that we have too many classes to represent a concept of "file". We have File subclasses that represent files read from disk. In addition to that, we have bunch of InputElement subclasses (that are part of InputGraph) that represent command line arguments for input file names. InputElement is a wrapper for File. InputElement has parseFile method. The method instantiates a File. The File's constructor reads a file from disk and parses that. Because parseFile method is called from multiple worker threads, file parsing is processed in parallel. In other words, one reason why we needed the wrapper classes is because a File would start reading a file as soon as it is instantiated. So, the reason why we have too many classes here is at least partly because of the design flaw of File class. Just like threads in a good threading library, we need to separate instantiation from "start" method, so that we can instantiate File objects when we need them (which should be very fast because it involves only one mmap() and no real file IO) and use them directly instead of the wrapper classes. Later, we call parse() on each file in parallel to let them do actual file IO. In this design, we can eliminate a reason to have the wrapper classes. In order to minimize the size of the patch, I didn't go so far as to replace the wrapper classes with File classes. The wrapper classes are still there. In this patch, we call parse() immediately after instantiating a File, so this really has no change in functionality. Eventually the call of parse() should be moved to Driver::link(). That'll be done in another patch. llvm-svn: 224102
2014-12-12 15:31:09 +08:00
class MachODylibReader : public Reader {
public:
MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {}
bool canParse(file_magic magic, StringRef ext,
const MemoryBuffer &mb) const override {
switch (magic) {
case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib:
case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
return (mb.getBufferSize() > 32);
default:
return false;
}
}
std::error_code
parseFile(std::unique_ptr<MemoryBuffer> &mb, const Registry &registry,
std::vector<std::unique_ptr<File>> &result) const override {
auto *file = new MachODylibFile(mb.get(), &_ctx);
result.push_back(std::unique_ptr<MachODylibFile>(file));
return std::error_code();
}
Separate file parsing from File's constructors. This is a second patch for InputGraph cleanup. Sorry about the size of the patch, but what I did in this patch is basically moving code from constructor to a new method, parse(), so the amount of new code is small. This has no change in functionality. We've discussed the issue that we have too many classes to represent a concept of "file". We have File subclasses that represent files read from disk. In addition to that, we have bunch of InputElement subclasses (that are part of InputGraph) that represent command line arguments for input file names. InputElement is a wrapper for File. InputElement has parseFile method. The method instantiates a File. The File's constructor reads a file from disk and parses that. Because parseFile method is called from multiple worker threads, file parsing is processed in parallel. In other words, one reason why we needed the wrapper classes is because a File would start reading a file as soon as it is instantiated. So, the reason why we have too many classes here is at least partly because of the design flaw of File class. Just like threads in a good threading library, we need to separate instantiation from "start" method, so that we can instantiate File objects when we need them (which should be very fast because it involves only one mmap() and no real file IO) and use them directly instead of the wrapper classes. Later, we call parse() on each file in parallel to let them do actual file IO. In this design, we can eliminate a reason to have the wrapper classes. In order to minimize the size of the patch, I didn't go so far as to replace the wrapper classes with File classes. The wrapper classes are still there. In this patch, we call parse() immediately after instantiating a File, so this really has no change in functionality. Eventually the call of parse() should be moved to Driver::link(). That'll be done in another patch. llvm-svn: 224102
2014-12-12 15:31:09 +08:00
private:
MachOLinkingContext &_ctx;
};
} // namespace normalized
} // namespace mach_o
[lld] Introduce registry and Reference kind tuple The main changes are in: include/lld/Core/Reference.h include/lld/ReaderWriter/Reader.h Everything else is details to support the main change. 1) Registration based Readers Previously, lld had a tangled interdependency with all the Readers. It would have been impossible to make a streamlined linker (say for a JIT) which just supported one file format and one architecture (no yaml, no archives, etc). The old model also required a LinkingContext to read an object file, which would have made .o inspection tools awkward. The new model is that there is a global Registry object. You programmatically register the Readers you want with the registry object. Whenever you need to read/parse a file, you ask the registry to do it, and the registry tries each registered reader. For ease of use with the existing lld code base, there is one Registry object inside the LinkingContext object. 2) Changing kind value to be a tuple Beside Readers, the registry also keeps track of the mapping for Reference Kind values to and from strings. Along with that, this patch also fixes an ambiguity with the previous Reference::Kind values. The problem was that we wanted to reuse existing relocation type values as Reference::Kind values. But then how can the YAML write know how to convert a value to a string? The fix is to change the 32-bit Reference::Kind into a tuple with an 8-bit namespace (e.g. ELF, COFFF, etc), an 8-bit architecture (e.g. x86_64, PowerPC, etc), and a 16-bit value. This tuple system allows conversion to and from strings with no ambiguities. llvm-svn: 197727
2013-12-20 05:58:00 +08:00
void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) {
MachOLinkingContext::Arch arch = ctx.arch();
Separate file parsing from File's constructors. This is a second patch for InputGraph cleanup. Sorry about the size of the patch, but what I did in this patch is basically moving code from constructor to a new method, parse(), so the amount of new code is small. This has no change in functionality. We've discussed the issue that we have too many classes to represent a concept of "file". We have File subclasses that represent files read from disk. In addition to that, we have bunch of InputElement subclasses (that are part of InputGraph) that represent command line arguments for input file names. InputElement is a wrapper for File. InputElement has parseFile method. The method instantiates a File. The File's constructor reads a file from disk and parses that. Because parseFile method is called from multiple worker threads, file parsing is processed in parallel. In other words, one reason why we needed the wrapper classes is because a File would start reading a file as soon as it is instantiated. So, the reason why we have too many classes here is at least partly because of the design flaw of File class. Just like threads in a good threading library, we need to separate instantiation from "start" method, so that we can instantiate File objects when we need them (which should be very fast because it involves only one mmap() and no real file IO) and use them directly instead of the wrapper classes. Later, we call parse() on each file in parallel to let them do actual file IO. In this design, we can eliminate a reason to have the wrapper classes. In order to minimize the size of the patch, I didn't go so far as to replace the wrapper classes with File classes. The wrapper classes are still there. In this patch, we call parse() immediately after instantiating a File, so this really has no change in functionality. Eventually the call of parse() should be moved to Driver::link(). That'll be done in another patch. llvm-svn: 224102
2014-12-12 15:31:09 +08:00
add(std::unique_ptr<Reader>(new mach_o::normalized::MachOObjectReader(ctx)));
add(std::unique_ptr<Reader>(new mach_o::normalized::MachODylibReader(ctx)));
addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(),
ctx.archHandler().kindStrings());
add(std::unique_ptr<YamlIOTaggedDocumentHandler>(
new mach_o::MachOYamlIOTaggedDocumentHandler(arch)));
[lld] Introduce registry and Reference kind tuple The main changes are in: include/lld/Core/Reference.h include/lld/ReaderWriter/Reader.h Everything else is details to support the main change. 1) Registration based Readers Previously, lld had a tangled interdependency with all the Readers. It would have been impossible to make a streamlined linker (say for a JIT) which just supported one file format and one architecture (no yaml, no archives, etc). The old model also required a LinkingContext to read an object file, which would have made .o inspection tools awkward. The new model is that there is a global Registry object. You programmatically register the Readers you want with the registry object. Whenever you need to read/parse a file, you ask the registry to do it, and the registry tries each registered reader. For ease of use with the existing lld code base, there is one Registry object inside the LinkingContext object. 2) Changing kind value to be a tuple Beside Readers, the registry also keeps track of the mapping for Reference Kind values to and from strings. Along with that, this patch also fixes an ambiguity with the previous Reference::Kind values. The problem was that we wanted to reuse existing relocation type values as Reference::Kind values. But then how can the YAML write know how to convert a value to a string? The fix is to change the 32-bit Reference::Kind into a tuple with an 8-bit namespace (e.g. ELF, COFFF, etc), an 8-bit architecture (e.g. x86_64, PowerPC, etc), and a 16-bit value. This tuple system allows conversion to and from strings with no ambiguities. llvm-svn: 197727
2013-12-20 05:58:00 +08:00
}
} // namespace lld