From 70fbbcdd3437e9890307ef23d7057c565f142c44 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Tue, 19 May 2020 08:29:17 -0700 Subject: [PATCH] Revert "[lld-macho] Support .subsections_via_symbols" Due to build breakage mentioned in https://reviews.llvm.org/D79926. This reverts commit e270b2f1727c0fbde2676e8d0340c0d934726d3c. --- lld/MachO/Driver.cpp | 11 +- lld/MachO/InputFiles.cpp | 212 ++++++-------------- lld/MachO/InputFiles.h | 18 +- lld/MachO/InputSection.cpp | 9 +- lld/MachO/InputSection.h | 6 +- lld/test/MachO/subsections-section-relocs.s | 47 ----- lld/test/MachO/subsections-symbol-relocs.s | 55 ----- 7 files changed, 80 insertions(+), 278 deletions(-) delete mode 100644 lld/test/MachO/subsections-section-relocs.s delete mode 100644 lld/test/MachO/subsections-symbol-relocs.s diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index c653e8612957..ed1d03f80b00 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -326,14 +326,9 @@ bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, createSyntheticSections(); // Initialize InputSections. - for (InputFile *file : inputFiles) { - for (SubsectionMap &map : file->subsections) { - for (auto &p : map) { - InputSection *isec = p.second; - inputSections.push_back(isec); - } - } - } + for (InputFile *file : inputFiles) + for (InputSection *sec : file->sections) + inputSections.push_back(sec); // Write to an output file. writeResult(); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index cfd3df449a9d..c1107431af85 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -127,13 +127,17 @@ static const load_command *findCommand(const mach_header_64 *hdr, return nullptr; } -void InputFile::parseSections(ArrayRef sections) { - subsections.reserve(sections.size()); +std::vector +InputFile::parseSections(ArrayRef sections) { + std::vector ret; + ret.reserve(sections.size()); + auto *buf = reinterpret_cast(mb.getBufferStart()); for (const section_64 &sec : sections) { InputSection *isec = make(); isec->file = this; + isec->header = &sec; isec->name = StringRef(sec.sectname, strnlen(sec.sectname, 16)); isec->segname = StringRef(sec.segname, strnlen(sec.segname, 16)); isec->data = {buf + sec.offset, static_cast(sec.size)}; @@ -143,185 +147,96 @@ void InputFile::parseSections(ArrayRef sections) { else isec->align = 1 << sec.align; isec->flags = sec.flags; - subsections.push_back({{0, isec}}); + ret.push_back(isec); } -} -// Find the subsection corresponding to the greatest section offset that is <= -// that of the given offset. -// -// offset: an offset relative to the start of the original InputSection (before -// any subsection splitting has occurred). It will be updated to represent the -// same location as an offset relative to the start of the containing -// subsection. -static InputSection *findContainingSubsection(SubsectionMap &map, - uint32_t *offset) { - auto it = std::prev(map.upper_bound(*offset)); - *offset -= it->first; - return it->second; + return ret; } void InputFile::parseRelocations(const section_64 &sec, - SubsectionMap &subsecMap) { + std::vector &relocs) { auto *buf = reinterpret_cast(mb.getBufferStart()); ArrayRef relInfos( reinterpret_cast(buf + sec.reloff), sec.nreloc); for (const any_relocation_info &anyRel : relInfos) { - if (anyRel.r_word0 & R_SCATTERED) - fatal("TODO: Scattered relocations not supported"); - - auto rel = reinterpret_cast(anyRel); - if (!rel.r_pcrel) - fatal("TODO: Only pcrel relocations are supported"); - Reloc r; - r.type = rel.r_type; - uint32_t secRelOffset = rel.r_address; - uint64_t rawAddend = - target->getImplicitAddend(buf + sec.offset + secRelOffset, r.type); - - if (rel.r_extern) { - r.target = symbols[rel.r_symbolnum]; - r.addend = rawAddend; + if (anyRel.r_word0 & R_SCATTERED) { + error("TODO: Scattered relocations not supported"); } else { - if (rel.r_symbolnum == 0 || rel.r_symbolnum > subsections.size()) - fatal("invalid section index in relocation for offset " + - std::to_string(r.offset) + " in section " + sec.sectname + - " of " + getName()); - - SubsectionMap &targetSubsecMap = subsections[rel.r_symbolnum - 1]; - const section_64 &targetSec = sectionHeaders[rel.r_symbolnum - 1]; - // The implicit addend for pcrel section relocations is the pcrel offset - // in terms of the addresses in the input file. Here we adjust it so that - // it describes the offset from the start of the target section. - // TODO: Figure out what to do for non-pcrel section relocations. - // TODO: The offset of 4 is probably not right for ARM64, nor for - // relocations with r_length != 2. - uint32_t targetOffset = - sec.addr + secRelOffset + 4 + rawAddend - targetSec.addr; - r.target = findContainingSubsection(targetSubsecMap, &targetOffset); - r.addend = targetOffset; + auto rel = reinterpret_cast(anyRel); + r.type = rel.r_type; + r.offset = rel.r_address; + r.addend = target->getImplicitAddend(buf + sec.offset + r.offset, r.type); + if (rel.r_extern) { + r.target = symbols[rel.r_symbolnum]; + } else { + if (rel.r_symbolnum == 0 || rel.r_symbolnum > sections.size()) + fatal("invalid section index in relocation for offset " + + std::to_string(r.offset) + " in section " + sec.sectname + + " of " + getName()); + r.target = sections[rel.r_symbolnum - 1]; + } } - - InputSection *subsec = findContainingSubsection(subsecMap, &secRelOffset); - r.offset = secRelOffset; - subsec->relocs.push_back(r); - } -} - -void InputFile::parseSymbols(ArrayRef nList, const char *strtab, - bool subsectionsViaSymbols) { - // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols - // out-of-sequence. - symbols.resize(nList.size()); - std::vector altEntrySymIdxs; - - auto createDefined = [&](const nlist_64 &sym, InputSection *isec, - uint32_t value) -> Symbol * { - StringRef name = strtab + sym.n_strx; - if (sym.n_type & N_EXT) - // Global defined symbol - return symtab->addDefined(name, isec, value); - else - // Local defined symbol - return make(name, isec, value); - }; - - for (size_t i = 0, n = nList.size(); i < n; ++i) { - const nlist_64 &sym = nList[i]; - - // Undefined symbol - if (!sym.n_sect) { - StringRef name = strtab + sym.n_strx; - symbols[i] = symtab->addUndefined(name); - continue; - } - - const section_64 &sec = sectionHeaders[sym.n_sect - 1]; - SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; - uint64_t offset = sym.n_value - sec.addr; - - // If the input file does not use subsections-via-symbols, all symbols can - // use the same subsection. Otherwise, we must split the sections along - // symbol boundaries. - if (!subsectionsViaSymbols) { - symbols[i] = createDefined(sym, subsecMap[0], offset); - continue; - } - - // nList entries aren't necessarily arranged in address order. Therefore, - // we can't create alt-entry symbols at this point because a later symbol - // may split its section, which may affect which subsection the alt-entry - // symbol is assigned to. So we need to handle them in a second pass below. - if (sym.n_desc & N_ALT_ENTRY) { - altEntrySymIdxs.push_back(i); - continue; - } - - // Find the subsection corresponding to the greatest section offset that is - // <= that of the current symbol. The subsection that we find either needs - // to be used directly or split in two. - uint32_t firstSize = offset; - InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize); - - if (firstSize == 0) { - // Alias of an existing symbol, or the first symbol in the section. These - // are handled by reusing the existing section. - symbols[i] = createDefined(sym, firstIsec, 0); - continue; - } - - // We saw a symbol definition at a new offset. Split the section into two - // subsections. The new symbol uses the second subsection. - auto *secondIsec = make(*firstIsec); - secondIsec->data = firstIsec->data.slice(firstSize); - firstIsec->data = firstIsec->data.slice(0, firstSize); - // TODO: ld64 appears to preserve the original alignment as well as each - // subsection's offset from the last aligned address. We should consider - // emulating that behavior. - secondIsec->align = MinAlign(firstIsec->align, offset); - - subsecMap[offset] = secondIsec; - // By construction, the symbol will be at offset zero in the new section. - symbols[i] = createDefined(sym, secondIsec, 0); - } - - for (size_t idx : altEntrySymIdxs) { - const nlist_64 &sym = nList[idx]; - SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; - uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr; - InputSection *subsec = findContainingSubsection(subsecMap, &off); - symbols[idx] = createDefined(sym, subsec, off); + relocs.push_back(r); } } ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) { auto *buf = reinterpret_cast(mb.getBufferStart()); auto *hdr = reinterpret_cast(mb.getBufferStart()); + ArrayRef objSections; if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) { auto *c = reinterpret_cast(cmd); - sectionHeaders = ArrayRef{ + objSections = ArrayRef{ reinterpret_cast(c + 1), c->nsects}; - parseSections(sectionHeaders); + sections = parseSections(objSections); } // TODO: Error on missing LC_SYMTAB? if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) { auto *c = reinterpret_cast(cmd); + const char *strtab = reinterpret_cast(buf) + c->stroff; ArrayRef nList( reinterpret_cast(buf + c->symoff), c->nsyms); - const char *strtab = reinterpret_cast(buf) + c->stroff; - bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS; - parseSymbols(nList, strtab, subsectionsViaSymbols); + + symbols.reserve(c->nsyms); + + for (const nlist_64 &sym : nList) { + StringRef name = strtab + sym.n_strx; + + // Undefined symbol + if (!sym.n_sect) { + symbols.push_back(symtab->addUndefined(name)); + continue; + } + + InputSection *isec = sections[sym.n_sect - 1]; + const section_64 &objSec = objSections[sym.n_sect - 1]; + uint64_t value = sym.n_value - objSec.addr; + + // Global defined symbol + if (sym.n_type & N_EXT) { + symbols.push_back(symtab->addDefined(name, isec, value)); + continue; + } + + // Local defined symbol + symbols.push_back(make(name, isec, value)); + } } // The relocations may refer to the symbols, so we parse them after we have - // parsed all the symbols. - for (size_t i = 0, n = subsections.size(); i < n; ++i) - parseRelocations(sectionHeaders[i], subsections[i]); + // the symbols loaded. + if (!sections.empty()) { + auto it = sections.begin(); + for (const section_64 &sec : objSections) { + parseRelocations(sec, (*it)->relocs); + ++it; + } + } } DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella) @@ -409,8 +324,7 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) { sym.getName()); auto file = make(mb); symbols.insert(symbols.end(), file->symbols.begin(), file->symbols.end()); - subsections.insert(subsections.end(), file->subsections.begin(), - file->subsections.end()); + sections.insert(sections.end(), file->sections.begin(), file->sections.end()); } // Returns "" or "baz.o". diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 5d2bfb03e904..c94035b1bcf6 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -14,8 +14,6 @@ #include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/Archive.h" #include "llvm/Support/MemoryBuffer.h" - -#include #include namespace lld { @@ -25,11 +23,6 @@ class InputSection; class Symbol; struct Reloc; -// If .subsections_via_symbols is set, each InputSection will be split along -// symbol boundaries. The keys of a SubsectionMap represent the offsets of -// each subsection from the start of the original pre-split InputSection. -using SubsectionMap = std::map; - class InputFile { public: enum Kind { @@ -44,18 +37,15 @@ public: MemoryBufferRef mb; std::vector symbols; - ArrayRef sectionHeaders; - std::vector subsections; + std::vector sections; protected: InputFile(Kind kind, MemoryBufferRef mb) : mb(mb), fileKind(kind) {} - void parseSections(ArrayRef); + std::vector parseSections(ArrayRef); - void parseSymbols(ArrayRef nList, - const char *strtab, bool subsectionsViaSymbols); - - void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &); + void parseRelocations(const llvm::MachO::section_64 &, + std::vector &relocs); private: const Kind fileKind; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 5453c0b8cd78..ace30c8c41dc 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -32,6 +32,7 @@ void InputSection::writeTo(uint8_t *buf) { for (Reloc &r : relocs) { uint64_t va = 0; + uint64_t addend = r.addend; if (auto *s = r.target.dyn_cast()) { if (auto *dylibSymbol = dyn_cast(s)) { va = target->getDylibSymbolVA(*dylibSymbol, r.type); @@ -40,9 +41,15 @@ void InputSection::writeTo(uint8_t *buf) { } } else if (auto *isec = r.target.dyn_cast()) { va = isec->getVA(); + // The implicit addend for pcrel section relocations is the pcrel offset + // in terms of the addresses in the input file. Here we adjust it so that + // it describes the offset from the start of the target section. + // TODO: Figure out what to do for non-pcrel section relocations. + // TODO: The offset of 4 is probably not right for ARM64. + addend -= isec->header->addr - (header->addr + r.offset + 4); } - uint64_t val = va + r.addend; + uint64_t val = va + addend; if (1) // TODO: handle non-pcrel relocations val -= getVA() + r.offset; target->relocateOne(buf + r.offset, r.type, val); diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 1d11b8e64c30..908f09e6d29d 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -24,11 +24,7 @@ class Symbol; struct Reloc { uint8_t type; - // Adding this offset to the address of the target symbol or subsection gives - // the destination that this relocation refers to. uint32_t addend; - // The offset from the start of the subsection that this relocation belongs - // to. uint32_t offset; llvm::PointerUnion target; }; @@ -46,6 +42,8 @@ public: InputFile *file = nullptr; StringRef name; StringRef segname; + // This provides access to the address of the section in the input file. + const llvm::MachO::section_64 *header; OutputSection *parent = nullptr; uint64_t outSecOff = 0; diff --git a/lld/test/MachO/subsections-section-relocs.s b/lld/test/MachO/subsections-section-relocs.s deleted file mode 100644 index e8a8d7a3ec40..000000000000 --- a/lld/test/MachO/subsections-section-relocs.s +++ /dev/null @@ -1,47 +0,0 @@ -# REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o - -# RUN: echo "_bar_str" > %t/order-file -# RUN: echo "_foo_str" >> %t/order-file - -# RUN: lld -flavor darwinnew -o %t/test %t/test.o -order_file %t/order-file -# RUN: llvm-objdump --section-headers -d --no-show-raw-insn %t/test | FileCheck %s -# CHECK-LABEL: Sections: -# CHECK: __cstring {{[^ ]*}} {{0*}}[[#%x, CSTRING_ADDR:]] -# CHECK-LABEL: Disassembly of section __TEXT,__text: -## L._str should end up at CSTRING_ADDR + 4, and leaq is 7 bytes long so we -## have RIP = ADDR + 7 -# CHECK: [[#%x, ADDR:]]: leaq -# CHECK-SAME: [[#%u, CSTRING_ADDR + 4 - ADDR - 7]](%rip), %rsi {{.*}} <_bar_str+0x4> - -# RUN: llvm-readobj --string-dump=__cstring %t/test | FileCheck %s --check-prefix=STRINGS -# STRINGS: bar -# STRINGS: Private symbol -# STRINGS: foo - -.text -.globl _main, _foo_str, _bar_str - -_main: - leaq L_.str(%rip), %rsi - mov $0, %rax - ret - -.section __TEXT,__cstring -_foo_str: - .asciz "foo" - -_bar_str: - .asciz "bar" - -## References to this generate a section relocation -## N.B.: ld64 doesn't actually reorder symbols in __cstring based on the order -## file. Only our implementation does. However, I'm not sure how else to -## test section relocations that target an address inside a relocated -## symbol: using a non-__cstring section would cause llvm-mc to emit a -## symbol relocation instead using the nearest symbol. -L_.str: - .asciz "Private symbol" - -.subsections_via_symbols diff --git a/lld/test/MachO/subsections-symbol-relocs.s b/lld/test/MachO/subsections-symbol-relocs.s deleted file mode 100644 index 475c909377da..000000000000 --- a/lld/test/MachO/subsections-symbol-relocs.s +++ /dev/null @@ -1,55 +0,0 @@ -# REQUIRES: x86 -# RUN: mkdir -p %t -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o - -# RUN: echo "_bar" > %t/order-file-1 -# RUN: echo "_foo" >> %t/order-file-1 -# RUN: echo "_main" >> %t/order-file-1 -## _qux is marked as .alt_entry, so it should not create a new subsection and -## its contents should move with _bar to the start of the output despite the -## order file listing it at the end. -# RUN: echo "_qux" >> %t/order-file-1 - -## _bar and _baz point to the same address, so both order files should achieve -## the same result. -# RUN: echo "_baz" > %t/order-file-2 -# RUN: echo "_foo" >> %t/order-file-2 -# RUN: echo "_main" >> %t/order-file-2 -# RUN: echo "_qux" >> %t/order-file-2 - -# RUN: lld -flavor darwinnew -o %t/test-1 %t/test.o -order_file %t/order-file-1 -# RUN: llvm-objdump -d --no-show-raw-insn %t/test-1 | FileCheck %s -# RUN: lld -flavor darwinnew -o %t/test-2 %t/test.o -order_file %t/order-file-2 -# RUN: llvm-objdump -d --no-show-raw-insn %t/test-2 | FileCheck %s -# CHECK-LABEL: Disassembly of section __TEXT,__text: -# CHECK: <_bar>: -# CHECK-NEXT: callq {{.*}} <_foo> -# CHECK-EMPTY: -# CHECK-NEXT: <_qux>: -# CHECK-NEXT: retq -# CHECK: <_foo>: -# CHECK-NEXT: retq -# CHECK: <_main>: -# CHECK-NEXT: callq {{.*}} <_bar> -# CHECK-NEXT: movq $0, %rax -# CHECK-NEXT: retq - -.text -.globl _main, _foo, _bar, _qux -.alt_entry _qux - -_foo: - retq - -_main: - callq _bar - movq $0, %rax - retq - -_bar: -_baz: - callq _foo -_qux: - retq - -.subsections_via_symbols