forked from OSchip/llvm-project
Revert "[lld-macho] Support .subsections_via_symbols"
Due to build breakage mentioned in https://reviews.llvm.org/D79926.
This reverts commit e270b2f172.
This commit is contained in:
parent
db8559eee4
commit
70fbbcdd34
|
@ -326,14 +326,9 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
|
|||
createSyntheticSections();
|
||||
|
||||
// Initialize InputSections.
|
||||
for (InputFile *file : inputFiles) {
|
||||
for (SubsectionMap &map : file->subsections) {
|
||||
for (auto &p : map) {
|
||||
InputSection *isec = p.second;
|
||||
inputSections.push_back(isec);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (InputFile *file : inputFiles)
|
||||
for (InputSection *sec : file->sections)
|
||||
inputSections.push_back(sec);
|
||||
|
||||
// Write to an output file.
|
||||
writeResult();
|
||||
|
|
|
@ -127,13 +127,17 @@ static const load_command *findCommand(const mach_header_64 *hdr,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
void InputFile::parseSections(ArrayRef<section_64> sections) {
|
||||
subsections.reserve(sections.size());
|
||||
std::vector<InputSection *>
|
||||
InputFile::parseSections(ArrayRef<section_64> sections) {
|
||||
std::vector<InputSection *> ret;
|
||||
ret.reserve(sections.size());
|
||||
|
||||
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
|
||||
|
||||
for (const section_64 &sec : sections) {
|
||||
InputSection *isec = make<InputSection>();
|
||||
isec->file = this;
|
||||
isec->header = &sec;
|
||||
isec->name = StringRef(sec.sectname, strnlen(sec.sectname, 16));
|
||||
isec->segname = StringRef(sec.segname, strnlen(sec.segname, 16));
|
||||
isec->data = {buf + sec.offset, static_cast<size_t>(sec.size)};
|
||||
|
@ -143,185 +147,96 @@ void InputFile::parseSections(ArrayRef<section_64> sections) {
|
|||
else
|
||||
isec->align = 1 << sec.align;
|
||||
isec->flags = sec.flags;
|
||||
subsections.push_back({{0, isec}});
|
||||
ret.push_back(isec);
|
||||
}
|
||||
}
|
||||
|
||||
// Find the subsection corresponding to the greatest section offset that is <=
|
||||
// that of the given offset.
|
||||
//
|
||||
// offset: an offset relative to the start of the original InputSection (before
|
||||
// any subsection splitting has occurred). It will be updated to represent the
|
||||
// same location as an offset relative to the start of the containing
|
||||
// subsection.
|
||||
static InputSection *findContainingSubsection(SubsectionMap &map,
|
||||
uint32_t *offset) {
|
||||
auto it = std::prev(map.upper_bound(*offset));
|
||||
*offset -= it->first;
|
||||
return it->second;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void InputFile::parseRelocations(const section_64 &sec,
|
||||
SubsectionMap &subsecMap) {
|
||||
std::vector<Reloc> &relocs) {
|
||||
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
|
||||
ArrayRef<any_relocation_info> relInfos(
|
||||
reinterpret_cast<const any_relocation_info *>(buf + sec.reloff),
|
||||
sec.nreloc);
|
||||
|
||||
for (const any_relocation_info &anyRel : relInfos) {
|
||||
if (anyRel.r_word0 & R_SCATTERED)
|
||||
fatal("TODO: Scattered relocations not supported");
|
||||
|
||||
auto rel = reinterpret_cast<const relocation_info &>(anyRel);
|
||||
if (!rel.r_pcrel)
|
||||
fatal("TODO: Only pcrel relocations are supported");
|
||||
|
||||
Reloc r;
|
||||
r.type = rel.r_type;
|
||||
uint32_t secRelOffset = rel.r_address;
|
||||
uint64_t rawAddend =
|
||||
target->getImplicitAddend(buf + sec.offset + secRelOffset, r.type);
|
||||
|
||||
if (rel.r_extern) {
|
||||
r.target = symbols[rel.r_symbolnum];
|
||||
r.addend = rawAddend;
|
||||
if (anyRel.r_word0 & R_SCATTERED) {
|
||||
error("TODO: Scattered relocations not supported");
|
||||
} else {
|
||||
if (rel.r_symbolnum == 0 || rel.r_symbolnum > subsections.size())
|
||||
fatal("invalid section index in relocation for offset " +
|
||||
std::to_string(r.offset) + " in section " + sec.sectname +
|
||||
" of " + getName());
|
||||
|
||||
SubsectionMap &targetSubsecMap = subsections[rel.r_symbolnum - 1];
|
||||
const section_64 &targetSec = sectionHeaders[rel.r_symbolnum - 1];
|
||||
// The implicit addend for pcrel section relocations is the pcrel offset
|
||||
// in terms of the addresses in the input file. Here we adjust it so that
|
||||
// it describes the offset from the start of the target section.
|
||||
// TODO: Figure out what to do for non-pcrel section relocations.
|
||||
// TODO: The offset of 4 is probably not right for ARM64, nor for
|
||||
// relocations with r_length != 2.
|
||||
uint32_t targetOffset =
|
||||
sec.addr + secRelOffset + 4 + rawAddend - targetSec.addr;
|
||||
r.target = findContainingSubsection(targetSubsecMap, &targetOffset);
|
||||
r.addend = targetOffset;
|
||||
auto rel = reinterpret_cast<const relocation_info &>(anyRel);
|
||||
r.type = rel.r_type;
|
||||
r.offset = rel.r_address;
|
||||
r.addend = target->getImplicitAddend(buf + sec.offset + r.offset, r.type);
|
||||
if (rel.r_extern) {
|
||||
r.target = symbols[rel.r_symbolnum];
|
||||
} else {
|
||||
if (rel.r_symbolnum == 0 || rel.r_symbolnum > sections.size())
|
||||
fatal("invalid section index in relocation for offset " +
|
||||
std::to_string(r.offset) + " in section " + sec.sectname +
|
||||
" of " + getName());
|
||||
r.target = sections[rel.r_symbolnum - 1];
|
||||
}
|
||||
}
|
||||
|
||||
InputSection *subsec = findContainingSubsection(subsecMap, &secRelOffset);
|
||||
r.offset = secRelOffset;
|
||||
subsec->relocs.push_back(r);
|
||||
}
|
||||
}
|
||||
|
||||
void InputFile::parseSymbols(ArrayRef<const nlist_64> nList, const char *strtab,
|
||||
bool subsectionsViaSymbols) {
|
||||
// resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
|
||||
// out-of-sequence.
|
||||
symbols.resize(nList.size());
|
||||
std::vector<size_t> altEntrySymIdxs;
|
||||
|
||||
auto createDefined = [&](const nlist_64 &sym, InputSection *isec,
|
||||
uint32_t value) -> Symbol * {
|
||||
StringRef name = strtab + sym.n_strx;
|
||||
if (sym.n_type & N_EXT)
|
||||
// Global defined symbol
|
||||
return symtab->addDefined(name, isec, value);
|
||||
else
|
||||
// Local defined symbol
|
||||
return make<Defined>(name, isec, value);
|
||||
};
|
||||
|
||||
for (size_t i = 0, n = nList.size(); i < n; ++i) {
|
||||
const nlist_64 &sym = nList[i];
|
||||
|
||||
// Undefined symbol
|
||||
if (!sym.n_sect) {
|
||||
StringRef name = strtab + sym.n_strx;
|
||||
symbols[i] = symtab->addUndefined(name);
|
||||
continue;
|
||||
}
|
||||
|
||||
const section_64 &sec = sectionHeaders[sym.n_sect - 1];
|
||||
SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
|
||||
uint64_t offset = sym.n_value - sec.addr;
|
||||
|
||||
// If the input file does not use subsections-via-symbols, all symbols can
|
||||
// use the same subsection. Otherwise, we must split the sections along
|
||||
// symbol boundaries.
|
||||
if (!subsectionsViaSymbols) {
|
||||
symbols[i] = createDefined(sym, subsecMap[0], offset);
|
||||
continue;
|
||||
}
|
||||
|
||||
// nList entries aren't necessarily arranged in address order. Therefore,
|
||||
// we can't create alt-entry symbols at this point because a later symbol
|
||||
// may split its section, which may affect which subsection the alt-entry
|
||||
// symbol is assigned to. So we need to handle them in a second pass below.
|
||||
if (sym.n_desc & N_ALT_ENTRY) {
|
||||
altEntrySymIdxs.push_back(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find the subsection corresponding to the greatest section offset that is
|
||||
// <= that of the current symbol. The subsection that we find either needs
|
||||
// to be used directly or split in two.
|
||||
uint32_t firstSize = offset;
|
||||
InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize);
|
||||
|
||||
if (firstSize == 0) {
|
||||
// Alias of an existing symbol, or the first symbol in the section. These
|
||||
// are handled by reusing the existing section.
|
||||
symbols[i] = createDefined(sym, firstIsec, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
// We saw a symbol definition at a new offset. Split the section into two
|
||||
// subsections. The new symbol uses the second subsection.
|
||||
auto *secondIsec = make<InputSection>(*firstIsec);
|
||||
secondIsec->data = firstIsec->data.slice(firstSize);
|
||||
firstIsec->data = firstIsec->data.slice(0, firstSize);
|
||||
// TODO: ld64 appears to preserve the original alignment as well as each
|
||||
// subsection's offset from the last aligned address. We should consider
|
||||
// emulating that behavior.
|
||||
secondIsec->align = MinAlign(firstIsec->align, offset);
|
||||
|
||||
subsecMap[offset] = secondIsec;
|
||||
// By construction, the symbol will be at offset zero in the new section.
|
||||
symbols[i] = createDefined(sym, secondIsec, 0);
|
||||
}
|
||||
|
||||
for (size_t idx : altEntrySymIdxs) {
|
||||
const nlist_64 &sym = nList[idx];
|
||||
SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
|
||||
uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
|
||||
InputSection *subsec = findContainingSubsection(subsecMap, &off);
|
||||
symbols[idx] = createDefined(sym, subsec, off);
|
||||
relocs.push_back(r);
|
||||
}
|
||||
}
|
||||
|
||||
ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) {
|
||||
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
|
||||
auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
|
||||
ArrayRef<section_64> objSections;
|
||||
|
||||
if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
|
||||
auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
|
||||
sectionHeaders = ArrayRef<section_64>{
|
||||
objSections = ArrayRef<section_64>{
|
||||
reinterpret_cast<const section_64 *>(c + 1), c->nsects};
|
||||
parseSections(sectionHeaders);
|
||||
sections = parseSections(objSections);
|
||||
}
|
||||
|
||||
// TODO: Error on missing LC_SYMTAB?
|
||||
if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
|
||||
auto *c = reinterpret_cast<const symtab_command *>(cmd);
|
||||
const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
|
||||
ArrayRef<const nlist_64> nList(
|
||||
reinterpret_cast<const nlist_64 *>(buf + c->symoff), c->nsyms);
|
||||
const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
|
||||
bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
|
||||
parseSymbols(nList, strtab, subsectionsViaSymbols);
|
||||
|
||||
symbols.reserve(c->nsyms);
|
||||
|
||||
for (const nlist_64 &sym : nList) {
|
||||
StringRef name = strtab + sym.n_strx;
|
||||
|
||||
// Undefined symbol
|
||||
if (!sym.n_sect) {
|
||||
symbols.push_back(symtab->addUndefined(name));
|
||||
continue;
|
||||
}
|
||||
|
||||
InputSection *isec = sections[sym.n_sect - 1];
|
||||
const section_64 &objSec = objSections[sym.n_sect - 1];
|
||||
uint64_t value = sym.n_value - objSec.addr;
|
||||
|
||||
// Global defined symbol
|
||||
if (sym.n_type & N_EXT) {
|
||||
symbols.push_back(symtab->addDefined(name, isec, value));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Local defined symbol
|
||||
symbols.push_back(make<Defined>(name, isec, value));
|
||||
}
|
||||
}
|
||||
|
||||
// The relocations may refer to the symbols, so we parse them after we have
|
||||
// parsed all the symbols.
|
||||
for (size_t i = 0, n = subsections.size(); i < n; ++i)
|
||||
parseRelocations(sectionHeaders[i], subsections[i]);
|
||||
// the symbols loaded.
|
||||
if (!sections.empty()) {
|
||||
auto it = sections.begin();
|
||||
for (const section_64 &sec : objSections) {
|
||||
parseRelocations(sec, (*it)->relocs);
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
|
||||
|
@ -409,8 +324,7 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
|
|||
sym.getName());
|
||||
auto file = make<ObjFile>(mb);
|
||||
symbols.insert(symbols.end(), file->symbols.begin(), file->symbols.end());
|
||||
subsections.insert(subsections.end(), file->subsections.begin(),
|
||||
file->subsections.end());
|
||||
sections.insert(sections.end(), file->sections.begin(), file->sections.end());
|
||||
}
|
||||
|
||||
// Returns "<internal>" or "baz.o".
|
||||
|
|
|
@ -14,8 +14,6 @@
|
|||
#include "llvm/BinaryFormat/MachO.h"
|
||||
#include "llvm/Object/Archive.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace lld {
|
||||
|
@ -25,11 +23,6 @@ class InputSection;
|
|||
class Symbol;
|
||||
struct Reloc;
|
||||
|
||||
// If .subsections_via_symbols is set, each InputSection will be split along
|
||||
// symbol boundaries. The keys of a SubsectionMap represent the offsets of
|
||||
// each subsection from the start of the original pre-split InputSection.
|
||||
using SubsectionMap = std::map<uint32_t, InputSection *>;
|
||||
|
||||
class InputFile {
|
||||
public:
|
||||
enum Kind {
|
||||
|
@ -44,18 +37,15 @@ public:
|
|||
|
||||
MemoryBufferRef mb;
|
||||
std::vector<Symbol *> symbols;
|
||||
ArrayRef<llvm::MachO::section_64> sectionHeaders;
|
||||
std::vector<SubsectionMap> subsections;
|
||||
std::vector<InputSection *> sections;
|
||||
|
||||
protected:
|
||||
InputFile(Kind kind, MemoryBufferRef mb) : mb(mb), fileKind(kind) {}
|
||||
|
||||
void parseSections(ArrayRef<llvm::MachO::section_64>);
|
||||
std::vector<InputSection *> parseSections(ArrayRef<llvm::MachO::section_64>);
|
||||
|
||||
void parseSymbols(ArrayRef<const llvm::MachO::nlist_64> nList,
|
||||
const char *strtab, bool subsectionsViaSymbols);
|
||||
|
||||
void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &);
|
||||
void parseRelocations(const llvm::MachO::section_64 &,
|
||||
std::vector<Reloc> &relocs);
|
||||
|
||||
private:
|
||||
const Kind fileKind;
|
||||
|
|
|
@ -32,6 +32,7 @@ void InputSection::writeTo(uint8_t *buf) {
|
|||
|
||||
for (Reloc &r : relocs) {
|
||||
uint64_t va = 0;
|
||||
uint64_t addend = r.addend;
|
||||
if (auto *s = r.target.dyn_cast<Symbol *>()) {
|
||||
if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s)) {
|
||||
va = target->getDylibSymbolVA(*dylibSymbol, r.type);
|
||||
|
@ -40,9 +41,15 @@ void InputSection::writeTo(uint8_t *buf) {
|
|||
}
|
||||
} else if (auto *isec = r.target.dyn_cast<InputSection *>()) {
|
||||
va = isec->getVA();
|
||||
// The implicit addend for pcrel section relocations is the pcrel offset
|
||||
// in terms of the addresses in the input file. Here we adjust it so that
|
||||
// it describes the offset from the start of the target section.
|
||||
// TODO: Figure out what to do for non-pcrel section relocations.
|
||||
// TODO: The offset of 4 is probably not right for ARM64.
|
||||
addend -= isec->header->addr - (header->addr + r.offset + 4);
|
||||
}
|
||||
|
||||
uint64_t val = va + r.addend;
|
||||
uint64_t val = va + addend;
|
||||
if (1) // TODO: handle non-pcrel relocations
|
||||
val -= getVA() + r.offset;
|
||||
target->relocateOne(buf + r.offset, r.type, val);
|
||||
|
|
|
@ -24,11 +24,7 @@ class Symbol;
|
|||
|
||||
struct Reloc {
|
||||
uint8_t type;
|
||||
// Adding this offset to the address of the target symbol or subsection gives
|
||||
// the destination that this relocation refers to.
|
||||
uint32_t addend;
|
||||
// The offset from the start of the subsection that this relocation belongs
|
||||
// to.
|
||||
uint32_t offset;
|
||||
llvm::PointerUnion<Symbol *, InputSection *> target;
|
||||
};
|
||||
|
@ -46,6 +42,8 @@ public:
|
|||
InputFile *file = nullptr;
|
||||
StringRef name;
|
||||
StringRef segname;
|
||||
// This provides access to the address of the section in the input file.
|
||||
const llvm::MachO::section_64 *header;
|
||||
|
||||
OutputSection *parent = nullptr;
|
||||
uint64_t outSecOff = 0;
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
# REQUIRES: x86
|
||||
# RUN: mkdir -p %t
|
||||
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
|
||||
|
||||
# RUN: echo "_bar_str" > %t/order-file
|
||||
# RUN: echo "_foo_str" >> %t/order-file
|
||||
|
||||
# RUN: lld -flavor darwinnew -o %t/test %t/test.o -order_file %t/order-file
|
||||
# RUN: llvm-objdump --section-headers -d --no-show-raw-insn %t/test | FileCheck %s
|
||||
# CHECK-LABEL: Sections:
|
||||
# CHECK: __cstring {{[^ ]*}} {{0*}}[[#%x, CSTRING_ADDR:]]
|
||||
# CHECK-LABEL: Disassembly of section __TEXT,__text:
|
||||
## L_.str should end up at CSTRING_ADDR + 4, and leaq is 7 bytes long so we
|
||||
## have RIP = ADDR + 7
|
||||
# CHECK: [[#%x, ADDR:]]: leaq
|
||||
# CHECK-SAME: [[#%u, CSTRING_ADDR + 4 - ADDR - 7]](%rip), %rsi {{.*}} <_bar_str+0x4>
|
||||
|
||||
# RUN: llvm-readobj --string-dump=__cstring %t/test | FileCheck %s --check-prefix=STRINGS
|
||||
# STRINGS: bar
|
||||
# STRINGS: Private symbol
|
||||
# STRINGS: foo
|
||||
|
||||
.text
|
||||
.globl _main, _foo_str, _bar_str
|
||||
|
||||
_main:
|
||||
leaq L_.str(%rip), %rsi
|
||||
mov $0, %rax
|
||||
ret
|
||||
|
||||
.section __TEXT,__cstring
|
||||
_foo_str:
|
||||
.asciz "foo"
|
||||
|
||||
_bar_str:
|
||||
.asciz "bar"
|
||||
|
||||
## References to this generate a section relocation
|
||||
## N.B.: ld64 doesn't actually reorder symbols in __cstring based on the order
|
||||
## file. Only our implementation does. However, I'm not sure how else to
|
||||
## test section relocations that target an address inside a relocated
|
||||
## symbol: using a non-__cstring section would cause llvm-mc to emit a
|
||||
## symbol relocation instead using the nearest symbol.
|
||||
L_.str:
|
||||
.asciz "Private symbol"
|
||||
|
||||
.subsections_via_symbols
|
|
@ -1,55 +0,0 @@
|
|||
# REQUIRES: x86
|
||||
# RUN: mkdir -p %t
|
||||
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
|
||||
|
||||
# RUN: echo "_bar" > %t/order-file-1
|
||||
# RUN: echo "_foo" >> %t/order-file-1
|
||||
# RUN: echo "_main" >> %t/order-file-1
|
||||
## _qux is marked as .alt_entry, so it should not create a new subsection and
|
||||
## its contents should move with _bar to the start of the output despite the
|
||||
## order file listing it at the end.
|
||||
# RUN: echo "_qux" >> %t/order-file-1
|
||||
|
||||
## _bar and _baz point to the same address, so both order files should achieve
|
||||
## the same result.
|
||||
# RUN: echo "_baz" > %t/order-file-2
|
||||
# RUN: echo "_foo" >> %t/order-file-2
|
||||
# RUN: echo "_main" >> %t/order-file-2
|
||||
# RUN: echo "_qux" >> %t/order-file-2
|
||||
|
||||
# RUN: lld -flavor darwinnew -o %t/test-1 %t/test.o -order_file %t/order-file-1
|
||||
# RUN: llvm-objdump -d --no-show-raw-insn %t/test-1 | FileCheck %s
|
||||
# RUN: lld -flavor darwinnew -o %t/test-2 %t/test.o -order_file %t/order-file-2
|
||||
# RUN: llvm-objdump -d --no-show-raw-insn %t/test-2 | FileCheck %s
|
||||
# CHECK-LABEL: Disassembly of section __TEXT,__text:
|
||||
# CHECK: <_bar>:
|
||||
# CHECK-NEXT: callq {{.*}} <_foo>
|
||||
# CHECK-EMPTY:
|
||||
# CHECK-NEXT: <_qux>:
|
||||
# CHECK-NEXT: retq
|
||||
# CHECK: <_foo>:
|
||||
# CHECK-NEXT: retq
|
||||
# CHECK: <_main>:
|
||||
# CHECK-NEXT: callq {{.*}} <_bar>
|
||||
# CHECK-NEXT: movq $0, %rax
|
||||
# CHECK-NEXT: retq
|
||||
|
||||
.text
|
||||
.globl _main, _foo, _bar, _qux
|
||||
.alt_entry _qux
|
||||
|
||||
_foo:
|
||||
retq
|
||||
|
||||
_main:
|
||||
callq _bar
|
||||
movq $0, %rax
|
||||
retq
|
||||
|
||||
_bar:
|
||||
_baz:
|
||||
callq _foo
|
||||
_qux:
|
||||
retq
|
||||
|
||||
.subsections_via_symbols
|
Loading…
Reference in New Issue