[lld-macho] Support __dso_handle for C++

The Itanium C++ ABI requires dylibs to pass a pointer (__dso_handle) to
__cxa_atexit, which does e.g. cleanup of static global variables. The ABI
document says that the pointer can point to any address in one of the dylib's
segments, but in practice ld64 seems to set it to point to the header, so
that's what's implemented here.

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D83603
This commit is contained in:
Jez Ng 2020-07-30 14:28:41 -07:00
parent 9ebeac6788
commit 3587de2281
9 changed files with 110 additions and 25 deletions

View File

@ -13,6 +13,7 @@
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Writer.h"
@ -479,6 +480,7 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
}
createSyntheticSections();
symtab->addDSOHandle(in.header);
// Initialize InputSections.
for (InputFile *file : inputFiles) {

View File

@ -94,4 +94,17 @@ Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
return s;
}
// Installs the linker-synthesized ___dso_handle symbol, backed by the given
// header section. If the name is already held by a Defined symbol, an error
// naming the offending input file is reported. In every case the table entry
// is then replaced with a DSOHandle, and that entry is returned.
Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) {
  Symbol *sym;
  bool isNew;
  std::tie(sym, isNew) = insert(DSOHandle::name);
  // Only a pre-existing *defined* symbol counts as a conflict; any other
  // pre-existing kind is replaced without a diagnostic.
  auto *conflict = isNew ? nullptr : dyn_cast<Defined>(sym);
  if (conflict)
    error("found defined symbol from " + conflict->isec->file->getName() +
          " with illegal name " + DSOHandle::name);
  replaceSymbol<DSOHandle>(sym, header);
  return sym;
}
SymbolTable *macho::symtab;

View File

@ -20,6 +20,7 @@ namespace macho {
class ArchiveFile;
class DylibFile;
class InputSection;
class MachHeaderSection;
class Symbol;
/*
@ -40,6 +41,8 @@ public:
Symbol *addLazy(StringRef name, ArchiveFile *file,
const llvm::object::Archive::Symbol &sym);
Symbol *addDSOHandle(const MachHeaderSection *);
ArrayRef<Symbol *> getSymbols() const { return symVector; }
Symbol *find(StringRef name);

View File

@ -8,6 +8,7 @@
#include "Symbols.h"
#include "InputFiles.h"
#include "SyntheticSections.h"
using namespace llvm;
using namespace lld;
@ -21,3 +22,9 @@ std::string lld::toString(const Symbol &sym) {
return *s;
return std::string(sym.getName());
}
// The handle's virtual address is the address recorded on the header section.
uint64_t DSOHandle::getVA() const { return header->addr; }
// The handle's file offset is the file offset recorded on the header section.
uint64_t DSOHandle::getFileOffset() const { return header->fileOff; }
// Out-of-class definition of the static constexpr data member declared in the
// class body; before C++17 this is required if the member is odr-used.
constexpr StringRef DSOHandle::name;

View File

@ -19,6 +19,7 @@ namespace lld {
namespace macho {
class InputSection;
class MachHeaderSection;
class DylibFile;
class ArchiveFile;
@ -37,6 +38,7 @@ public:
UndefinedKind,
DylibKind,
LazyKind,
DSOHandleKind,
};
virtual ~Symbol() {}
@ -45,9 +47,11 @@ public:
StringRef getName() const { return {name.data, name.size}; }
uint64_t getVA() const;
virtual uint64_t getVA() const { return 0; }
uint64_t getFileOffset() const;
// Default implementation: calling this on a symbol kind that does not override
// it is treated as a programming error (llvm_unreachable). Subclasses that
// have a position in the output file override it.
virtual uint64_t getFileOffset() const {
llvm_unreachable("attempt to get an offset from a non-defined symbol");
}
virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); }
@ -70,6 +74,12 @@ public:
static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
uint64_t getVA() const override { return isec->getVA() + value; }
uint64_t getFileOffset() const override {
return isec->getFileOffset() + value;
}
InputSection *isec;
uint32_t value;
@ -115,17 +125,32 @@ private:
const llvm::object::Archive::Symbol sym;
};
inline uint64_t Symbol::getVA() const {
if (auto *d = dyn_cast<Defined>(this))
return d->isec->getVA() + d->value;
return 0;
}
// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
// does e.g. cleanup of static global variables. The ABI document says that the
// pointer can point to any address in one of the dylib's segments, but in
// practice ld64 seems to set it to point to the header, so that's what's
// implemented here.
//
// The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
// tested this on an ARM platform.
//
// DSOHandle effectively functions like a Defined symbol, but it doesn't belong
// to an InputSection.
class DSOHandle : public Symbol {
public:
DSOHandle(const MachHeaderSection *header)
: Symbol(DSOHandleKind, name), header(header) {}
inline uint64_t Symbol::getFileOffset() const {
if (auto *d = dyn_cast<Defined>(this))
return d->isec->getFileOffset() + d->value;
llvm_unreachable("attempt to get an offset from an undefined symbol");
}
const MachHeaderSection *header;
uint64_t getVA() const override;
uint64_t getFileOffset() const override;
static constexpr StringRef name = "___dso_handle";
static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
};
union SymbolUnion {
alignas(Defined) char a[sizeof(Defined)];

View File

@ -273,6 +273,7 @@ private:
};
struct InStruct {
MachHeaderSection *header = nullptr;
BindingSection *binding = nullptr;
GotSection *got = nullptr;
LazyPointerSection *lazyPointers = nullptr;

View File

@ -53,7 +53,7 @@ public:
std::unique_ptr<FileOutputBuffer> &buffer;
uint64_t addr = 0;
uint64_t fileOff = 0;
MachHeaderSection *headerSection = nullptr;
MachHeaderSection *header = nullptr;
LazyBindingSection *lazyBindingSection = nullptr;
ExportSection *exportSection = nullptr;
StringTableSection *stringTableSection = nullptr;
@ -264,20 +264,18 @@ void Writer::scanRelocations() {
}
void Writer::createLoadCommands() {
headerSection->addLoadCommand(
in.header->addLoadCommand(
make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection));
headerSection->addLoadCommand(
make<LCSymtab>(symtabSection, stringTableSection));
headerSection->addLoadCommand(make<LCDysymtab>());
in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
in.header->addLoadCommand(make<LCDysymtab>());
switch (config->outputType) {
case MH_EXECUTE:
headerSection->addLoadCommand(make<LCMain>());
headerSection->addLoadCommand(make<LCLoadDylinker>());
in.header->addLoadCommand(make<LCMain>());
in.header->addLoadCommand(make<LCLoadDylinker>());
break;
case MH_DYLIB:
headerSection->addLoadCommand(
make<LCDylib>(LC_ID_DYLIB, config->installName));
in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName));
break;
default:
llvm_unreachable("unhandled output file type");
@ -285,19 +283,19 @@ void Writer::createLoadCommands() {
uint8_t segIndex = 0;
for (OutputSegment *seg : outputSegments) {
headerSection->addLoadCommand(make<LCSegment>(seg->name, seg));
in.header->addLoadCommand(make<LCSegment>(seg->name, seg));
seg->index = segIndex++;
}
uint64_t dylibOrdinal = 1;
for (InputFile *file : inputFiles) {
if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
headerSection->addLoadCommand(
in.header->addLoadCommand(
make<LCDylib>(LC_LOAD_DYLIB, dylibFile->dylibName));
dylibFile->ordinal = dylibOrdinal++;
if (dylibFile->reexport)
headerSection->addLoadCommand(
in.header->addLoadCommand(
make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->dylibName));
}
}
@ -406,7 +404,6 @@ static void sortSegmentsAndSections() {
void Writer::createOutputSections() {
// First, create hidden sections
headerSection = make<MachHeaderSection>();
lazyBindingSection = make<LazyBindingSection>();
stringTableSection = make<StringTableSection>();
symtabSection = make<SymtabSection>(*stringTableSection);
@ -539,6 +536,7 @@ void Writer::run() {
void macho::writeResult() { Writer().run(); }
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
in.binding = make<BindingSection>();
in.got = make<GotSection>();
in.lazyPointers = make<LazyPointerSection>();

View File

@ -0,0 +1,16 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: lld -flavor darwinnew %t.o -o %t
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
# CHECK: leaq {{.*}} # 100000000
# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib
# RUN: llvm-objdump -d --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK
# DYLIB-CHECK: leaq {{.*}} # 0
.globl _main
.text
_main:
leaq ___dso_handle(%rip), %rdx
ret

View File

@ -0,0 +1,20 @@
# REQUIRES: x86
## If for some bizarre reason the input file defines its own ___dso_handle, we
## should raise an error. At least, we've implemented this behavior if the
## conflicting symbol is a global. A local symbol of the same name will still
## take priority in our implementation, unlike in ld64. But that's a pretty
## far-out edge case that should be safe to ignore.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: not lld -flavor darwinnew -dylib %t.o -o %t.dylib 2>&1 | FileCheck %s -DFILE=%t.o
# CHECK: error: found defined symbol from [[FILE]] with illegal name ___dso_handle
.globl _main, ___dso_handle
.text
_main:
leaq ___dso_handle(%rip), %rdx
ret
___dso_handle:
.space 1