2020-04-03 02:54:05 +08:00
|
|
|
//===- Symbols.h ------------------------------------------------*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLD_MACHO_SYMBOLS_H
|
|
|
|
#define LLD_MACHO_SYMBOLS_H
|
|
|
|
|
|
|
|
#include "InputSection.h"
|
|
|
|
#include "Target.h"
|
2020-05-19 11:28:50 +08:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "lld/Common/Strings.h"
|
|
|
|
#include "llvm/Object/Archive.h"
|
2020-09-25 05:44:14 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
namespace lld {
|
|
|
|
namespace macho {
|
|
|
|
|
|
|
|
class InputSection;
|
2020-07-31 05:28:41 +08:00
|
|
|
class MachHeaderSection;
|
2020-04-22 04:37:57 +08:00
|
|
|
class DylibFile;
|
2020-04-03 02:54:05 +08:00
|
|
|
class ArchiveFile;
|
|
|
|
|
|
|
|
struct StringRefZ {
|
|
|
|
StringRefZ(const char *s) : data(s), size(-1) {}
|
|
|
|
StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
|
|
|
|
|
|
|
|
const char *data;
|
|
|
|
const uint32_t size;
|
|
|
|
};
|
|
|
|
|
|
|
|
class Symbol {
|
|
|
|
public:
|
|
|
|
enum Kind {
|
|
|
|
DefinedKind,
|
|
|
|
UndefinedKind,
|
2020-09-25 05:44:14 +08:00
|
|
|
CommonKind,
|
2020-04-22 04:37:57 +08:00
|
|
|
DylibKind,
|
2020-05-15 03:43:51 +08:00
|
|
|
LazyKind,
|
2020-07-31 05:28:41 +08:00
|
|
|
DSOHandleKind,
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
2020-07-25 06:55:25 +08:00
|
|
|
virtual ~Symbol() {}
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
Kind kind() const { return static_cast<Kind>(symbolKind); }
|
|
|
|
|
|
|
|
StringRef getName() const { return {name.data, name.size}; }
|
|
|
|
|
2020-07-31 05:28:41 +08:00
|
|
|
virtual uint64_t getVA() const { return 0; }
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2020-07-31 05:28:41 +08:00
|
|
|
virtual uint64_t getFileOffset() const {
|
|
|
|
llvm_unreachable("attempt to get an offset from a non-defined symbol");
|
|
|
|
}
|
2020-05-19 11:28:50 +08:00
|
|
|
|
2020-07-25 06:55:25 +08:00
|
|
|
virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); }
|
|
|
|
|
2020-08-13 10:50:09 +08:00
|
|
|
virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
|
|
|
|
|
2020-08-25 12:57:59 +08:00
|
|
|
// Whether this symbol is in the GOT or TLVPointer sections.
|
|
|
|
bool isInGot() const { return gotIndex != UINT32_MAX; }
|
|
|
|
|
2020-08-28 06:54:42 +08:00
|
|
|
// Whether this symbol is in the StubsSection.
|
|
|
|
bool isInStubs() const { return stubsIndex != UINT32_MAX; }
|
|
|
|
|
2020-08-13 10:50:09 +08:00
|
|
|
// The index of this symbol in the GOT or the TLVPointer section, depending
|
|
|
|
// on whether it is a thread-local. A given symbol cannot be referenced by
|
|
|
|
// both these sections at once.
|
2020-06-14 11:00:06 +08:00
|
|
|
uint32_t gotIndex = UINT32_MAX;
|
|
|
|
|
2020-08-28 06:54:42 +08:00
|
|
|
uint32_t stubsIndex = UINT32_MAX;
|
|
|
|
|
2020-09-05 09:02:07 +08:00
|
|
|
uint32_t symtabIndex = UINT32_MAX;
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
protected:
|
2020-04-22 04:37:57 +08:00
|
|
|
Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {}
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
Kind symbolKind;
|
|
|
|
StringRefZ name;
|
|
|
|
};
|
|
|
|
|
|
|
|
class Defined : public Symbol {
|
|
|
|
public:
|
2020-08-25 12:57:59 +08:00
|
|
|
Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef,
|
|
|
|
bool isExternal)
|
2020-08-28 06:59:30 +08:00
|
|
|
: Symbol(DefinedKind, name), isec(isec), value(value),
|
|
|
|
overridesWeakDef(false), weakDef(isWeakDef), external(isExternal) {}
|
2020-07-25 06:55:25 +08:00
|
|
|
|
|
|
|
bool isWeakDef() const override { return weakDef; }
|
2020-09-18 23:40:46 +08:00
|
|
|
bool isTlv() const override {
|
|
|
|
return !isAbsolute() && isThreadLocalVariables(isec->flags);
|
|
|
|
}
|
2020-08-13 10:50:09 +08:00
|
|
|
|
2020-08-25 12:57:59 +08:00
|
|
|
bool isExternal() const { return external; }
|
2020-09-18 23:40:46 +08:00
|
|
|
bool isAbsolute() const { return isec == nullptr; }
|
2020-08-25 12:57:59 +08:00
|
|
|
|
2020-09-18 23:40:46 +08:00
|
|
|
uint64_t getVA() const override;
|
|
|
|
uint64_t getFileOffset() const override;
|
2020-07-31 05:28:41 +08:00
|
|
|
|
2020-09-18 23:40:46 +08:00
|
|
|
static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
|
2020-07-31 05:28:41 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
InputSection *isec;
|
|
|
|
uint32_t value;
|
|
|
|
|
2020-08-28 06:59:30 +08:00
|
|
|
bool overridesWeakDef : 1;
|
|
|
|
|
2020-07-25 06:55:25 +08:00
|
|
|
private:
|
2020-08-28 06:59:30 +08:00
|
|
|
const bool weakDef : 1;
|
|
|
|
const bool external : 1;
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
class Undefined : public Symbol {
|
|
|
|
public:
|
2020-04-22 04:37:57 +08:00
|
|
|
Undefined(StringRefZ name) : Symbol(UndefinedKind, name) {}
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
|
|
|
|
};
|
|
|
|
|
2020-09-25 05:44:14 +08:00
|
|
|
// On Unix, it is traditionally allowed to write variable definitions without
|
|
|
|
// initialization expressions (such as "int foo;") to header files. These are
|
|
|
|
// called tentative definitions.
|
|
|
|
//
|
|
|
|
// Using tentative definitions is usually considered a bad practice; you should
|
|
|
|
// write only declarations (such as "extern int foo;") to header files.
|
|
|
|
// Nevertheless, the linker and the compiler have to do something to support
|
|
|
|
// bad code by allowing duplicate definitions for this particular case.
|
|
|
|
//
|
|
|
|
// The compiler creates common symbols when it sees tentative definitions.
|
|
|
|
// (You can suppress this behavior and let the compiler create a regular
|
|
|
|
// defined symbol by passing -fno-common.) When linking the final binary, if
|
|
|
|
// there are remaining common symbols after name resolution is complete, the
|
|
|
|
// linker converts them to regular defined symbols in a __common section.
|
|
|
|
class CommonSymbol : public Symbol {
|
|
|
|
public:
|
|
|
|
CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align)
|
|
|
|
: Symbol(CommonKind, name), file(file), size(size),
|
|
|
|
align(align != 1 ? align : llvm::PowerOf2Ceil(size)) {
|
|
|
|
// TODO: cap maximum alignment
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
|
|
|
|
|
|
|
|
InputFile *const file;
|
|
|
|
const uint64_t size;
|
|
|
|
const uint32_t align;
|
|
|
|
};
|
|
|
|
|
2020-04-22 04:37:57 +08:00
|
|
|
class DylibSymbol : public Symbol {
|
|
|
|
public:
|
2020-08-13 10:50:09 +08:00
|
|
|
DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv)
|
|
|
|
: Symbol(DylibKind, name), file(file), weakDef(isWeakDef), tlv(isTlv) {}
|
2020-07-25 06:55:25 +08:00
|
|
|
|
|
|
|
bool isWeakDef() const override { return weakDef; }
|
2020-08-13 10:50:09 +08:00
|
|
|
bool isTlv() const override { return tlv; }
|
2020-08-28 06:54:42 +08:00
|
|
|
bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
|
2020-08-13 10:50:09 +08:00
|
|
|
|
2020-04-22 04:37:57 +08:00
|
|
|
static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
|
|
|
|
|
|
|
|
DylibFile *file;
|
2020-08-28 06:54:42 +08:00
|
|
|
uint32_t stubsHelperIndex = UINT32_MAX;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
uint32_t lazyBindOffset = UINT32_MAX;
|
2020-07-25 06:55:25 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
const bool weakDef;
|
2020-08-13 10:50:09 +08:00
|
|
|
const bool tlv;
|
2020-04-22 04:37:57 +08:00
|
|
|
};
|
|
|
|
|
2020-05-15 03:43:51 +08:00
|
|
|
class LazySymbol : public Symbol {
|
|
|
|
public:
|
|
|
|
LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
|
|
|
|
: Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
|
|
|
|
|
|
|
|
static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
|
|
|
|
|
|
|
|
void fetchArchiveMember();
|
|
|
|
|
|
|
|
private:
|
|
|
|
ArchiveFile *file;
|
|
|
|
const llvm::object::Archive::Symbol sym;
|
|
|
|
};
|
|
|
|
|
2020-07-31 05:28:41 +08:00
|
|
|
// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
|
|
|
|
// does e.g. cleanup of static global variables. The ABI document says that the
|
|
|
|
// pointer can point to any address in one of the dylib's segments, but in
|
|
|
|
// practice ld64 seems to set it to point to the header, so that's what's
|
|
|
|
// implemented here.
|
|
|
|
//
|
|
|
|
// The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
|
|
|
|
// tested this on an ARM platform.
|
|
|
|
//
|
|
|
|
// DSOHandle effectively functions like a Defined symbol, but it doesn't belong
|
|
|
|
// to an InputSection.
|
|
|
|
class DSOHandle : public Symbol {
|
|
|
|
public:
|
|
|
|
DSOHandle(const MachHeaderSection *header)
|
|
|
|
: Symbol(DSOHandleKind, name), header(header) {}
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2020-07-31 05:28:41 +08:00
|
|
|
const MachHeaderSection *header;
|
|
|
|
|
|
|
|
uint64_t getVA() const override;
|
|
|
|
|
|
|
|
uint64_t getFileOffset() const override;
|
|
|
|
|
2020-08-28 06:59:48 +08:00
|
|
|
bool isWeakDef() const override { return false; }
|
|
|
|
|
|
|
|
bool isTlv() const override { return false; }
|
|
|
|
|
2020-07-31 05:28:41 +08:00
|
|
|
static constexpr StringRef name = "___dso_handle";
|
|
|
|
|
2020-08-28 06:59:48 +08:00
|
|
|
static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; }
|
2020-07-31 05:28:41 +08:00
|
|
|
};
|
2020-05-19 11:28:50 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
union SymbolUnion {
|
|
|
|
alignas(Defined) char a[sizeof(Defined)];
|
|
|
|
alignas(Undefined) char b[sizeof(Undefined)];
|
2020-09-25 05:44:14 +08:00
|
|
|
alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
|
|
|
|
alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
|
|
|
|
alignas(LazySymbol) char e[sizeof(LazySymbol)];
|
|
|
|
alignas(DSOHandle) char f[sizeof(DSOHandle)];
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
template <typename T, typename... ArgT>
|
2020-08-28 06:59:30 +08:00
|
|
|
T *replaceSymbol(Symbol *s, ArgT &&... arg) {
|
2020-04-03 02:54:05 +08:00
|
|
|
static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
|
|
|
|
static_assert(alignof(T) <= alignof(SymbolUnion),
|
|
|
|
"SymbolUnion not aligned enough");
|
|
|
|
assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
|
|
|
|
"Not a Symbol");
|
|
|
|
|
2020-08-28 06:59:30 +08:00
|
|
|
return new (s) T(std::forward<ArgT>(arg)...);
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace macho
|
|
|
|
|
|
|
|
std::string toString(const macho::Symbol &);
|
|
|
|
} // namespace lld
|
|
|
|
|
|
|
|
#endif
|