[lld-macho] Devirtualize TargetInfo::getRelocAttrs

This method is called on each relocation when parsing input files, so
the overhead of using virtual functions ends up being quite large.  We
now have a single non-virtual method, which reads from the appropriate
array of relocation attributes that each target's constructor stores in
the relocAttrs member of TargetInfo.

This change results in a modest 2.3% reduction in link time for
chromium_framework measured on an x86-64 VPS, and 0.7% on an arm64 Mac.

    N           Min           Max        Median           Avg        Stddev
x  10     11.869417     12.032609     11.935041     11.938268   0.045802324
+  10     11.581526     11.785265     11.649885     11.659507   0.054634834
Difference at 95.0% confidence
	-0.278761 +/- 0.0473673
	-2.33502% +/- 0.396768%
	(Student's t, pooled s = 0.0504124)

Differential Revision: https://reviews.llvm.org/D130000
This commit is contained in:
Daniel Bertalan 2022-07-18 13:37:40 +02:00
parent 477c2c6f4a
commit 1fb9466c6a
5 changed files with 73 additions and 89 deletions

View File

@ -38,35 +38,27 @@ struct ARM : TargetInfo {
uint64_t entryAddr) const override;
void relaxGotLoad(uint8_t *loc, uint8_t type) const override;
const RelocAttrs &getRelocAttrs(uint8_t type) const override;
uint64_t getPageSize() const override { return 4 * 1024; }
void handleDtraceReloc(const Symbol *sym, const Reloc &r,
uint8_t *loc) const override;
};
} // namespace
const RelocAttrs &ARM::getRelocAttrs(uint8_t type) const {
static const std::array<RelocAttrs, 10> relocAttrsArray{{
static constexpr std::array<RelocAttrs, 10> relocAttrsArray{{
#define B(x) RelocAttrBits::x
{"VANILLA", /* FIXME populate this */ B(_0)},
{"PAIR", /* FIXME populate this */ B(_0)},
{"SECTDIFF", /* FIXME populate this */ B(_0)},
{"LOCAL_SECTDIFF", /* FIXME populate this */ B(_0)},
{"PB_LA_PTR", /* FIXME populate this */ B(_0)},
{"BR24", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"BR22", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"32BIT_BRANCH", /* FIXME populate this */ B(_0)},
{"HALF", /* FIXME populate this */ B(_0)},
{"HALF_SECTDIFF", /* FIXME populate this */ B(_0)},
{"VANILLA", /* FIXME populate this */ B(_0)},
{"PAIR", /* FIXME populate this */ B(_0)},
{"SECTDIFF", /* FIXME populate this */ B(_0)},
{"LOCAL_SECTDIFF", /* FIXME populate this */ B(_0)},
{"PB_LA_PTR", /* FIXME populate this */ B(_0)},
{"BR24", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"BR22", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"32BIT_BRANCH", /* FIXME populate this */ B(_0)},
{"HALF", /* FIXME populate this */ B(_0)},
{"HALF_SECTDIFF", /* FIXME populate this */ B(_0)},
#undef B
}};
assert(type < relocAttrsArray.size() && "invalid relocation type");
if (type >= relocAttrsArray.size())
return invalidRelocAttrs;
return relocAttrsArray[type];
}
}};
int64_t ARM::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
relocation_info rel) const {
@ -167,6 +159,8 @@ ARM::ARM(uint32_t cpuSubtype) : TargetInfo(ILP32()) {
stubSize = 0 /* FIXME */;
stubHelperHeaderSize = 0 /* FIXME */;
stubHelperEntrySize = 0 /* FIXME */;
relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()};
}
TargetInfo *macho::createARMTargetInfo(uint32_t cpuSubtype) {

View File

@ -34,7 +34,6 @@ struct ARM64 : ARM64Common {
void writeStubHelperHeader(uint8_t *buf) const override;
void writeStubHelperEntry(uint8_t *buf, const Symbol &,
uint64_t entryAddr) const override;
const RelocAttrs &getRelocAttrs(uint8_t type) const override;
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
void applyOptimizationHints(uint8_t *, const ConcatInputSection *,
ArrayRef<uint64_t>) const override;
@ -48,31 +47,24 @@ struct ARM64 : ARM64Common {
// absolute version of this relocation. The semantics of the absolute relocation
// are weird -- it results in the value of the GOT slot being written, instead
// of the address. Let's not support it unless we find a real-world use case.
const RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const {
static const std::array<RelocAttrs, 11> relocAttrsArray{{
static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{
#define B(x) RelocAttrBits::x
{"UNSIGNED",
B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)},
{"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)},
{"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)},
{"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)},
{"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
{"GOT_LOAD_PAGEOFF12",
B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
{"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
{"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)},
{"TLVP_LOAD_PAGEOFF12",
B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
{"ADDEND", B(ADDEND)},
{"UNSIGNED",
B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)},
{"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)},
{"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)},
{"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)},
{"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
{"GOT_LOAD_PAGEOFF12",
B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
{"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
{"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)},
{"TLVP_LOAD_PAGEOFF12",
B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
{"ADDEND", B(ADDEND)},
#undef B
}};
assert(type < relocAttrsArray.size() && "invalid relocation type");
if (type >= relocAttrsArray.size())
return invalidRelocAttrs;
return relocAttrsArray[type];
}
}};
static constexpr uint32_t stubCode[] = {
0x90000010, // 00: adrp x16, __la_symbol_ptr@page
@ -150,6 +142,8 @@ ARM64::ARM64() : ARM64Common(LP64()) {
stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
stubHelperEntrySize = sizeof(stubHelperEntryCode);
relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()};
}
namespace {

View File

@ -33,36 +33,29 @@ struct ARM64_32 : ARM64Common {
void writeStubHelperHeader(uint8_t *buf) const override;
void writeStubHelperEntry(uint8_t *buf, const Symbol &,
uint64_t entryAddr) const override;
const RelocAttrs &getRelocAttrs(uint8_t type) const override;
};
} // namespace
// These are very similar to ARM64's relocation attributes, except that we don't
// have the BYTE8 flag set.
const RelocAttrs &ARM64_32::getRelocAttrs(uint8_t type) const {
static const std::array<RelocAttrs, 11> relocAttrsArray{{
static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{
#define B(x) RelocAttrBits::x
{"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4)},
{"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)},
{"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)},
{"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
{"GOT_LOAD_PAGEOFF12",
B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
{"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
{"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)},
{"TLVP_LOAD_PAGEOFF12",
B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
{"ADDEND", B(ADDEND)},
{"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4)},
{"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)},
{"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)},
{"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
{"GOT_LOAD_PAGEOFF12",
B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
{"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
{"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)},
{"TLVP_LOAD_PAGEOFF12",
B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
{"ADDEND", B(ADDEND)},
#undef B
}};
assert(type < relocAttrsArray.size() && "invalid relocation type");
if (type >= relocAttrsArray.size())
return invalidRelocAttrs;
return relocAttrsArray[type];
}
}};
// The stub code is fairly similar to ARM64's, except that we load pointers into
// 32-bit 'w' registers, instead of the 64-bit 'x' ones.
@ -112,6 +105,8 @@ ARM64_32::ARM64_32() : ARM64Common(ILP32()) {
stubSize = sizeof(stubCode);
stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
stubHelperEntrySize = sizeof(stubHelperEntryCode);
relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()};
}
TargetInfo *macho::createARM64_32TargetInfo() {

View File

@ -37,36 +37,28 @@ struct X86_64 : TargetInfo {
uint64_t entryAddr) const override;
void relaxGotLoad(uint8_t *loc, uint8_t type) const override;
const RelocAttrs &getRelocAttrs(uint8_t type) const override;
uint64_t getPageSize() const override { return 4 * 1024; }
void handleDtraceReloc(const Symbol *sym, const Reloc &r,
uint8_t *loc) const override;
};
} // namespace
const RelocAttrs &X86_64::getRelocAttrs(uint8_t type) const {
static const std::array<RelocAttrs, 10> relocAttrsArray{{
static constexpr std::array<RelocAttrs, 10> relocAttrsArray{{
#define B(x) RelocAttrBits::x
{"UNSIGNED",
B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)},
{"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
{"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
{"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)},
{"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"SIGNED_4", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"TLV", B(PCREL) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
{"UNSIGNED",
B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)},
{"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
{"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
{"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
{"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)},
{"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"SIGNED_4", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
{"TLV", B(PCREL) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
#undef B
}};
assert(type < relocAttrsArray.size() && "invalid relocation type");
if (type >= relocAttrsArray.size())
return invalidRelocAttrs;
return relocAttrsArray[type];
}
}};
static int pcrelOffset(uint8_t type) {
switch (type) {
@ -196,6 +188,8 @@ X86_64::X86_64() : TargetInfo(LP64()) {
stubSize = sizeof(stub);
stubHelperHeaderSize = sizeof(stubHelperHeader);
stubHelperEntrySize = sizeof(stubHelperEntry);
relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()};
}
TargetInfo *macho::createX86_64TargetInfo() {

View File

@ -65,14 +65,19 @@ public:
// on a level of address indirection.
virtual void relaxGotLoad(uint8_t *loc, uint8_t type) const = 0;
virtual const RelocAttrs &getRelocAttrs(uint8_t type) const = 0;
virtual uint64_t getPageSize() const = 0;
// Default thunk hook. This base implementation must never be reached: it
// reports "target does not use thunks" through llvm_unreachable. Targets that
// do use thunks are expected to override it.
virtual void populateThunk(InputSection *thunk, Symbol *funcSym) {
llvm_unreachable("target does not use thunks");
}
// Non-virtual lookup of the attributes for relocation `type` in relocAttrs.
// An out-of-range `type` trips the assertion when assertions are enabled;
// otherwise invalidRelocAttrs is returned instead of indexing past the end.
const RelocAttrs &getRelocAttrs(uint8_t type) const {
  assert(type < relocAttrs.size() && "invalid relocation type");
  return type < relocAttrs.size() ? relocAttrs[type] : invalidRelocAttrs;
}
bool hasAttr(uint8_t type, RelocAttrBits bit) const {
return getRelocAttrs(type).hasAttr(bit);
}
@ -111,6 +116,8 @@ public:
uint8_t subtractorRelocType;
uint8_t unsignedRelocType;
llvm::ArrayRef<RelocAttrs> relocAttrs;
// We contrive this value as sufficiently far from any valid address that it
// will always be out-of-range for any architecture. UINT64_MAX is not a
// good choice because it is (a) only 1 away from wrapping to 0, and (b) the