llvm-project/lld/ELF/Relocations.h

191 lines
5.4 KiB
C
Raw Normal View History

//===- Relocations.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_RELOCATIONS_H
#define LLD_ELF_RELOCATIONS_H
#include "lld/Common/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include <map>
#include <vector>
namespace lld {
namespace elf {
class Symbol;
class InputSection;
class InputSectionBase;
class OutputSection;
class OutputSection;
// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
typedef uint32_t RelType;
2016-10-21 12:52:11 +08:00
// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
// doesn't have to know about architecture-specific details.
enum RelExpr {
R_INVALID,
R_ABS,
R_ADDEND,
R_ARM_SBREL,
R_GOT,
R_GOTONLY_PC,
R_GOTONLY_PC_FROM_END,
R_GOTREL,
R_GOTREL_FROM_END,
R_GOT_FROM_END,
R_GOT_OFF,
R_GOT_PAGE_PC,
R_GOT_PC,
R_HINT,
R_MIPS_GOTREL,
R_MIPS_GOT_GP,
R_MIPS_GOT_GP_PC,
2017-03-26 12:10:43 +08:00
R_MIPS_GOT_LOCAL_PAGE,
R_MIPS_GOT_OFF,
R_MIPS_GOT_OFF32,
R_MIPS_TLSGD,
R_MIPS_TLSLD,
R_NEG_TLS,
R_NONE,
R_PAGE_PC,
R_PC,
R_PLT,
R_PLT_PAGE_PC,
2017-03-26 12:10:43 +08:00
R_PLT_PC,
R_PPC_OPD,
R_PPC_PLT_OPD,
R_PPC_TOC,
R_RELAX_GOT_PC,
R_RELAX_GOT_PC_NOPIC,
R_RELAX_TLS_GD_TO_IE,
R_RELAX_TLS_GD_TO_IE_ABS,
2017-03-26 12:10:43 +08:00
R_RELAX_TLS_GD_TO_IE_END,
R_RELAX_TLS_GD_TO_IE_PAGE_PC,
R_RELAX_TLS_GD_TO_LE,
R_RELAX_TLS_GD_TO_LE_NEG,
R_RELAX_TLS_IE_TO_LE,
R_RELAX_TLS_LD_TO_LE,
R_SIZE,
R_TLS,
R_TLSDESC,
R_TLSDESC_CALL,
2017-03-26 12:10:43 +08:00
R_TLSDESC_PAGE,
R_TLSGD,
R_TLSGD_PC,
R_TLSLD,
2016-10-21 12:52:11 +08:00
R_TLSLD_PC,
};
Add `isRelExprOneOf` helper In various places in LLD's hot loops, we have expressions of the form "E == R_FOO || E == R_BAR || ..." (E is a RelExpr). Some of these expressions are quite long, and even though they usually go just a very small number of ways and so should be well predicted, they can still occupy branch predictor resources harming other parts of the code, or they won't be predicted well if they overflow branch predictor resources or if the branches are too dense and the branch predictor can't track them all (the compiler can in theory avoid this, at a cost in text size). And some of these expressions are so large and executed so frequently that even when well-predicted they probably still have a nontrivial cost. This speedup should be pretty portable. The cost of these simple bit tests is independent of: - the target we are linking for - the distribution of RelExpr's for a given link (which can depend on how the input files were compiled) - what compiler was used to compile LLD (it is just a simple bit test; hopefully the compiler gets it right!) - adding new target-dependent relocations (e.g. needsPlt doesn't pay any extra cost checking R_PPC_PLT_OPD on x86-64 builds) I did some rough measurements on clang-fsds and this patch gives over about 4% speedup for a regular -O1 link, about 2.5% for -O3 --gc-sections and over 5% for -O0. Sorry, I don't have my current machine set up for doing really accurate measurements right now. This also is just a bit cleaner. Thanks for Joerg for suggesting for this approach. Differential Revision: https://reviews.llvm.org/D27156 llvm-svn: 288314
2016-12-01 13:43:48 +08:00
// Build a bitmask with one bit set for each RelExpr.
//
// Constexpr function arguments can't be used in static asserts, so we
// use template arguments to build the mask.
// But function template partial specializations don't exist (needed
// for base case of the recursion), so we need a dummy struct.
template <RelExpr... Exprs> struct RelExprMaskBuilder {
static inline uint64_t build() { return 0; }
};
// Specialization for recursive case.
template <RelExpr Head, RelExpr... Tail>
struct RelExprMaskBuilder<Head, Tail...> {
static inline uint64_t build() {
static_assert(0 <= Head && Head < 64,
"RelExpr is too large for 64-bit mask!");
return (uint64_t(1) << Head) | RelExprMaskBuilder<Tail...>::build();
}
};
// Return true if `Expr` is one of `Exprs`.
// There are fewer than 64 RelExpr's, so we can represent any set of
// RelExpr's as a constant bit mask and test for membership with a
// couple cheap bitwise operations.
template <RelExpr... Exprs> bool isRelExprOneOf(RelExpr Expr) {
assert(0 <= Expr && (int)Expr < 64 &&
"RelExpr is too large for 64-bit mask!");
Add `isRelExprOneOf` helper In various places in LLD's hot loops, we have expressions of the form "E == R_FOO || E == R_BAR || ..." (E is a RelExpr). Some of these expressions are quite long, and even though they usually go just a very small number of ways and so should be well predicted, they can still occupy branch predictor resources harming other parts of the code, or they won't be predicted well if they overflow branch predictor resources or if the branches are too dense and the branch predictor can't track them all (the compiler can in theory avoid this, at a cost in text size). And some of these expressions are so large and executed so frequently that even when well-predicted they probably still have a nontrivial cost. This speedup should be pretty portable. The cost of these simple bit tests is independent of: - the target we are linking for - the distribution of RelExpr's for a given link (which can depend on how the input files were compiled) - what compiler was used to compile LLD (it is just a simple bit test; hopefully the compiler gets it right!) - adding new target-dependent relocations (e.g. needsPlt doesn't pay any extra cost checking R_PPC_PLT_OPD on x86-64 builds) I did some rough measurements on clang-fsds and this patch gives over about 4% speedup for a regular -O1 link, about 2.5% for -O3 --gc-sections and over 5% for -O0. Sorry, I don't have my current machine set up for doing really accurate measurements right now. This also is just a bit cleaner. Thanks for Joerg for suggesting for this approach. Differential Revision: https://reviews.llvm.org/D27156 llvm-svn: 288314
2016-12-01 13:43:48 +08:00
return (uint64_t(1) << Expr) & RelExprMaskBuilder<Exprs...>::build();
}
2016-10-21 12:52:11 +08:00
// Architecture-neutral representation of relocation.
struct Relocation {
RelExpr Expr;
RelType Type;
uint64_t Offset;
int64_t Addend;
Symbol *Sym;
};
template <class ELFT> void scanRelocations(InputSectionBase &);
class ThunkSection;
class Thunk;
struct InputSectionDescription;
class ThunkCreator {
public:
// Return true if Thunks have been added to OutputSections
bool createThunks(ArrayRef<OutputSection *> OutputSections);
// The number of completed passes of createThunks this permits us
// to do one time initialization on Pass 0 and put a limit on the
// number of times it can be called to prevent infinite loops.
uint32_t Pass = 0;
private:
void mergeThunks(ArrayRef<OutputSection *> OutputSections);
ThunkSection *getISDThunkSec(OutputSection *OS, InputSection *IS,
InputSectionDescription *ISD, uint32_t Type,
uint64_t Src);
ThunkSection *getISThunkSec(InputSection *IS);
void createInitialThunkSections(ArrayRef<OutputSection *> OutputSections);
void forEachInputSectionDescription(
ArrayRef<OutputSection *> OutputSections,
std::function<void(OutputSection *, InputSectionDescription *)> Fn);
std::pair<Thunk *, bool> getThunk(Symbol &Sym, RelType Type, uint64_t Src);
ThunkSection *addThunkSection(OutputSection *OS, InputSectionDescription *,
uint64_t Off);
bool normalizeExistingThunk(Relocation &Rel, uint64_t Src);
// Record all the available Thunks for a Symbol
llvm::DenseMap<Symbol *, std::vector<Thunk *>> ThunkedSymbols;
// Find a Thunk from the Thunks symbol definition, we can use this to find
// the Thunk from a relocation to the Thunks symbol definition.
llvm::DenseMap<Symbol *, Thunk *> Thunks;
// Track InputSections that have an inline ThunkSection placed in front
// an inline ThunkSection may have control fall through to the section below
// so we need to make sure that there is only one of them.
// The Mips LA25 Thunk is an example of an inline ThunkSection.
llvm::DenseMap<InputSection *, ThunkSection *> ThunkedSections;
};
// Return a int64_t to make sure we get the sign extension out of the way as
// early as possible.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rel &Rel) {
return 0;
}
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rela &Rel) {
return Rel.r_addend;
}
} // namespace elf
} // namespace lld
#endif