forked from OSchip/llvm-project
[ELF] - Implemented optimizations for @tlsld and @tlsgd
Implements the @tlsld (LD to LE) and @tlsgd (GD to LE) optimizations. This patch does not implement the GD->IE case for @tlsgd. Differential revision: http://reviews.llvm.org/D14870 llvm-svn: 254101
This commit is contained in:
parent
2d0ef14f5d
commit
6713cf8a52
|
@ -98,7 +98,9 @@ void InputSectionBase<ELFT>::relocate(
|
|||
uint8_t *Buf, uint8_t *BufEnd,
|
||||
iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels) {
|
||||
typedef Elf_Rel_Impl<ELFT, isRela> RelType;
|
||||
for (const RelType &RI : Rels) {
|
||||
size_t Num = Rels.end() - Rels.begin();
|
||||
for (size_t I = 0; I < Num; ++I) {
|
||||
const RelType &RI = *(Rels.begin() + I);
|
||||
uint32_t SymIndex = RI.getSymbol(Config->Mips64EL);
|
||||
uint32_t Type = RI.getType(Config->Mips64EL);
|
||||
uintX_t Offset = getOffset(RI.r_offset);
|
||||
|
@ -108,7 +110,8 @@ void InputSectionBase<ELFT>::relocate(
|
|||
uint8_t *BufLoc = Buf + Offset;
|
||||
uintX_t AddrLoc = OutSec->getVA() + Offset;
|
||||
|
||||
if (Target->isTlsLocalDynamicReloc(Type)) {
|
||||
if (Target->isTlsLocalDynamicReloc(Type) &&
|
||||
!Target->isTlsOptimized(Type, nullptr)) {
|
||||
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
|
||||
Out<ELFT>::Got->getVA() +
|
||||
Out<ELFT>::LocalModuleTlsIndexOffset +
|
||||
|
@ -127,16 +130,20 @@ void InputSectionBase<ELFT>::relocate(
|
|||
|
||||
SymbolBody &Body = *File->getSymbolBody(SymIndex)->repl();
|
||||
|
||||
if (Target->isTlsGlobalDynamicReloc(Type)) {
|
||||
if (Target->isTlsGlobalDynamicReloc(Type) &&
|
||||
!Target->isTlsOptimized(Type, &Body)) {
|
||||
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
|
||||
Out<ELFT>::Got->getEntryAddr(Body) +
|
||||
getAddend<ELFT>(RI));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Target->isTlsOptimized(Type, Body)) {
|
||||
Target->relocateTlsOptimize(BufLoc, BufEnd, AddrLoc,
|
||||
getSymVA<ELFT>(Body));
|
||||
if (Target->isTlsOptimized(Type, &Body)) {
|
||||
// When TLS relocations are optimized, it is sometimes necessary to skip
|
||||
// relocations that immediately follow the TLS relocation. The call below
|
||||
// returns how many such relocations we need to skip.
|
||||
I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc,
|
||||
getSymVA<ELFT>(Body));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -80,9 +80,17 @@ public:
|
|||
void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
|
||||
uint64_t SA) const override;
|
||||
bool isRelRelative(uint32_t Type) const override;
|
||||
bool isTlsOptimized(unsigned Type, const SymbolBody &S) const override;
|
||||
void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const override;
|
||||
bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
|
||||
unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
||||
uint64_t P, uint64_t SA) const override;
|
||||
|
||||
private:
|
||||
void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
};
|
||||
|
||||
class PPC64TargetInfo final : public TargetInfo {
|
||||
|
@ -161,7 +169,7 @@ TargetInfo *createTarget() {
|
|||
|
||||
TargetInfo::~TargetInfo() {}
|
||||
|
||||
bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody &S) const {
|
||||
bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -177,8 +185,11 @@ unsigned TargetInfo::getPltRefReloc(unsigned Type) const { return PCRelReloc; }
|
|||
|
||||
bool TargetInfo::isRelRelative(uint32_t Type) const { return true; }
|
||||
|
||||
void TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const {}
|
||||
unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||
uint32_t Type, uint64_t P,
|
||||
uint64_t SA) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {}
|
||||
|
||||
|
@ -364,7 +375,7 @@ bool X86_64TargetInfo::relocNeedsCopy(uint32_t Type,
|
|||
|
||||
bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
|
||||
if (Type == R_X86_64_GOTTPOFF)
|
||||
return !isTlsOptimized(Type, S);
|
||||
return !isTlsOptimized(Type, &S);
|
||||
return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
|
||||
relocNeedsPlt(Type, S);
|
||||
}
|
||||
|
@ -435,10 +446,54 @@ bool X86_64TargetInfo::isRelRelative(uint32_t Type) const {
|
|||
}
|
||||
|
||||
bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
|
||||
const SymbolBody &S) const {
|
||||
if (Config->Shared || !S.isTLS())
|
||||
const SymbolBody *S) const {
|
||||
if (Config->Shared || (S && !S->isTLS()))
|
||||
return false;
|
||||
return Type == R_X86_64_GOTTPOFF && !canBePreempted(&S, true);
|
||||
return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 ||
|
||||
(Type == R_X86_64_TLSGD && !canBePreempted(S, true)) ||
|
||||
(Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true));
|
||||
}
|
||||
|
||||
// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
|
||||
// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
|
||||
// how LD can be optimized to LE:
|
||||
// leaq bar@tlsld(%rip), %rdi
|
||||
// callq __tls_get_addr@PLT
|
||||
// leaq bar@dtpoff(%rax), %rcx
|
||||
// Is converted to:
|
||||
// .word 0x6666
|
||||
// .byte 0x66
|
||||
// mov %fs:0,%rax
|
||||
// leaq bar@tpoff(%rax), %rcx
|
||||
void X86_64TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd,
|
||||
uint64_t P, uint64_t SA) const {
|
||||
const uint8_t Inst[] = {
|
||||
0x66, 0x66, //.word 0x6666
|
||||
0x66, //.byte 0x66
|
||||
0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax
|
||||
};
|
||||
memcpy(Loc - 3, Inst, sizeof(Inst));
|
||||
}
|
||||
|
||||
// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
|
||||
// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
|
||||
// how GD can be optimized to LE:
|
||||
// .byte 0x66
|
||||
// leaq x@tlsgd(%rip), %rdi
|
||||
// .word 0x6666
|
||||
// rex64
|
||||
// call __tls_get_addr@plt
|
||||
// Is converted to:
|
||||
// mov %fs:0x0,%rax
|
||||
// lea x@tpoff,%rax
|
||||
void X86_64TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd,
|
||||
uint64_t P, uint64_t SA) const {
|
||||
const uint8_t Inst[] = {
|
||||
0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
|
||||
0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax
|
||||
};
|
||||
memcpy(Loc - 4, Inst, sizeof(Inst));
|
||||
relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA);
|
||||
}
|
||||
|
||||
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
|
||||
|
@ -446,8 +501,8 @@ bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
|
|||
// This function does that. Read "ELF Handling For Thread-Local Storage,
|
||||
// 5.5 x86-x64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf)
|
||||
// by Ulrich Drepper for details.
|
||||
void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||
uint64_t P, uint64_t SA) const {
|
||||
void X86_64TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd,
|
||||
uint64_t P, uint64_t SA) const {
|
||||
// Ulrich's document section 6.5 says that @gottpoff(%rip) must be
|
||||
// used in MOVQ or ADDQ instructions only.
|
||||
// "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG".
|
||||
|
@ -476,6 +531,33 @@ void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
|||
relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
|
||||
}
|
||||
|
||||
// This function applies a TLS relocation with an optimization as described
|
||||
// in Ulrich's document. As a result of rewriting instructions at the
|
||||
// relocation target, relocations that immediately follow the TLS relocation
|
||||
// (which would be applied to rewritten instructions) may have to be skipped.
|
||||
// This function returns the number of relocations that need to be skipped.
|
||||
unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||
uint32_t Type, uint64_t P,
|
||||
uint64_t SA) const {
|
||||
switch (Type) {
|
||||
case R_X86_64_GOTTPOFF:
|
||||
relocateTlsIeToLe(Loc, BufEnd, P, SA);
|
||||
return 0;
|
||||
case R_X86_64_TLSLD:
|
||||
relocateTlsLdToLe(Loc, BufEnd, P, SA);
|
||||
// The next relocation should be against __tls_get_addr, so skip it
|
||||
return 1;
|
||||
case R_X86_64_TLSGD:
|
||||
relocateTlsGdToLe(Loc, BufEnd, P, SA);
|
||||
// The next relocation should be against __tls_get_addr, so skip it
|
||||
return 1;
|
||||
case R_X86_64_DTPOFF32:
|
||||
relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
|
||||
return 0;
|
||||
}
|
||||
llvm_unreachable("Unknown TLS optimization");
|
||||
}
|
||||
|
||||
void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
||||
uint64_t P, uint64_t SA) const {
|
||||
switch (Type) {
|
||||
|
|
|
@ -59,9 +59,10 @@ public:
|
|||
virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
|
||||
virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
||||
uint64_t P, uint64_t SA) const = 0;
|
||||
virtual bool isTlsOptimized(unsigned Type, const SymbolBody &S) const;
|
||||
virtual void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const;
|
||||
virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||
uint32_t Type, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
virtual ~TargetInfo();
|
||||
|
||||
protected:
|
||||
|
|
|
@ -203,6 +203,8 @@ void Writer<ELFT>::scanRelocs(
|
|||
uint32_t Type = RI.getType(Config->Mips64EL);
|
||||
|
||||
if (Target->isTlsLocalDynamicReloc(Type)) {
|
||||
if (Target->isTlsOptimized(Type, nullptr))
|
||||
continue;
|
||||
if (Out<ELFT>::LocalModuleTlsIndexOffset == uint32_t(-1)) {
|
||||
Out<ELFT>::LocalModuleTlsIndexOffset =
|
||||
Out<ELFT>::Got->addLocalModuleTlsIndex();
|
||||
|
@ -220,6 +222,8 @@ void Writer<ELFT>::scanRelocs(
|
|||
Body = Body->repl();
|
||||
|
||||
if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) {
|
||||
if (Target->isTlsOptimized(Type, Body))
|
||||
continue;
|
||||
if (Body->isInGot())
|
||||
continue;
|
||||
Out<ELFT>::Got->addDynTlsEntry(Body);
|
||||
|
|
|
@ -20,12 +20,21 @@
|
|||
// DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15
|
||||
// DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp
|
||||
// DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12
|
||||
|
||||
// Corrupted output:
|
||||
// DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
|
||||
// DISASM-NEXT: 1105b: 48 d1 81 c4 f8 ff ff rolq -1852(%rcx)
|
||||
// DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax)
|
||||
// DISASM-NEXT: 11065: 81 c4 f8 ff ff ff addl $4294967288, %esp
|
||||
// LD to LE:
|
||||
// DISASM-NEXT: 1106b: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
|
||||
// DISASM-NEXT: 11077: 48 8d 88 f8 ff ff ff leaq -8(%rax), %rcx
|
||||
// DISASM-NEXT: 1107e: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
|
||||
// DISASM-NEXT: 1108a: 48 8d 88 fc ff ff ff leaq -4(%rax), %rcx
|
||||
// GD to LE:
|
||||
// DISASM-NEXT: 11091: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
|
||||
// DISASM-NEXT: 1109a: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
|
||||
// DISASM-NEXT: 110a1: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
|
||||
// DISASM-NEXT: 110aa: 48 8d 80 fc ff ff ff leaq -4(%rax), %rax
|
||||
|
||||
.type tls0,@object
|
||||
.section .tbss,"awT",@nobits
|
||||
|
@ -62,3 +71,23 @@ _start:
|
|||
xchgq tls0@gottpoff(%rip),%rax
|
||||
shlq tls0@gottpoff
|
||||
rolq tls0@gottpoff
|
||||
|
||||
//LD to LE:
|
||||
leaq tls0@tlsld(%rip), %rdi
|
||||
callq __tls_get_addr@PLT
|
||||
leaq tls0@dtpoff(%rax),%rcx
|
||||
leaq tls1@tlsld(%rip), %rdi
|
||||
callq __tls_get_addr@PLT
|
||||
leaq tls1@dtpoff(%rax),%rcx
|
||||
|
||||
//GD to LE:
|
||||
.byte 0x66
|
||||
leaq tls0@tlsgd(%rip),%rdi
|
||||
.word 0x6666
|
||||
rex64
|
||||
call __tls_get_addr@plt
|
||||
.byte 0x66
|
||||
leaq tls1@tlsgd(%rip),%rdi
|
||||
.word 0x6666
|
||||
rex64
|
||||
call __tls_get_addr@plt
|
||||
|
|
Loading…
Reference in New Issue