forked from OSchip/llvm-project
[ELF] - Implemented optimizations for @tlsld and @tlsgd
Implements @tlsld (LD to LE) and @tlsgd (GD to LE) optimizations. Patch does not implement the GD->IE case for @tlsgd. Differential revision: http://reviews.llvm.org/D14870 llvm-svn: 254101
This commit is contained in:
parent
2d0ef14f5d
commit
6713cf8a52
|
@ -98,7 +98,9 @@ void InputSectionBase<ELFT>::relocate(
|
||||||
uint8_t *Buf, uint8_t *BufEnd,
|
uint8_t *Buf, uint8_t *BufEnd,
|
||||||
iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels) {
|
iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels) {
|
||||||
typedef Elf_Rel_Impl<ELFT, isRela> RelType;
|
typedef Elf_Rel_Impl<ELFT, isRela> RelType;
|
||||||
for (const RelType &RI : Rels) {
|
size_t Num = Rels.end() - Rels.begin();
|
||||||
|
for (size_t I = 0; I < Num; ++I) {
|
||||||
|
const RelType &RI = *(Rels.begin() + I);
|
||||||
uint32_t SymIndex = RI.getSymbol(Config->Mips64EL);
|
uint32_t SymIndex = RI.getSymbol(Config->Mips64EL);
|
||||||
uint32_t Type = RI.getType(Config->Mips64EL);
|
uint32_t Type = RI.getType(Config->Mips64EL);
|
||||||
uintX_t Offset = getOffset(RI.r_offset);
|
uintX_t Offset = getOffset(RI.r_offset);
|
||||||
|
@ -108,7 +110,8 @@ void InputSectionBase<ELFT>::relocate(
|
||||||
uint8_t *BufLoc = Buf + Offset;
|
uint8_t *BufLoc = Buf + Offset;
|
||||||
uintX_t AddrLoc = OutSec->getVA() + Offset;
|
uintX_t AddrLoc = OutSec->getVA() + Offset;
|
||||||
|
|
||||||
if (Target->isTlsLocalDynamicReloc(Type)) {
|
if (Target->isTlsLocalDynamicReloc(Type) &&
|
||||||
|
!Target->isTlsOptimized(Type, nullptr)) {
|
||||||
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
|
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
|
||||||
Out<ELFT>::Got->getVA() +
|
Out<ELFT>::Got->getVA() +
|
||||||
Out<ELFT>::LocalModuleTlsIndexOffset +
|
Out<ELFT>::LocalModuleTlsIndexOffset +
|
||||||
|
@ -127,16 +130,20 @@ void InputSectionBase<ELFT>::relocate(
|
||||||
|
|
||||||
SymbolBody &Body = *File->getSymbolBody(SymIndex)->repl();
|
SymbolBody &Body = *File->getSymbolBody(SymIndex)->repl();
|
||||||
|
|
||||||
if (Target->isTlsGlobalDynamicReloc(Type)) {
|
if (Target->isTlsGlobalDynamicReloc(Type) &&
|
||||||
|
!Target->isTlsOptimized(Type, &Body)) {
|
||||||
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
|
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
|
||||||
Out<ELFT>::Got->getEntryAddr(Body) +
|
Out<ELFT>::Got->getEntryAddr(Body) +
|
||||||
getAddend<ELFT>(RI));
|
getAddend<ELFT>(RI));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Target->isTlsOptimized(Type, Body)) {
|
if (Target->isTlsOptimized(Type, &Body)) {
|
||||||
Target->relocateTlsOptimize(BufLoc, BufEnd, AddrLoc,
|
// When TLS relocations are optimized, it is sometimes necessary to skip
|
||||||
getSymVA<ELFT>(Body));
|
// relocations that immediately follow TLS relocations. This function
|
||||||
|
// knows how many slots we need to skip.
|
||||||
|
I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc,
|
||||||
|
getSymVA<ELFT>(Body));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -80,9 +80,17 @@ public:
|
||||||
void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
|
void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
|
||||||
uint64_t SA) const override;
|
uint64_t SA) const override;
|
||||||
bool isRelRelative(uint32_t Type) const override;
|
bool isRelRelative(uint32_t Type) const override;
|
||||||
bool isTlsOptimized(unsigned Type, const SymbolBody &S) const override;
|
bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
|
||||||
void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
||||||
uint64_t SA) const override;
|
uint64_t P, uint64_t SA) const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||||
|
uint64_t SA) const;
|
||||||
|
void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||||
|
uint64_t SA) const;
|
||||||
|
void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||||
|
uint64_t SA) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PPC64TargetInfo final : public TargetInfo {
|
class PPC64TargetInfo final : public TargetInfo {
|
||||||
|
@ -161,7 +169,7 @@ TargetInfo *createTarget() {
|
||||||
|
|
||||||
TargetInfo::~TargetInfo() {}
|
TargetInfo::~TargetInfo() {}
|
||||||
|
|
||||||
bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody &S) const {
|
bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -177,8 +185,11 @@ unsigned TargetInfo::getPltRefReloc(unsigned Type) const { return PCRelReloc; }
|
||||||
|
|
||||||
bool TargetInfo::isRelRelative(uint32_t Type) const { return true; }
|
bool TargetInfo::isRelRelative(uint32_t Type) const { return true; }
|
||||||
|
|
||||||
void TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||||
uint64_t SA) const {}
|
uint32_t Type, uint64_t P,
|
||||||
|
uint64_t SA) const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {}
|
void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {}
|
||||||
|
|
||||||
|
@ -364,7 +375,7 @@ bool X86_64TargetInfo::relocNeedsCopy(uint32_t Type,
|
||||||
|
|
||||||
bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
|
bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
|
||||||
if (Type == R_X86_64_GOTTPOFF)
|
if (Type == R_X86_64_GOTTPOFF)
|
||||||
return !isTlsOptimized(Type, S);
|
return !isTlsOptimized(Type, &S);
|
||||||
return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
|
return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
|
||||||
relocNeedsPlt(Type, S);
|
relocNeedsPlt(Type, S);
|
||||||
}
|
}
|
||||||
|
@ -435,10 +446,54 @@ bool X86_64TargetInfo::isRelRelative(uint32_t Type) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
|
bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
|
||||||
const SymbolBody &S) const {
|
const SymbolBody *S) const {
|
||||||
if (Config->Shared || !S.isTLS())
|
if (Config->Shared || (S && !S->isTLS()))
|
||||||
return false;
|
return false;
|
||||||
return Type == R_X86_64_GOTTPOFF && !canBePreempted(&S, true);
|
return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 ||
|
||||||
|
(Type == R_X86_64_TLSGD && !canBePreempted(S, true)) ||
|
||||||
|
(Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
|
||||||
|
// x86-64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
|
||||||
|
// how LD can be optimized to LE:
|
||||||
|
// leaq bar@tlsld(%rip), %rdi
|
||||||
|
// callq __tls_get_addr@PLT
|
||||||
|
// leaq bar@dtpoff(%rax), %rcx
|
||||||
|
// Is converted to:
|
||||||
|
// .word 0x6666
|
||||||
|
// .byte 0x66
|
||||||
|
// mov %fs:0,%rax
|
||||||
|
// leaq bar@tpoff(%rax), %rcx
|
||||||
|
void X86_64TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd,
|
||||||
|
uint64_t P, uint64_t SA) const {
|
||||||
|
const uint8_t Inst[] = {
|
||||||
|
0x66, 0x66, //.word 0x6666
|
||||||
|
0x66, //.byte 0x66
|
||||||
|
0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax
|
||||||
|
};
|
||||||
|
memcpy(Loc - 3, Inst, sizeof(Inst));
|
||||||
|
}
|
||||||
|
|
||||||
|
// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
|
||||||
|
// x86-64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
|
||||||
|
// how GD can be optimized to LE:
|
||||||
|
// .byte 0x66
|
||||||
|
// leaq x@tlsgd(%rip), %rdi
|
||||||
|
// .word 0x6666
|
||||||
|
// rex64
|
||||||
|
// call __tls_get_addr@plt
|
||||||
|
// Is converted to:
|
||||||
|
// mov %fs:0x0,%rax
|
||||||
|
// lea x@tpoff,%rax
|
||||||
|
void X86_64TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd,
|
||||||
|
uint64_t P, uint64_t SA) const {
|
||||||
|
const uint8_t Inst[] = {
|
||||||
|
0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
|
||||||
|
0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax
|
||||||
|
};
|
||||||
|
memcpy(Loc - 4, Inst, sizeof(Inst));
|
||||||
|
relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA);
|
||||||
}
|
}
|
||||||
|
|
||||||
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
|
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
|
||||||
|
@ -446,8 +501,8 @@ bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
|
||||||
// This function does that. Read "ELF Handling For Thread-Local Storage,
|
// This function does that. Read "ELF Handling For Thread-Local Storage,
|
||||||
// 5.5 x86-64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf)
|
// 5.5 x86-64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf)
|
||||||
// by Ulrich Drepper for details.
|
// by Ulrich Drepper for details.
|
||||||
void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
void X86_64TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd,
|
||||||
uint64_t P, uint64_t SA) const {
|
uint64_t P, uint64_t SA) const {
|
||||||
// Ulrich's document section 6.5 says that @gottpoff(%rip) must be
|
// Ulrich's document section 6.5 says that @gottpoff(%rip) must be
|
||||||
// used in MOVQ or ADDQ instructions only.
|
// used in MOVQ or ADDQ instructions only.
|
||||||
// "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG".
|
// "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG".
|
||||||
|
@ -476,6 +531,33 @@ void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||||
relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
|
relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This function applies a TLS relocation with an optimization as described
|
||||||
|
// in Ulrich's document. As a result of rewriting instructions at the
|
||||||
|
// relocation target, relocations immediately following the TLS relocation (which
|
||||||
|
// would be applied to rewritten instructions) may have to be skipped.
|
||||||
|
// This function returns the number of relocations that need to be skipped.
|
||||||
|
unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||||
|
uint32_t Type, uint64_t P,
|
||||||
|
uint64_t SA) const {
|
||||||
|
switch (Type) {
|
||||||
|
case R_X86_64_GOTTPOFF:
|
||||||
|
relocateTlsIeToLe(Loc, BufEnd, P, SA);
|
||||||
|
return 0;
|
||||||
|
case R_X86_64_TLSLD:
|
||||||
|
relocateTlsLdToLe(Loc, BufEnd, P, SA);
|
||||||
|
// The next relocation should be against __tls_get_addr, so skip it
|
||||||
|
return 1;
|
||||||
|
case R_X86_64_TLSGD:
|
||||||
|
relocateTlsGdToLe(Loc, BufEnd, P, SA);
|
||||||
|
// The next relocation should be against __tls_get_addr, so skip it
|
||||||
|
return 1;
|
||||||
|
case R_X86_64_DTPOFF32:
|
||||||
|
relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
llvm_unreachable("Unknown TLS optimization");
|
||||||
|
}
|
||||||
|
|
||||||
void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
||||||
uint64_t P, uint64_t SA) const {
|
uint64_t P, uint64_t SA) const {
|
||||||
switch (Type) {
|
switch (Type) {
|
||||||
|
|
|
@ -59,9 +59,10 @@ public:
|
||||||
virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
|
virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
|
||||||
virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
||||||
uint64_t P, uint64_t SA) const = 0;
|
uint64_t P, uint64_t SA) const = 0;
|
||||||
virtual bool isTlsOptimized(unsigned Type, const SymbolBody &S) const;
|
virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const;
|
||||||
virtual void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||||
uint64_t SA) const;
|
uint32_t Type, uint64_t P,
|
||||||
|
uint64_t SA) const;
|
||||||
virtual ~TargetInfo();
|
virtual ~TargetInfo();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -203,6 +203,8 @@ void Writer<ELFT>::scanRelocs(
|
||||||
uint32_t Type = RI.getType(Config->Mips64EL);
|
uint32_t Type = RI.getType(Config->Mips64EL);
|
||||||
|
|
||||||
if (Target->isTlsLocalDynamicReloc(Type)) {
|
if (Target->isTlsLocalDynamicReloc(Type)) {
|
||||||
|
if (Target->isTlsOptimized(Type, nullptr))
|
||||||
|
continue;
|
||||||
if (Out<ELFT>::LocalModuleTlsIndexOffset == uint32_t(-1)) {
|
if (Out<ELFT>::LocalModuleTlsIndexOffset == uint32_t(-1)) {
|
||||||
Out<ELFT>::LocalModuleTlsIndexOffset =
|
Out<ELFT>::LocalModuleTlsIndexOffset =
|
||||||
Out<ELFT>::Got->addLocalModuleTlsIndex();
|
Out<ELFT>::Got->addLocalModuleTlsIndex();
|
||||||
|
@ -220,6 +222,8 @@ void Writer<ELFT>::scanRelocs(
|
||||||
Body = Body->repl();
|
Body = Body->repl();
|
||||||
|
|
||||||
if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) {
|
if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) {
|
||||||
|
if (Target->isTlsOptimized(Type, Body))
|
||||||
|
continue;
|
||||||
if (Body->isInGot())
|
if (Body->isInGot())
|
||||||
continue;
|
continue;
|
||||||
Out<ELFT>::Got->addDynTlsEntry(Body);
|
Out<ELFT>::Got->addDynTlsEntry(Body);
|
||||||
|
|
|
@ -20,12 +20,21 @@
|
||||||
// DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15
|
// DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15
|
||||||
// DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp
|
// DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp
|
||||||
// DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12
|
// DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12
|
||||||
|
|
||||||
// Corrupted output:
|
// Corrupted output:
|
||||||
// DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
|
// DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
|
||||||
// DISASM-NEXT: 1105b: 48 d1 81 c4 f8 ff ff rolq -1852(%rcx)
|
// DISASM-NEXT: 1105b: 48 d1 81 c4 f8 ff ff rolq -1852(%rcx)
|
||||||
// DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax)
|
// DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax)
|
||||||
// DISASM-NEXT: 11065: 81 c4 f8 ff ff ff addl $4294967288, %esp
|
// DISASM-NEXT: 11065: 81 c4 f8 ff ff ff addl $4294967288, %esp
|
||||||
|
// LD to LE:
|
||||||
|
// DISASM-NEXT: 1106b: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
|
||||||
|
// DISASM-NEXT: 11077: 48 8d 88 f8 ff ff ff leaq -8(%rax), %rcx
|
||||||
|
// DISASM-NEXT: 1107e: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
|
||||||
|
// DISASM-NEXT: 1108a: 48 8d 88 fc ff ff ff leaq -4(%rax), %rcx
|
||||||
|
// GD to LE:
|
||||||
|
// DISASM-NEXT: 11091: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
|
||||||
|
// DISASM-NEXT: 1109a: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
|
||||||
|
// DISASM-NEXT: 110a1: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
|
||||||
|
// DISASM-NEXT: 110aa: 48 8d 80 fc ff ff ff leaq -4(%rax), %rax
|
||||||
|
|
||||||
.type tls0,@object
|
.type tls0,@object
|
||||||
.section .tbss,"awT",@nobits
|
.section .tbss,"awT",@nobits
|
||||||
|
@ -62,3 +71,23 @@ _start:
|
||||||
xchgq tls0@gottpoff(%rip),%rax
|
xchgq tls0@gottpoff(%rip),%rax
|
||||||
shlq tls0@gottpoff
|
shlq tls0@gottpoff
|
||||||
rolq tls0@gottpoff
|
rolq tls0@gottpoff
|
||||||
|
|
||||||
|
//LD to LE:
|
||||||
|
leaq tls0@tlsld(%rip), %rdi
|
||||||
|
callq __tls_get_addr@PLT
|
||||||
|
leaq tls0@dtpoff(%rax),%rcx
|
||||||
|
leaq tls1@tlsld(%rip), %rdi
|
||||||
|
callq __tls_get_addr@PLT
|
||||||
|
leaq tls1@dtpoff(%rax),%rcx
|
||||||
|
|
||||||
|
//GD to LE:
|
||||||
|
.byte 0x66
|
||||||
|
leaq tls0@tlsgd(%rip),%rdi
|
||||||
|
.word 0x6666
|
||||||
|
rex64
|
||||||
|
call __tls_get_addr@plt
|
||||||
|
.byte 0x66
|
||||||
|
leaq tls1@tlsgd(%rip),%rdi
|
||||||
|
.word 0x6666
|
||||||
|
rex64
|
||||||
|
call __tls_get_addr@plt
|
||||||
|
|
Loading…
Reference in New Issue