[ELF] - Implemented optimizations for @tlsld and @tlsgd

Implements @tlsld (LD to LE) and @tlsgd (GD to LE) optimizations.
Patch does not implement the GD->IE case for @tlsgd.

Differential revision: http://reviews.llvm.org/D14870

llvm-svn: 254101
This commit is contained in:
George Rimar 2015-11-25 21:46:05 +00:00
parent 2d0ef14f5d
commit 6713cf8a52
5 changed files with 145 additions and 22 deletions

View File

@ -98,7 +98,9 @@ void InputSectionBase<ELFT>::relocate(
uint8_t *Buf, uint8_t *BufEnd, uint8_t *Buf, uint8_t *BufEnd,
iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels) { iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels) {
typedef Elf_Rel_Impl<ELFT, isRela> RelType; typedef Elf_Rel_Impl<ELFT, isRela> RelType;
for (const RelType &RI : Rels) { size_t Num = Rels.end() - Rels.begin();
for (size_t I = 0; I < Num; ++I) {
const RelType &RI = *(Rels.begin() + I);
uint32_t SymIndex = RI.getSymbol(Config->Mips64EL); uint32_t SymIndex = RI.getSymbol(Config->Mips64EL);
uint32_t Type = RI.getType(Config->Mips64EL); uint32_t Type = RI.getType(Config->Mips64EL);
uintX_t Offset = getOffset(RI.r_offset); uintX_t Offset = getOffset(RI.r_offset);
@ -108,7 +110,8 @@ void InputSectionBase<ELFT>::relocate(
uint8_t *BufLoc = Buf + Offset; uint8_t *BufLoc = Buf + Offset;
uintX_t AddrLoc = OutSec->getVA() + Offset; uintX_t AddrLoc = OutSec->getVA() + Offset;
if (Target->isTlsLocalDynamicReloc(Type)) { if (Target->isTlsLocalDynamicReloc(Type) &&
!Target->isTlsOptimized(Type, nullptr)) {
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
Out<ELFT>::Got->getVA() + Out<ELFT>::Got->getVA() +
Out<ELFT>::LocalModuleTlsIndexOffset + Out<ELFT>::LocalModuleTlsIndexOffset +
@ -127,16 +130,20 @@ void InputSectionBase<ELFT>::relocate(
SymbolBody &Body = *File->getSymbolBody(SymIndex)->repl(); SymbolBody &Body = *File->getSymbolBody(SymIndex)->repl();
if (Target->isTlsGlobalDynamicReloc(Type)) { if (Target->isTlsGlobalDynamicReloc(Type) &&
!Target->isTlsOptimized(Type, &Body)) {
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
Out<ELFT>::Got->getEntryAddr(Body) + Out<ELFT>::Got->getEntryAddr(Body) +
getAddend<ELFT>(RI)); getAddend<ELFT>(RI));
continue; continue;
} }
if (Target->isTlsOptimized(Type, Body)) { if (Target->isTlsOptimized(Type, &Body)) {
Target->relocateTlsOptimize(BufLoc, BufEnd, AddrLoc, // By optimizing TLS relocations, it is sometimes needed to skip
getSymVA<ELFT>(Body)); // relocations that immediately follow TLS relocations. This function
// knows how many slots we need to skip.
I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc,
getSymVA<ELFT>(Body));
continue; continue;
} }

View File

@ -80,9 +80,17 @@ public:
void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
uint64_t SA) const override; uint64_t SA) const override;
bool isRelRelative(uint32_t Type) const override; bool isRelRelative(uint32_t Type) const override;
bool isTlsOptimized(unsigned Type, const SymbolBody &S) const override; bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
uint64_t SA) const override; uint64_t P, uint64_t SA) const override;
private:
void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
uint64_t SA) const;
void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
uint64_t SA) const;
void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
uint64_t SA) const;
}; };
class PPC64TargetInfo final : public TargetInfo { class PPC64TargetInfo final : public TargetInfo {
@ -161,7 +169,7 @@ TargetInfo *createTarget() {
TargetInfo::~TargetInfo() {} TargetInfo::~TargetInfo() {}
bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody &S) const { bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
return false; return false;
} }
@ -177,8 +185,11 @@ unsigned TargetInfo::getPltRefReloc(unsigned Type) const { return PCRelReloc; }
bool TargetInfo::isRelRelative(uint32_t Type) const { return true; } bool TargetInfo::isRelRelative(uint32_t Type) const { return true; }
void TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
uint64_t SA) const {} uint32_t Type, uint64_t P,
uint64_t SA) const {
return 0;
}
void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {} void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {}
@ -364,7 +375,7 @@ bool X86_64TargetInfo::relocNeedsCopy(uint32_t Type,
bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
if (Type == R_X86_64_GOTTPOFF) if (Type == R_X86_64_GOTTPOFF)
return !isTlsOptimized(Type, S); return !isTlsOptimized(Type, &S);
return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL || return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
relocNeedsPlt(Type, S); relocNeedsPlt(Type, S);
} }
@ -435,10 +446,54 @@ bool X86_64TargetInfo::isRelRelative(uint32_t Type) const {
} }
bool X86_64TargetInfo::isTlsOptimized(unsigned Type, bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
const SymbolBody &S) const { const SymbolBody *S) const {
if (Config->Shared || !S.isTLS()) if (Config->Shared || (S && !S->isTLS()))
return false; return false;
return Type == R_X86_64_GOTTPOFF && !canBePreempted(&S, true); return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 ||
(Type == R_X86_64_TLSGD && !canBePreempted(S, true)) ||
(Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true));
}
// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
// x86-64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
// how LD can be optimized to LE:
// leaq bar@tlsld(%rip), %rdi
// callq __tls_get_addr@PLT
// leaq bar@dtpoff(%rax), %rcx
// Is converted to:
// .word 0x6666
// .byte 0x66
// mov %fs:0,%rax
// leaq bar@tpoff(%rax), %rcx
void X86_64TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd,
                                         uint64_t P, uint64_t SA) const {
  // Replacement code for the LD sequence: three 0x66 bytes of padding
  // followed by "mov %fs:0,%rax". BufEnd, P and SA are not consulted here.
  static const uint8_t LocalExecSeq[] = {
      0x66, 0x66, 0x66,                                    // .word 0x6666; .byte 0x66
      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax
  };
  // The rewritten bytes begin three bytes before the relocated location.
  uint8_t *PatchStart = Loc - 3;
  memcpy(PatchStart, LocalExecSeq, sizeof(LocalExecSeq));
}
// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
// x86-64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
// how GD can be optimized to LE:
// .byte 0x66
// leaq x@tlsgd(%rip), %rdi
// .word 0x6666
// rex64
// call __tls_get_addr@plt
// Is converted to:
// mov %fs:0x0,%rax
// lea x@tpoff(%rax),%rax
void X86_64TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd,
uint64_t P, uint64_t SA) const {
// Replacement byte sequence: a 9-byte mov followed by a 7-byte lea whose
// 32-bit displacement is left as zero and filled in below.
const uint8_t Inst[] = {
0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff(%rax),%rax
};
// The 16 replacement bytes are written starting four bytes before Loc.
memcpy(Loc - 4, Inst, sizeof(Inst));
// Loc + 8 is byte 12 of Inst, i.e. the lea displacement; it is resolved as
// an R_X86_64_TPOFF32 relocation with the caller's P and SA.
relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA);
} }
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
@ -446,8 +501,8 @@ bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
// This function does that. Read "ELF Handling For Thread-Local Storage, // This function does that. Read "ELF Handling For Thread-Local Storage,
// 5.5 x86-x64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf) // 5.5 x86-x64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf)
// by Ulrich Drepper for details. // by Ulrich Drepper for details.
void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, void X86_64TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd,
uint64_t P, uint64_t SA) const { uint64_t P, uint64_t SA) const {
// Ulrich's document section 6.5 says that @gottpoff(%rip) must be // Ulrich's document section 6.5 says that @gottpoff(%rip) must be
// used in MOVQ or ADDQ instructions only. // used in MOVQ or ADDQ instructions only.
// "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG". // "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG".
@ -476,6 +531,33 @@ void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA); relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
} }
// Applies a TLS relocation in its optimized form, as described in Ulrich
// Drepper's document. Rewriting the instructions at the relocation target can
// make the relocations that immediately follow the TLS relocation obsolete,
// because they would otherwise be applied to the rewritten instructions.
// Returns the number of following relocations the caller has to skip.
unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
                                               uint32_t Type, uint64_t P,
                                               uint64_t SA) const {
  if (Type == R_X86_64_GOTTPOFF) {
    relocateTlsIeToLe(Loc, BufEnd, P, SA);
    return 0;
  }
  if (Type == R_X86_64_TLSLD) {
    relocateTlsLdToLe(Loc, BufEnd, P, SA);
    // The relocation that follows is expected to be the one against
    // __tls_get_addr, so tell the caller to skip it.
    return 1;
  }
  if (Type == R_X86_64_TLSGD) {
    relocateTlsGdToLe(Loc, BufEnd, P, SA);
    // The relocation that follows is expected to be the one against
    // __tls_get_addr, so tell the caller to skip it.
    return 1;
  }
  if (Type == R_X86_64_DTPOFF32) {
    // No instruction rewriting: the location is resolved as TPOFF32 instead.
    relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
    return 0;
  }
  llvm_unreachable("Unknown TLS optimization");
}
void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
uint64_t P, uint64_t SA) const { uint64_t P, uint64_t SA) const {
switch (Type) { switch (Type) {

View File

@ -59,9 +59,10 @@ public:
virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0; virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
uint64_t P, uint64_t SA) const = 0; uint64_t P, uint64_t SA) const = 0;
virtual bool isTlsOptimized(unsigned Type, const SymbolBody &S) const; virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const;
virtual void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P, virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
uint64_t SA) const; uint32_t Type, uint64_t P,
uint64_t SA) const;
virtual ~TargetInfo(); virtual ~TargetInfo();
protected: protected:

View File

@ -203,6 +203,8 @@ void Writer<ELFT>::scanRelocs(
uint32_t Type = RI.getType(Config->Mips64EL); uint32_t Type = RI.getType(Config->Mips64EL);
if (Target->isTlsLocalDynamicReloc(Type)) { if (Target->isTlsLocalDynamicReloc(Type)) {
if (Target->isTlsOptimized(Type, nullptr))
continue;
if (Out<ELFT>::LocalModuleTlsIndexOffset == uint32_t(-1)) { if (Out<ELFT>::LocalModuleTlsIndexOffset == uint32_t(-1)) {
Out<ELFT>::LocalModuleTlsIndexOffset = Out<ELFT>::LocalModuleTlsIndexOffset =
Out<ELFT>::Got->addLocalModuleTlsIndex(); Out<ELFT>::Got->addLocalModuleTlsIndex();
@ -220,6 +222,8 @@ void Writer<ELFT>::scanRelocs(
Body = Body->repl(); Body = Body->repl();
if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) { if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) {
if (Target->isTlsOptimized(Type, Body))
continue;
if (Body->isInGot()) if (Body->isInGot())
continue; continue;
Out<ELFT>::Got->addDynTlsEntry(Body); Out<ELFT>::Got->addDynTlsEntry(Body);

View File

@ -20,12 +20,21 @@
// DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15 // DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15
// DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp // DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp
// DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12 // DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12
// Corrupted output: // Corrupted output:
// DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax // DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
// DISASM-NEXT: 1105b: 48 d1 81 c4 f8 ff ff rolq -1852(%rcx) // DISASM-NEXT: 1105b: 48 d1 81 c4 f8 ff ff rolq -1852(%rcx)
// DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax) // DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax)
// DISASM-NEXT: 11065: 81 c4 f8 ff ff ff addl $4294967288, %esp // DISASM-NEXT: 11065: 81 c4 f8 ff ff ff addl $4294967288, %esp
// LD to LE:
// DISASM-NEXT: 1106b: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
// DISASM-NEXT: 11077: 48 8d 88 f8 ff ff ff leaq -8(%rax), %rcx
// DISASM-NEXT: 1107e: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
// DISASM-NEXT: 1108a: 48 8d 88 fc ff ff ff leaq -4(%rax), %rcx
// GD to LE:
// DISASM-NEXT: 11091: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
// DISASM-NEXT: 1109a: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
// DISASM-NEXT: 110a1: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
// DISASM-NEXT: 110aa: 48 8d 80 fc ff ff ff leaq -4(%rax), %rax
.type tls0,@object .type tls0,@object
.section .tbss,"awT",@nobits .section .tbss,"awT",@nobits
@ -62,3 +71,23 @@ _start:
xchgq tls0@gottpoff(%rip),%rax xchgq tls0@gottpoff(%rip),%rax
shlq tls0@gottpoff shlq tls0@gottpoff
rolq tls0@gottpoff rolq tls0@gottpoff
//LD to LE:
leaq tls0@tlsld(%rip), %rdi
callq __tls_get_addr@PLT
leaq tls0@dtpoff(%rax),%rcx
leaq tls1@tlsld(%rip), %rdi
callq __tls_get_addr@PLT
leaq tls1@dtpoff(%rax),%rcx
//GD to LE:
.byte 0x66
leaq tls0@tlsgd(%rip),%rdi
.word 0x6666
rex64
call __tls_get_addr@plt
.byte 0x66
leaq tls1@tlsgd(%rip),%rdi
.word 0x6666
rex64
call __tls_get_addr@plt