[ELF] - Implement the TLS relocation optimization for 32-bit x86.

Implement the TLS relocation optimization for 32-bit x86 that is described in
"ELF Handling For Thread-Local Storage" by Ulrich Drepper, chapter 5,
"IA-32 Linker Optimizations". Specifically, this patch implements these
optimizations: LD->LE, GD->IE, GD->LE, and IE->LE.

Differential revision: http://reviews.llvm.org/D15292

llvm-svn: 255103
This commit is contained in:
George Rimar 2015-12-09 09:55:54 +00:00
parent 9938425b31
commit 2558e12bac
4 changed files with 283 additions and 3 deletions

View File

@ -88,6 +88,20 @@ public:
bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override;
void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
uint64_t SA, uint8_t *PairedLoc = nullptr) const override; uint64_t SA, uint8_t *PairedLoc = nullptr) const override;
bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
uint64_t P, uint64_t SA,
const SymbolBody &S) const override;
private:
void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
uint64_t SA) const;
void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
uint64_t SA) const;
void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
uint64_t SA) const;
void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
uint64_t SA) const;
}; };
class X86_64TargetInfo final : public TargetInfo { class X86_64TargetInfo final : public TargetInfo {
@ -260,7 +274,7 @@ bool X86TargetInfo::isTlsDynReloc(unsigned Type) const {
if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 || if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 ||
Type == R_386_TLS_GOTIE) Type == R_386_TLS_GOTIE)
return Config->Shared; return Config->Shared;
return false; return Type == R_386_TLS_GD;
} }
void X86TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, void X86TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
@ -311,8 +325,11 @@ bool X86TargetInfo::relocNeedsCopy(uint32_t Type, const SymbolBody &S) const {
} }
bool X86TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { bool X86TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
return Type == R_386_TLS_GOTIE || Type == R_386_GOT32 || if (S.isTLS() && Type == R_386_TLS_GD)
relocNeedsPlt(Type, S); return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true);
if (Type == R_386_TLS_GOTIE)
return !isTlsOptimized(Type, &S);
return Type == R_386_GOT32 || relocNeedsPlt(Type, S);
} }
bool X86TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const { bool X86TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const {
@ -358,6 +375,121 @@ void X86TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
} }
} }
// Reports whether a TLS relocation of kind Type against symbol S may be
// rewritten into a cheaper access sequence.
//
// Nothing is optimized when a shared object is being produced, or when S is
// provided and is not a TLS symbol. R_386_TLS_LDO_32, R_386_TLS_LDM and
// R_386_TLS_GD are always eligible otherwise; R_386_TLS_GOTIE is eligible
// only when the symbol cannot be preempted.
bool X86TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
  if (Config->Shared)
    return false;
  if (S && !S->isTLS())
    return false;

  switch (Type) {
  case R_386_TLS_LDO_32:
  case R_386_TLS_LDM:
  case R_386_TLS_GD:
    return true;
  case R_386_TLS_GOTIE:
    return !canBePreempted(S, true);
  default:
    return false;
  }
}
// Applies the TLS optimization that corresponds to relocation kind Type at
// Loc, delegating to the matching rewrite helper.
//
// Returns the number of relocations following this one that should be
// skipped: 1 for the GD and LDM forms, whose next relocation is expected to
// be the one against __tls_get_addr (the call is overwritten by the
// rewrite), and 0 otherwise. Any other Type is a programming error.
unsigned X86TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
                                            uint32_t Type, uint64_t P,
                                            uint64_t SA,
                                            const SymbolBody &S) const {
  switch (Type) {
  case R_386_TLS_LDO_32:
    // With LD relaxed to LE, the module-relative offset is resolved like a
    // plain R_386_TLS_LE.
    relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA);
    return 0;
  case R_386_TLS_GOTIE:
    relocateTlsIeToLe(Loc, BufEnd, P, SA);
    return 0;
  case R_386_TLS_LDM:
    relocateTlsLdToLe(Loc, BufEnd, P, SA);
    // Skip the relocation for the __tls_get_addr call that follows.
    return 1;
  case R_386_TLS_GD: {
    // A preemptible symbol still needs a GOT load (IE); otherwise the offset
    // can be encoded directly (LE).
    const bool Preemptible = canBePreempted(&S, true);
    if (Preemptible)
      relocateTlsGdToIe(Loc, BufEnd, P, SA);
    else
      relocateTlsGdToLe(Loc, BufEnd, P, SA);
    // Skip the relocation for the __tls_get_addr call that follows.
    return 1;
  }
  }
  llvm_unreachable("Unknown TLS optimization");
}
// GD -> IE rewrite, as described in Ulrich Drepper's "ELF Handling For
// Thread-Local Storage" (5.1 IA-32 Linker Optimizations,
// http://www.akkadia.org/drepper/tls.pdf). The sequence
//   leal x@tlsgd(, %ebx, 1), %eax
//   call __tls_get_addr@plt
// is replaced by
//   movl %gs:0, %eax
//   addl x@gotntpoff(%ebx), %eax
//
// The 12 replacement bytes are written starting three bytes before Loc, and
// the 32-bit displacement of the addl (8 bytes into the sequence) is then
// filled in through an R_386_32 relocation.
void X86TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
                                      uint64_t SA) const {
  static const uint8_t Code[] = {
      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
      0x03, 0x83, 0x00, 0x00, 0x00, 0x00  // addl 0(%ebx), %eax
  };
  uint8_t *SeqStart = Loc - 3;
  memcpy(SeqStart, Code, sizeof(Code));

  // The displacement is SA expressed relative to the GOT's VA plus
  // getNumEntries() * 4.
  const uint64_t GotRelOff = SA - Out<ELF32LE>::Got->getVA() -
                             Out<ELF32LE>::Got->getNumEntries() * 4;
  relocateOne(SeqStart + 8, BufEnd, R_386_32, P, GotRelOff);
}
// GD -> LE rewrite. The sequence
//   leal x@tlsgd(, %ebx, 1), %eax
//   call __tls_get_addr@plt
// can be converted to
//   movl %gs:0, %eax
//   addl $x@ntpoff, %eax
// gold, however, emits "subl $foo@tpoff, %eax" instead of the addl. Both
// forms behave identically, and this method generates the subl form to stay
// consistent with gold.
//
// The 12 replacement bytes are written starting three bytes before Loc, and
// the 32-bit immediate of the subl (8 bytes into the sequence) is then
// filled in through an R_386_32 relocation.
void X86TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
                                      uint64_t SA) const {
  static const uint8_t Code[] = {
      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
      0x81, 0xe8, 0x00, 0x00, 0x00, 0x00  // subl $0, %eax
  };
  uint8_t *SeqStart = Loc - 3;
  memcpy(SeqStart, Code, sizeof(Code));

  // The immediate to subtract is the TLS segment's p_memsz minus SA.
  const uint64_t TpOff = Out<ELF32LE>::TlsPhdr->p_memsz - SA;
  relocateOne(SeqStart + 8, BufEnd, R_386_32, P, TpOff);
}
// LD -> LE rewrite. The sequence
//   leal foo(%reg), %eax
//   call ___tls_get_addr
// is replaced by
//   movl %gs:0, %eax
//   nop
//   leal 0(%esi,1), %esi
//
// The 11 replacement bytes are written starting two bytes before Loc. No
// value needs to be patched in, so BufEnd, P and SA are unused here.
void X86TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
                                      uint64_t SA) const {
  static const uint8_t Code[] = {
      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
      0x90,                               // nop
      0x8d, 0x74, 0x26, 0x00              // leal 0(%esi,1), %esi
  };
  uint8_t *SeqStart = Loc - 2;
  memcpy(SeqStart, Code, sizeof(Code));
}
// Under some conditions an R_386_TLS_GOTIE relocation can be turned into
// R_386_TLS_LE so that the GOT is not used at all; this function performs
// that rewrite. See "ELF Handling For Thread-Local Storage, 5.1 IA-32 Linker
// Optimizations" (http://www.akkadia.org/drepper/tls.pdf) by Ulrich Drepper
// for details.
//
// @gotntpoff is used with either a MOVL or an ADDL instruction:
//   "movl foo@gotntpoff(%ebx), %reg" becomes "movl $foo, %reg"
//   "addl foo@gotntpoff(%ebx), %reg" becomes "leal foo(%reg), %reg"
// Note: gold converts the second form to ADDL instead of LEAL.
//
// The opcode byte is at Loc[-2] and the ModRM byte at Loc[-1]. Any opcode
// other than 0x8b (MOVL) is handled as the ADDL form.
void X86TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
                                      uint64_t SA) const {
  uint8_t &Opcode = Loc[-2];
  uint8_t &ModRM = Loc[-1];
  // Destination register, taken from bits 3..5 of the original ModRM byte.
  const uint8_t Reg = (ModRM >> 3) & 7;

  if (Opcode == 0x8b) {
    // movl $imm32, %reg: opcode c7, register-direct ModRM selecting Reg.
    Opcode = 0xc7;
    ModRM = 0xc0 | Reg;
  } else {
    // leal disp32(%reg), %reg: Reg is both the base and the destination.
    Opcode = 0x8d;
    ModRM = 0x80 | Reg | (Reg << 3);
  }
  relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA);
}
X86_64TargetInfo::X86_64TargetInfo() { X86_64TargetInfo::X86_64TargetInfo() {
CopyReloc = R_X86_64_COPY; CopyReloc = R_X86_64_COPY;
PCRelReloc = R_X86_64_PC32; PCRelReloc = R_X86_64_PC32;

View File

@ -0,0 +1,20 @@
.type tlsshared0,@object
.section .tbss,"awT",@nobits
.globl tlsshared0
.align 4
tlsshared0:
.long 0
.size tlsshared0, 4
.type tlsshared1,@object
.globl tlsshared1
.align 4
tlsshared1:
.long 0
.size tlsshared1, 4
.text
.globl __tls_get_addr
.align 16, 0x90
.type __tls_get_addr,@function
__tls_get_addr:

View File

@ -0,0 +1,59 @@
// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %p/Inputs/tls-opt-gdiele-i686.s -o %tso.o
// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o
// RUN: ld.lld -shared %tso.o -o %tso
// RUN: ld.lld %t.o %tso -o %tout
// RUN: llvm-readobj -r %tout | FileCheck --check-prefix=NORELOC %s
// RUN: llvm-objdump -d %tout | FileCheck --check-prefix=DISASM %s
// NORELOC: Relocations [
// NORELOC-NEXT: Section ({{.*}}) .rel.dyn {
// NORELOC-NEXT: 0x12050 R_386_TLS_TPOFF tlsshared0 0x0
// NORELOC-NEXT: 0x12054 R_386_TLS_TPOFF tlsshared1 0x0
// NORELOC-NEXT: }
// NORELOC-NEXT: ]
// DISASM: Disassembly of section .text:
// DISASM-NEXT: _start:
// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 11006: 03 83 f8 ff ff ff addl -8(%ebx), %eax
// DISASM-NEXT: 1100c: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 11012: 03 83 fc ff ff ff addl -4(%ebx), %eax
// DISASM-NEXT: 11018: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 1101e: 81 e8 08 00 00 00 subl $8, %eax
// DISASM-NEXT: 11024: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 1102a: 81 e8 04 00 00 00 subl $4, %eax
.type tlsexe1,@object
.section .tbss,"awT",@nobits
.globl tlsexe1
.align 4
tlsexe1:
.long 0
.size tlsexe1, 4
.type tlsexe2,@object
.section .tbss,"awT",@nobits
.globl tlsexe2
.align 4
tlsexe2:
.long 0
.size tlsexe2, 4
.section .text
.globl ___tls_get_addr
.type ___tls_get_addr,@function
___tls_get_addr:
.section .text
.globl _start
_start:
//GD->IE
leal tlsshared0@tlsgd(,%ebx,1),%eax
call ___tls_get_addr@plt
leal tlsshared1@tlsgd(,%ebx,1),%eax
call ___tls_get_addr@plt
//GD->LE
leal tlsexe1@tlsgd(,%ebx,1),%eax
call ___tls_get_addr@plt
leal tlsexe2@tlsgd(,%ebx,1),%eax
call ___tls_get_addr@plt

View File

@ -0,0 +1,69 @@
// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o
// RUN: ld.lld %t.o -o %t1
// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=NORELOC %s
// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s
// NORELOC: Relocations [
// NORELOC-NEXT: ]
// DISASM: Disassembly of section .text:
// DISASM-NEXT: _start:
// LD -> LE:
// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 11006: 90 nop
// DISASM-NEXT: 11007: 8d 74 26 00 leal (%esi), %esi
// DISASM-NEXT: 1100b: 8d 90 f8 ff ff ff leal -8(%eax), %edx
// DISASM-NEXT: 11011: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 11017: 90 nop
// DISASM-NEXT: 11018: 8d 74 26 00 leal (%esi), %esi
// DISASM-NEXT: 1101c: 8d 90 fc ff ff ff leal -4(%eax), %edx
// IE -> LE:
// 4294967288 == 0xFFFFFFF8
// 4294967292 == 0xFFFFFFFC
// DISASM-NEXT: 11022: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 11028: c7 c0 f8 ff ff ff movl $4294967288, %eax
// DISASM-NEXT: 1102e: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 11034: c7 c0 fc ff ff ff movl $4294967292, %eax
// DISASM-NEXT: 1103a: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 11040: 8d 80 f8 ff ff ff leal -8(%eax), %eax
// DISASM-NEXT: 11046: 65 a1 00 00 00 00 movl %gs:0, %eax
// DISASM-NEXT: 1104c: 8d 80 fc ff ff ff leal -4(%eax), %eax
.type tls0,@object
.section .tbss,"awT",@nobits
.globl tls0
.align 4
tls0:
.long 0
.size tls0, 4
.type tls1,@object
.globl tls1
.align 4
tls1:
.long 0
.size tls1, 4
.section .text
.globl ___tls_get_addr
.type ___tls_get_addr,@function
___tls_get_addr:
.section .text
.globl _start
_start:
//LD -> LE:
leal tls0@tlsldm(%ebx),%eax
call ___tls_get_addr@plt
leal tls0@dtpoff(%eax),%edx
leal tls1@tlsldm(%ebx),%eax
call ___tls_get_addr@plt
leal tls1@dtpoff(%eax),%edx
//IE -> LE:
movl %gs:0,%eax
movl tls0@gotntpoff(%ebx),%eax
movl %gs:0,%eax
movl tls1@gotntpoff(%ebx),%eax
movl %gs:0,%eax
addl tls0@gotntpoff(%ebx),%eax
movl %gs:0,%eax
addl tls1@gotntpoff(%ebx),%eax