forked from OSchip/llvm-project
[ELF] - Implement the TLS relocation optimization for 32-bit x86.
Implement the TLS relocation optimization for 32-bit x86 that is described in "ELF Handling For Thread-Local Storage" by Ulrich Drepper, chapter 5, "IA-32 Linker Optimizations". Specifically, this patch implements these optimizations: LD->LE, GD->IE, GD->LD, and IE->LE. Differential revision: http://reviews.llvm.org/D15292 llvm-svn: 255103
This commit is contained in:
parent
9938425b31
commit
2558e12bac
|
@ -88,6 +88,20 @@ public:
|
|||
bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override;
|
||||
void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
|
||||
uint64_t SA, uint8_t *PairedLoc = nullptr) const override;
|
||||
bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
|
||||
unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
||||
uint64_t P, uint64_t SA,
|
||||
const SymbolBody &S) const override;
|
||||
|
||||
private:
|
||||
void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const;
|
||||
};
|
||||
|
||||
class X86_64TargetInfo final : public TargetInfo {
|
||||
|
@ -260,7 +274,7 @@ bool X86TargetInfo::isTlsDynReloc(unsigned Type) const {
|
|||
if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 ||
|
||||
Type == R_386_TLS_GOTIE)
|
||||
return Config->Shared;
|
||||
return false;
|
||||
return Type == R_386_TLS_GD;
|
||||
}
|
||||
|
||||
void X86TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
|
||||
|
@ -311,8 +325,11 @@ bool X86TargetInfo::relocNeedsCopy(uint32_t Type, const SymbolBody &S) const {
|
|||
}
|
||||
|
||||
bool X86TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
|
||||
return Type == R_386_TLS_GOTIE || Type == R_386_GOT32 ||
|
||||
relocNeedsPlt(Type, S);
|
||||
if (S.isTLS() && Type == R_386_TLS_GD)
|
||||
return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true);
|
||||
if (Type == R_386_TLS_GOTIE)
|
||||
return !isTlsOptimized(Type, &S);
|
||||
return Type == R_386_GOT32 || relocNeedsPlt(Type, S);
|
||||
}
|
||||
|
||||
bool X86TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const {
|
||||
|
@ -358,6 +375,121 @@ void X86TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
|
|||
}
|
||||
}
|
||||
|
||||
bool X86TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
|
||||
if (Config->Shared || (S && !S->isTLS()))
|
||||
return false;
|
||||
return Type == R_386_TLS_LDO_32 || Type == R_386_TLS_LDM ||
|
||||
Type == R_386_TLS_GD ||
|
||||
(Type == R_386_TLS_GOTIE && !canBePreempted(S, true));
|
||||
}
|
||||
|
||||
unsigned X86TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
|
||||
uint32_t Type, uint64_t P,
|
||||
uint64_t SA,
|
||||
const SymbolBody &S) const {
|
||||
switch (Type) {
|
||||
case R_386_TLS_GD:
|
||||
if (canBePreempted(&S, true))
|
||||
relocateTlsGdToIe(Loc, BufEnd, P, SA);
|
||||
else
|
||||
relocateTlsGdToLe(Loc, BufEnd, P, SA);
|
||||
// The next relocation should be against __tls_get_addr, so skip it
|
||||
return 1;
|
||||
case R_386_TLS_GOTIE:
|
||||
relocateTlsIeToLe(Loc, BufEnd, P, SA);
|
||||
return 0;
|
||||
case R_386_TLS_LDM:
|
||||
relocateTlsLdToLe(Loc, BufEnd, P, SA);
|
||||
// The next relocation should be against __tls_get_addr, so skip it
|
||||
return 1;
|
||||
case R_386_TLS_LDO_32:
|
||||
relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA);
|
||||
return 0;
|
||||
}
|
||||
llvm_unreachable("Unknown TLS optimization");
|
||||
}
|
||||
|
||||
// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.1
|
||||
// IA-32 Linker Optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
|
||||
// how GD can be optimized to IE:
|
||||
// leal x@tlsgd(, %ebx, 1),
|
||||
// call __tls_get_addr@plt
|
||||
// Is converted to:
|
||||
// movl %gs:0, %eax
|
||||
// addl x@gotntpoff(%ebx), %eax
|
||||
void X86TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const {
|
||||
const uint8_t Inst[] = {
|
||||
0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
|
||||
0x03, 0x83, 0x00, 0x00, 0x00, 0x00 // addl 0(%ebx), %eax
|
||||
};
|
||||
memcpy(Loc - 3, Inst, sizeof(Inst));
|
||||
relocateOne(Loc + 5, BufEnd, R_386_32, P,
|
||||
SA - Out<ELF32LE>::Got->getVA() -
|
||||
Out<ELF32LE>::Got->getNumEntries() * 4);
|
||||
}
|
||||
|
||||
// GD can be optimized to LE:
|
||||
// leal x@tlsgd(, %ebx, 1),
|
||||
// call __tls_get_addr@plt
|
||||
// Can be converted to:
|
||||
// movl %gs:0,%eax
|
||||
// addl $x@ntpoff,%eax
|
||||
// But gold emits subl $foo@tpoff,%eax instead of addl.
|
||||
// These instructions are completely equal in behavior.
|
||||
// This method generates subl to be consistent with gold.
|
||||
void X86TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const {
|
||||
const uint8_t Inst[] = {
|
||||
0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
|
||||
0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 // subl 0(%ebx), %eax
|
||||
};
|
||||
memcpy(Loc - 3, Inst, sizeof(Inst));
|
||||
relocateOne(Loc + 5, BufEnd, R_386_32, P,
|
||||
Out<ELF32LE>::TlsPhdr->p_memsz - SA);
|
||||
}
|
||||
|
||||
// LD can be optimized to LE:
|
||||
// leal foo(%reg),%eax
|
||||
// call ___tls_get_addr
|
||||
// Is converted to:
|
||||
// movl %gs:0,%eax
|
||||
// nop
|
||||
// leal 0(%esi,1),%esi
|
||||
void X86TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const {
|
||||
const uint8_t Inst[] = {
|
||||
0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
|
||||
0x90, // nop
|
||||
0x8d, 0x74, 0x26, 0x00 // leal 0(%esi,1),%esi
|
||||
};
|
||||
memcpy(Loc - 2, Inst, sizeof(Inst));
|
||||
}
|
||||
|
||||
// In some conditions, R_386_TLS_GOTIE relocation can be optimized to
|
||||
// R_386_TLS_LE so that it does not use GOT.
|
||||
// This function does that. Read "ELF Handling For Thread-Local Storage,
|
||||
// 5.1 IA-32 Linker Optimizations" (http://www.akkadia.org/drepper/tls.pdf)
|
||||
// by Ulrich Drepper for details.
|
||||
void X86TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
|
||||
uint64_t SA) const {
|
||||
// Ulrich's document section 6.2 says that @gotntpoff can be
|
||||
// used with MOVL or ADDL instructions.
|
||||
// "MOVL foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVL $foo, %REG".
|
||||
// "ADDL foo@GOTNTPOFF(%RIP), %REG" is transformed to "LEAL foo(%REG), %REG"
|
||||
// Note: gold converts to ADDL instead of LEAL.
|
||||
uint8_t *Inst = Loc - 2;
|
||||
uint8_t *RegSlot = Loc - 1;
|
||||
uint8_t Reg = (Loc[-1] >> 3) & 7;
|
||||
bool IsMov = *Inst == 0x8b;
|
||||
*Inst = IsMov ? 0xc7 : 0x8d;
|
||||
if (IsMov)
|
||||
*RegSlot = 0xc0 | ((*RegSlot >> 3) & 7);
|
||||
else
|
||||
*RegSlot = 0x80 | Reg | (Reg << 3);
|
||||
relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA);
|
||||
}
|
||||
|
||||
X86_64TargetInfo::X86_64TargetInfo() {
|
||||
CopyReloc = R_X86_64_COPY;
|
||||
PCRelReloc = R_X86_64_PC32;
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
.type tlsshared0,@object
|
||||
.section .tbss,"awT",@nobits
|
||||
.globl tlsshared0
|
||||
.align 4
|
||||
tlsshared0:
|
||||
.long 0
|
||||
.size tlsshared0, 4
|
||||
|
||||
.type tlsshared1,@object
|
||||
.globl tlsshared1
|
||||
.align 4
|
||||
tlsshared1:
|
||||
.long 0
|
||||
.size tlsshared1, 4
|
||||
|
||||
.text
|
||||
.globl __tls_get_addr
|
||||
.align 16, 0x90
|
||||
.type __tls_get_addr,@function
|
||||
__tls_get_addr:
|
|
@ -0,0 +1,59 @@
|
|||
// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %p/Inputs/tls-opt-gdiele-i686.s -o %tso.o
|
||||
// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o
|
||||
// RUN: ld.lld -shared %tso.o -o %tso
|
||||
// RUN: ld.lld %t.o %tso -o %tout
|
||||
// RUN: llvm-readobj -r %tout | FileCheck --check-prefix=NORELOC %s
|
||||
// RUN: llvm-objdump -d %tout | FileCheck --check-prefix=DISASM %s
|
||||
|
||||
// NORELOC: Relocations [
|
||||
// NORELOC-NEXT: Section ({{.*}}) .rel.dyn {
|
||||
// NORELOC-NEXT: 0x12050 R_386_TLS_TPOFF tlsshared0 0x0
|
||||
// NORELOC-NEXT: 0x12054 R_386_TLS_TPOFF tlsshared1 0x0
|
||||
// NORELOC-NEXT: }
|
||||
// NORELOC-NEXT: ]
|
||||
|
||||
// DISASM: Disassembly of section .text:
|
||||
// DISASM-NEXT: _start:
|
||||
// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 11006: 03 83 f8 ff ff ff addl -8(%ebx), %eax
|
||||
// DISASM-NEXT: 1100c: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 11012: 03 83 fc ff ff ff addl -4(%ebx), %eax
|
||||
// DISASM-NEXT: 11018: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 1101e: 81 e8 08 00 00 00 subl $8, %eax
|
||||
// DISASM-NEXT: 11024: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 1102a: 81 e8 04 00 00 00 subl $4, %eax
|
||||
|
||||
.type tlsexe1,@object
|
||||
.section .tbss,"awT",@nobits
|
||||
.globl tlsexe1
|
||||
.align 4
|
||||
tlsexe1:
|
||||
.long 0
|
||||
.size tlsexe1, 4
|
||||
|
||||
.type tlsexe2,@object
|
||||
.section .tbss,"awT",@nobits
|
||||
.globl tlsexe2
|
||||
.align 4
|
||||
tlsexe2:
|
||||
.long 0
|
||||
.size tlsexe2, 4
|
||||
|
||||
.section .text
|
||||
.globl ___tls_get_addr
|
||||
.type ___tls_get_addr,@function
|
||||
___tls_get_addr:
|
||||
|
||||
.section .text
|
||||
.globl _start
|
||||
_start:
|
||||
//GD->IE
|
||||
leal tlsshared0@tlsgd(,%ebx,1),%eax
|
||||
call ___tls_get_addr@plt
|
||||
leal tlsshared1@tlsgd(,%ebx,1),%eax
|
||||
call ___tls_get_addr@plt
|
||||
//GD->LE
|
||||
leal tlsexe1@tlsgd(,%ebx,1),%eax
|
||||
call ___tls_get_addr@plt
|
||||
leal tlsexe2@tlsgd(,%ebx,1),%eax
|
||||
call ___tls_get_addr@plt
|
|
@ -0,0 +1,69 @@
|
|||
// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o
|
||||
// RUN: ld.lld %t.o -o %t1
|
||||
// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=NORELOC %s
|
||||
// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s
|
||||
|
||||
// NORELOC: Relocations [
|
||||
// NORELOC-NEXT: ]
|
||||
|
||||
// DISASM: Disassembly of section .text:
|
||||
// DISASM-NEXT: _start:
|
||||
// LD -> LE:
|
||||
// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 11006: 90 nop
|
||||
// DISASM-NEXT: 11007: 8d 74 26 00 leal (%esi), %esi
|
||||
// DISASM-NEXT: 1100b: 8d 90 f8 ff ff ff leal -8(%eax), %edx
|
||||
// DISASM-NEXT: 11011: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 11017: 90 nop
|
||||
// DISASM-NEXT: 11018: 8d 74 26 00 leal (%esi), %esi
|
||||
// DISASM-NEXT: 1101c: 8d 90 fc ff ff ff leal -4(%eax), %edx
|
||||
// IE -> LE:
|
||||
// 4294967288 == 0xFFFFFFF8
|
||||
// 4294967292 == 0xFFFFFFFC
|
||||
// DISASM-NEXT: 11022: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 11028: c7 c0 f8 ff ff ff movl $4294967288, %eax
|
||||
// DISASM-NEXT: 1102e: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 11034: c7 c0 fc ff ff ff movl $4294967292, %eax
|
||||
// DISASM-NEXT: 1103a: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 11040: 8d 80 f8 ff ff ff leal -8(%eax), %eax
|
||||
// DISASM-NEXT: 11046: 65 a1 00 00 00 00 movl %gs:0, %eax
|
||||
// DISASM-NEXT: 1104c: 8d 80 fc ff ff ff leal -4(%eax), %eax
|
||||
.type tls0,@object
|
||||
.section .tbss,"awT",@nobits
|
||||
.globl tls0
|
||||
.align 4
|
||||
tls0:
|
||||
.long 0
|
||||
.size tls0, 4
|
||||
|
||||
.type tls1,@object
|
||||
.globl tls1
|
||||
.align 4
|
||||
tls1:
|
||||
.long 0
|
||||
.size tls1, 4
|
||||
|
||||
.section .text
|
||||
.globl ___tls_get_addr
|
||||
.type ___tls_get_addr,@function
|
||||
___tls_get_addr:
|
||||
|
||||
.section .text
|
||||
.globl _start
|
||||
_start:
|
||||
//LD -> LE:
|
||||
leal tls0@tlsldm(%ebx),%eax
|
||||
call ___tls_get_addr@plt
|
||||
leal tls0@dtpoff(%eax),%edx
|
||||
leal tls1@tlsldm(%ebx),%eax
|
||||
call ___tls_get_addr@plt
|
||||
leal tls1@dtpoff(%eax),%edx
|
||||
//IE -> LE:
|
||||
movl %gs:0,%eax
|
||||
movl tls0@gotntpoff(%ebx),%eax
|
||||
movl %gs:0,%eax
|
||||
movl tls1@gotntpoff(%ebx),%eax
|
||||
movl %gs:0,%eax
|
||||
addl tls0@gotntpoff(%ebx),%eax
|
||||
movl %gs:0,%eax
|
||||
addl tls1@gotntpoff(%ebx),%eax
|
Loading…
Reference in New Issue