forked from OSchip/llvm-project
[ARM][AArch64] Increase TLS alignment to reserve space for Android's TCB
ARM and AArch64 use TLS variant 1, where the first two words after the thread pointer are reserved for the TCB, followed by the executable's TLS segment. Both the thread pointer and the TLS segment are aligned to at least the TLS segment's alignment.

Android/Bionic historically has not supported ELF TLS, and it has allocated memory after the thread pointer for several Bionic TLS slots (currently 9 but soon only 8). At least one of these allocations (TLS_SLOT_STACK_GUARD == 5) is widespread throughout Android/AArch64 binaries and can't be changed.

To reconcile this disagreement about TLS memory layout, set the minimum alignment for executable TLS segments to 8 words on ARM/AArch64, which reserves at least 8 words of memory after the TP (2 for the ABI-specified TCB and 6 for alignment padding). For simplicity, and because lld doesn't know when it's targeting Android, increase the alignment regardless of operating system.

Differential Revision: https://reviews.llvm.org/D53906

llvm-svn: 350681
This commit is contained in:
parent
a19cb2eb6f
commit
d7d2369c09
|
@ -575,6 +575,10 @@ static int64_t getTlsTpOffset() {
|
||||||
// Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
|
// Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
|
||||||
// followed by a variable amount of alignment padding, followed by the TLS
|
// followed by a variable amount of alignment padding, followed by the TLS
|
||||||
// segment.
|
// segment.
|
||||||
|
//
|
||||||
|
// NB: While the ARM/AArch64 ABI formally has a 2-word TCB size, lld
|
||||||
|
// effectively increases the TCB size to 8 words for Android compatibility.
|
||||||
|
// It accomplishes this by increasing the segment's alignment.
|
||||||
return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
|
return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
|
||||||
case EM_386:
|
case EM_386:
|
||||||
case EM_X86_64:
|
case EM_X86_64:
|
||||||
|
|
|
@ -2181,12 +2181,24 @@ template <class ELFT> void Writer<ELFT>::setPhdrs() {
|
||||||
P->p_memsz = alignTo(P->p_memsz, Target->PageSize);
|
P->p_memsz = alignTo(P->p_memsz, Target->PageSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (P->p_type == PT_TLS && P->p_memsz) {
|
||||||
|
if (!Config->Shared &&
|
||||||
|
(Config->EMachine == EM_ARM || Config->EMachine == EM_AARCH64)) {
|
||||||
|
// On ARM/AArch64, reserve extra space (8 words) between the thread
|
||||||
|
// pointer and an executable's TLS segment by overaligning the segment.
|
||||||
|
// This reservation is needed for backwards compatibility with Android's
|
||||||
|
// TCB, which allocates several slots after the thread pointer (e.g.
|
||||||
|
// TLS_SLOT_STACK_GUARD==5). For simplicity, this overalignment is also
|
||||||
|
// done on other operating systems.
|
||||||
|
P->p_align = std::max<uint64_t>(P->p_align, Config->Wordsize * 8);
|
||||||
|
}
|
||||||
|
|
||||||
// The TLS pointer goes after PT_TLS for variant 2 targets. At least glibc
|
// The TLS pointer goes after PT_TLS for variant 2 targets. At least glibc
|
||||||
// will align it, so round up the size to make sure the offsets are
|
// will align it, so round up the size to make sure the offsets are
|
||||||
// correct.
|
// correct.
|
||||||
if (P->p_type == PT_TLS && P->p_memsz)
|
|
||||||
P->p_memsz = alignTo(P->p_memsz, P->p_align);
|
P->p_memsz = alignTo(P->p_memsz, P->p_align);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// A helper struct for checkSectionOverlap.
|
// A helper struct for checkSectionOverlap.
|
||||||
|
|
|
@ -26,9 +26,9 @@ _start:
|
||||||
// CHECK: _start:
|
// CHECK: _start:
|
||||||
// CHECK-NEXT: 210ff8: 41 d0 3b d5 mrs x1, TPIDR_EL0
|
// CHECK-NEXT: 210ff8: 41 d0 3b d5 mrs x1, TPIDR_EL0
|
||||||
// CHECK-NEXT: 210ffc: 00 00 a0 d2 movz x0, #0, lsl #16
|
// CHECK-NEXT: 210ffc: 00 00 a0 d2 movz x0, #0, lsl #16
|
||||||
// CHECK-NEXT: 211000: 01 02 80 f2 movk x1, #16
|
// CHECK-NEXT: 211000: 01 08 80 f2 movk x1, #64
|
||||||
// CHECK-NEXT: 211004: 00 00 a0 d2 movz x0, #0, lsl #16
|
// CHECK-NEXT: 211004: 00 00 a0 d2 movz x0, #0, lsl #16
|
||||||
// CHECK-NEXT: 211008: 01 02 80 f2 movk x1, #16
|
// CHECK-NEXT: 211008: 01 08 80 f2 movk x1, #64
|
||||||
// CHECK-NEXT: 21100c: c0 03 5f d6 ret
|
// CHECK-NEXT: 21100c: c0 03 5f d6 ret
|
||||||
|
|
||||||
.type v,@object
|
.type v,@object
|
||||||
|
|
|
@ -5,15 +5,15 @@
|
||||||
# RUN: llvm-objdump -d %tout | FileCheck %s
|
# RUN: llvm-objdump -d %tout | FileCheck %s
|
||||||
# RUN: llvm-readobj -s -r %tout | FileCheck -check-prefix=RELOC %s
|
# RUN: llvm-readobj -s -r %tout | FileCheck -check-prefix=RELOC %s
|
||||||
|
|
||||||
#Local-Dynamic to Initial-Exec relax creates no
|
#Local-Dynamic to Local-Exec relax creates no
|
||||||
#RELOC: Relocations [
|
#RELOC: Relocations [
|
||||||
#RELOC-NEXT: ]
|
#RELOC-NEXT: ]
|
||||||
|
|
||||||
# TCB size = 0x16 and foo is first element from TLS register.
|
# TCB size = 64 and foo is first element from TLS register.
|
||||||
# CHECK: Disassembly of section .text:
|
# CHECK: Disassembly of section .text:
|
||||||
# CHECK: _start:
|
# CHECK: _start:
|
||||||
# CHECK: 210000: 00 00 a0 d2 movz x0, #0, lsl #16
|
# CHECK: 210000: 00 00 a0 d2 movz x0, #0, lsl #16
|
||||||
# CHECK: 210004: 00 02 80 f2 movk x0, #16
|
# CHECK: 210004: 00 08 80 f2 movk x0, #64
|
||||||
# CHECK: 210008: 1f 20 03 d5 nop
|
# CHECK: 210008: 1f 20 03 d5 nop
|
||||||
# CHECK: 21000c: 1f 20 03 d5 nop
|
# CHECK: 21000c: 1f 20 03 d5 nop
|
||||||
|
|
||||||
|
|
|
@ -9,13 +9,13 @@
|
||||||
# RELOC: Relocations [
|
# RELOC: Relocations [
|
||||||
# RELOC-NEXT: ]
|
# RELOC-NEXT: ]
|
||||||
|
|
||||||
# TCB size = 0x16 and foo is first element from TLS register.
|
# TCB size = 64 and foo is first element from TLS register.
|
||||||
# CHECK: Disassembly of section .text:
|
# CHECK: Disassembly of section .text:
|
||||||
# CHECK: _start:
|
# CHECK: _start:
|
||||||
# CHECK-NEXT: 210000: 00 00 a0 d2 movz x0, #0, lsl #16
|
# CHECK-NEXT: 210000: 00 00 a0 d2 movz x0, #0, lsl #16
|
||||||
# CHECK-NEXT: 210004: 80 02 80 f2 movk x0, #20
|
# CHECK-NEXT: 210004: 80 08 80 f2 movk x0, #68
|
||||||
# CHECK-NEXT: 210008: 00 00 a0 d2 movz x0, #0, lsl #16
|
# CHECK-NEXT: 210008: 00 00 a0 d2 movz x0, #0, lsl #16
|
||||||
# CHECK-NEXT: 21000c: 00 02 80 f2 movk x0, #16
|
# CHECK-NEXT: 21000c: 00 08 80 f2 movk x0, #64
|
||||||
|
|
||||||
.section .tdata
|
.section .tdata
|
||||||
.align 2
|
.align 2
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
# RUN: llvm-objdump -d %tout | FileCheck %s
|
# RUN: llvm-objdump -d %tout | FileCheck %s
|
||||||
# RUN: llvm-readobj -s -r %tout | FileCheck -check-prefix=RELOC %s
|
# RUN: llvm-readobj -s -r %tout | FileCheck -check-prefix=RELOC %s
|
||||||
|
|
||||||
#Local-Dynamic to Initial-Exec relax creates no
|
#Local-Dynamic to Local-Exec relax creates no
|
||||||
#RELOC: Relocations [
|
#RELOC: Relocations [
|
||||||
#RELOC-NEXT: ]
|
#RELOC-NEXT: ]
|
||||||
|
|
||||||
|
@ -17,12 +17,12 @@ _start:
|
||||||
add x0, x0, :tprel_hi12:v2
|
add x0, x0, :tprel_hi12:v2
|
||||||
add x0, x0, :tprel_lo12_nc:v2
|
add x0, x0, :tprel_lo12_nc:v2
|
||||||
|
|
||||||
# TCB size = 0x16 and foo is first element from TLS register.
|
# TCB size = 64 and foo is first element from TLS register.
|
||||||
#CHECK: Disassembly of section .text:
|
#CHECK: Disassembly of section .text:
|
||||||
#CHECK: _start:
|
#CHECK: _start:
|
||||||
#CHECK: 210000: 40 d0 3b d5 mrs x0, TPIDR_EL0
|
#CHECK: 210000: 40 d0 3b d5 mrs x0, TPIDR_EL0
|
||||||
#CHECK: 210004: 00 00 40 91 add x0, x0, #0, lsl #12
|
#CHECK: 210004: 00 00 40 91 add x0, x0, #0, lsl #12
|
||||||
#CHECK: 210008: 00 40 00 91 add x0, x0, #16
|
#CHECK: 210008: 00 00 01 91 add x0, x0, #64
|
||||||
#CHECK: 21000c: 40 d0 3b d5 mrs x0, TPIDR_EL0
|
#CHECK: 21000c: 40 d0 3b d5 mrs x0, TPIDR_EL0
|
||||||
#CHECK: 210010: 00 fc 7f 91 add x0, x0, #4095, lsl #12
|
#CHECK: 210010: 00 fc 7f 91 add x0, x0, #4095, lsl #12
|
||||||
#CHECK: 210014: 00 e0 3f 91 add x0, x0, #4088
|
#CHECK: 210014: 00 e0 3f 91 add x0, x0, #4088
|
||||||
|
@ -36,9 +36,9 @@ v1:
|
||||||
.word 0
|
.word 0
|
||||||
.size v1, 4
|
.size v1, 4
|
||||||
|
|
||||||
# The current offset from the thread pointer is 20. Raise it to just below the
|
# The current offset from the thread pointer is 68. Raise it to just below the
|
||||||
# 24-bit limit.
|
# 24-bit limit.
|
||||||
.space (0xfffff8 - 20)
|
.space (0xfffff8 - 68)
|
||||||
|
|
||||||
.type v2,@object
|
.type v2,@object
|
||||||
.globl v2
|
.globl v2
|
||||||
|
|
|
@ -26,27 +26,27 @@ _start: mrs x8, TPIDR_EL0
|
||||||
|
|
||||||
// CHECK: _start:
|
// CHECK: _start:
|
||||||
// CHECK-NEXT: 210000: 48 d0 3b d5 mrs x8, TPIDR_EL0
|
// CHECK-NEXT: 210000: 48 d0 3b d5 mrs x8, TPIDR_EL0
|
||||||
// 0x0 + c10 = 0xc10 = tcb (16-bytes) + var0
|
// 0x0 + c40 = 0xc40 = tcb (64-bytes) + var0
|
||||||
// CHECK-NEXT: 210004: 08 01 40 91 add x8, x8, #0, lsl #12
|
// CHECK-NEXT: 210004: 08 01 40 91 add x8, x8, #0, lsl #12
|
||||||
// CHECK-NEXT: 210008: 14 05 c3 3d ldr q20, [x8, #3088]
|
// CHECK-NEXT: 210008: 14 11 c3 3d ldr q20, [x8, #3136]
|
||||||
// 0x1000 + 0x820 = 0x1820 = tcb + var1
|
// 0x1000 + 0x850 = 0x1850 = tcb + var1
|
||||||
// CHECK-NEXT: 21000c: 08 05 40 91 add x8, x8, #1, lsl #12
|
// CHECK-NEXT: 21000c: 08 05 40 91 add x8, x8, #1, lsl #12
|
||||||
// CHECK-NEXT: 210010: 00 11 44 f9 ldr x0, [x8, #2080]
|
// CHECK-NEXT: 210010: 00 29 44 f9 ldr x0, [x8, #2128]
|
||||||
// 0x2000 + 0x428 = 0x2428 = tcb + var2
|
// 0x2000 + 0x458 = 0x2458 = tcb + var2
|
||||||
// CHECK-NEXT: 210014: 08 09 40 91 add x8, x8, #2, lsl #12
|
// CHECK-NEXT: 210014: 08 09 40 91 add x8, x8, #2, lsl #12
|
||||||
// CHECK-NEXT: 210018: 00 29 44 b9 ldr w0, [x8, #1064]
|
// CHECK-NEXT: 210018: 00 59 44 b9 ldr w0, [x8, #1112]
|
||||||
// 0x3000 + 0x2c = 0x302c = tcb + var3
|
// 0x3000 + 0x5c = 0x305c = tcb + var3
|
||||||
// CHECK-NEXT: 21001c: 08 0d 40 91 add x8, x8, #3, lsl #12
|
// CHECK-NEXT: 21001c: 08 0d 40 91 add x8, x8, #3, lsl #12
|
||||||
// CHECK-NEXT: 210020: 00 59 40 79 ldrh w0, [x8, #44]
|
// CHECK-NEXT: 210020: 00 b9 40 79 ldrh w0, [x8, #92]
|
||||||
// 0x3000 + 0xc2e = 0x32ce = tcb + var4
|
// 0x3000 + 0xc5e = 0x3c5e = tcb + var4
|
||||||
// CHECK-NEXT: 210024: 08 0d 40 91 add x8, x8, #3, lsl #12
|
// CHECK-NEXT: 210024: 08 0d 40 91 add x8, x8, #3, lsl #12
|
||||||
// CHECK-NEXT: 210028: 00 b9 70 39 ldrb w0, [x8, #3118]
|
// CHECK-NEXT: 210028: 00 79 71 39 ldrb w0, [x8, #3166]
|
||||||
|
|
||||||
// CHECK-SYMS: 0000000000000c00 0 TLS GLOBAL DEFAULT 2 var0
|
// CHECK-SYMS: 0000000000000c00 16 TLS GLOBAL DEFAULT 2 var0
|
||||||
// CHECK-SYMS-NEXT: 0000000000001810 4 TLS GLOBAL DEFAULT 2 var1
|
// CHECK-SYMS-NEXT: 0000000000001810 8 TLS GLOBAL DEFAULT 2 var1
|
||||||
// CHECK-SYMS-NEXT: 0000000000002418 2 TLS GLOBAL DEFAULT 2 var2
|
// CHECK-SYMS-NEXT: 0000000000002418 4 TLS GLOBAL DEFAULT 2 var2
|
||||||
// CHECK-SYMS-NEXT: 000000000000301c 1 TLS GLOBAL DEFAULT 2 var3
|
// CHECK-SYMS-NEXT: 000000000000301c 2 TLS GLOBAL DEFAULT 2 var3
|
||||||
// CHECK-SYMS-NEXT: 0000000000003c1e 0 TLS GLOBAL DEFAULT 2 var4
|
// CHECK-SYMS-NEXT: 0000000000003c1e 1 TLS GLOBAL DEFAULT 2 var4
|
||||||
|
|
||||||
.globl var0
|
.globl var0
|
||||||
.globl var1
|
.globl var1
|
||||||
|
@ -59,12 +59,12 @@ _start: mrs x8, TPIDR_EL0
|
||||||
.type var3,@object
|
.type var3,@object
|
||||||
|
|
||||||
.section .tbss,"awT",@nobits
|
.section .tbss,"awT",@nobits
|
||||||
.balign 16
|
.balign 64
|
||||||
.space 1024 * 3
|
.space 1024 * 3
|
||||||
var0:
|
var0:
|
||||||
.quad 0
|
.quad 0
|
||||||
.quad 0
|
.quad 0
|
||||||
.size var1, 16
|
.size var0, 16
|
||||||
.space 1024 * 3
|
.space 1024 * 3
|
||||||
var1:
|
var1:
|
||||||
.quad 0
|
.quad 0
|
||||||
|
@ -72,14 +72,14 @@ var1:
|
||||||
.space 1024 * 3
|
.space 1024 * 3
|
||||||
var2:
|
var2:
|
||||||
.word 0
|
.word 0
|
||||||
.size var1, 4
|
.size var2, 4
|
||||||
|
|
||||||
.space 1024 * 3
|
.space 1024 * 3
|
||||||
var3:
|
var3:
|
||||||
.hword 0
|
.hword 0
|
||||||
.size var2, 2
|
.size var3, 2
|
||||||
.space 1024 * 3
|
.space 1024 * 3
|
||||||
var4:
|
var4:
|
||||||
.byte 0
|
.byte 0
|
||||||
.size var3, 1
|
.size var4, 1
|
||||||
.space 1024 * 3
|
.space 1024 * 3
|
||||||
|
|
|
@ -69,9 +69,9 @@ x:
|
||||||
|
|
||||||
// CHECK: Disassembly of section .text:
|
// CHECK: Disassembly of section .text:
|
||||||
// CHECK-NEXT: _start:
|
// CHECK-NEXT: _start:
|
||||||
// offset of x from Thread pointer = (TcbSize + 0x0 = 0x8)
|
// offset of x from Thread pointer = (TcbSize + 0x0 = 0x20)
|
||||||
// CHECK-NEXT: 11000: 08 00 00 00
|
// CHECK-NEXT: 11000: 20 00 00 00
|
||||||
// offset of z from Thread pointer = (TcbSize + 0x8 = 0x10)
|
// offset of z from Thread pointer = (TcbSize + 0x8 = 0x28)
|
||||||
// CHECK-NEXT: 11004: 10 00 00 00
|
// CHECK-NEXT: 11004: 28 00 00 00
|
||||||
// offset of y from Thread pointer = (TcbSize + 0x4 = 0xc)
|
// offset of y from Thread pointer = (TcbSize + 0x4 = 0x24)
|
||||||
// CHECK-NEXT: 11008: 0c 00 00 00
|
// CHECK-NEXT: 11008: 24 00 00 00
|
||||||
|
|
|
@ -37,5 +37,5 @@ x2:
|
||||||
.type x2, %object
|
.type x2, %object
|
||||||
|
|
||||||
// CHECK: Contents of section .got:
|
// CHECK: Contents of section .got:
|
||||||
// x1 at offset 8 from TP, x2 at offset c from TP. Offsets include TCB size of 8
|
// x1 at offset 0x20 from TP, x2 at offset 0x24 from TP. Offsets include TCB size of 0x20
|
||||||
// CHECK-NEXT: 13064 08000000 0c000000
|
// CHECK-NEXT: 13064 20000000 24000000
|
||||||
|
|
Loading…
Reference in New Issue