[ARM][AArch64] Revert Android Bionic PT_TLS overaligning hack

This reverts D53906.

D53906 increased p_align of PT_TLS on ARM/AArch64 to 32/64 to make the
static TLS layout compatible with Android Bionic's ELF TLS. However,
this may cause glibc ARM/AArch64 programs to crash (see PR41527).

The faulty PT_TLS in the executable satisfies p_vaddr%p_align != 0. The
remainder is normally 0 but may be non-zero with the hack in place. The
problem is that we increase PT_TLS's p_align after OutputSections'
addresses are fixed (assignAddress()). It is possible that
p_vaddr%old_p_align == 0 while p_vaddr%new_p_align != 0.

For a thread-local variable defined in the executable, the TLS offset
computed by lld (local exec) differs from the TLS offset computed by
glibc for accesses from another module (initial exec/generic dynamic).
Note: PR41527 said the bug affects initial exec, but generic dynamic is
actually affected as well.

(glibc is correct in that it computes offsets that satisfy
`offset%p_align == p_vaddr%p_align`, which is a basic ELF requirement.
This hack appears to work on FreeBSD rtld, musl<=1.1.22, and Bionic, but
that is just because they (and lld) incorrectly compute offsets that
satisfy `offset%p_align == 0` instead.)

Android developers are fine with reverting D53906 upstream and carrying
it in their own tree until they figure out a long-term solution (e.g. a
dummy .tdata with sh_addralign=64 sh_size={0,1} in crtbegin*.o files; the
overhead is now insignificant after D62059).

Reviewed By: rprichard, srhines

Differential Revision: https://reviews.llvm.org/D62055

llvm-svn: 361090
This commit is contained in:
Fangrui Song 2019-05-18 03:16:00 +00:00
parent 5f36a28556
commit ed2ad77ccb
9 changed files with 44 additions and 61 deletions

View File

@ -590,10 +590,6 @@ static int64_t getTlsTpOffset() {
// Variant 1. The thread pointer points to a TCB with a fixed 2-word size, // Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
// followed by a variable amount of alignment padding, followed by the TLS // followed by a variable amount of alignment padding, followed by the TLS
// segment. // segment.
//
// NB: While the ARM/AArch64 ABI formally has a 2-word TCB size, lld
// effectively increases the TCB size to 8 words for Android compatibility.
// It accomplishes this by increasing the segment's alignment.
return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align); return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
case EM_386: case EM_386:
case EM_X86_64: case EM_X86_64:

View File

@ -2190,19 +2190,6 @@ template <class ELFT> void Writer<ELFT>::setPhdrs() {
// rounds up. // rounds up.
P->p_memsz = alignTo(P->p_memsz, Config->CommonPageSize); P->p_memsz = alignTo(P->p_memsz, Config->CommonPageSize);
} }
if (P->p_type == PT_TLS && P->p_memsz) {
if (!Config->Shared &&
(Config->EMachine == EM_ARM || Config->EMachine == EM_AARCH64)) {
// On ARM/AArch64, reserve extra space (8 words) between the thread
// pointer and an executable's TLS segment by overaligning the segment.
// This reservation is needed for backwards compatibility with Android's
// TCB, which allocates several slots after the thread pointer (e.g.
// TLS_SLOT_STACK_GUARD==5). For simplicity, this overalignment is also
// done on other operating systems.
P->p_align = std::max<uint64_t>(P->p_align, Config->Wordsize * 8);
}
}
} }
} }

View File

@ -26,9 +26,9 @@ _start:
// CHECK: _start: // CHECK: _start:
// CHECK-NEXT: 210ff8: 41 d0 3b d5 mrs x1, TPIDR_EL0 // CHECK-NEXT: 210ff8: 41 d0 3b d5 mrs x1, TPIDR_EL0
// CHECK-NEXT: 210ffc: 00 00 a0 d2 movz x0, #0, lsl #16 // CHECK-NEXT: 210ffc: 00 00 a0 d2 movz x0, #0, lsl #16
// CHECK-NEXT: 211000: 01 08 80 f2 movk x1, #64 // CHECK-NEXT: 211000: 01 02 80 f2 movk x1, #16
// CHECK-NEXT: 211004: 00 00 a0 d2 movz x0, #0, lsl #16 // CHECK-NEXT: 211004: 00 00 a0 d2 movz x0, #0, lsl #16
// CHECK-NEXT: 211008: 01 08 80 f2 movk x1, #64 // CHECK-NEXT: 211008: 01 02 80 f2 movk x1, #16
// CHECK-NEXT: 21100c: c0 03 5f d6 ret // CHECK-NEXT: 21100c: c0 03 5f d6 ret
.type v,@object .type v,@object

View File

@ -9,12 +9,12 @@
#RELOC: Relocations [ #RELOC: Relocations [
#RELOC-NEXT: ] #RELOC-NEXT: ]
# TCB size = 64 and foo is first element from TLS register. # TCB size = 16 and foo is first element from TLS register.
# CHECK: Disassembly of section .text: # CHECK: Disassembly of section .text:
# CHECK-EMPTY: # CHECK-EMPTY:
# CHECK: _start: # CHECK: _start:
# CHECK: 210000: 00 00 a0 d2 movz x0, #0, lsl #16 # CHECK: 210000: 00 00 a0 d2 movz x0, #0, lsl #16
# CHECK: 210004: 00 08 80 f2 movk x0, #64 # CHECK: 210004: 00 02 80 f2 movk x0, #16
# CHECK: 210008: 1f 20 03 d5 nop # CHECK: 210008: 1f 20 03 d5 nop
# CHECK: 21000c: 1f 20 03 d5 nop # CHECK: 21000c: 1f 20 03 d5 nop

View File

@ -9,14 +9,14 @@
# RELOC: Relocations [ # RELOC: Relocations [
# RELOC-NEXT: ] # RELOC-NEXT: ]
# TCB size = 64 and foo is first element from TLS register. # TCB size = 16 and foo is first element from TLS register.
# CHECK: Disassembly of section .text: # CHECK: Disassembly of section .text:
# CHECK-EMPTY: # CHECK-EMPTY:
# CHECK: _start: # CHECK: _start:
# CHECK-NEXT: 210000: 00 00 a0 d2 movz x0, #0, lsl #16 # CHECK-NEXT: 210000: 00 00 a0 d2 movz x0, #0, lsl #16
# CHECK-NEXT: 210004: 80 08 80 f2 movk x0, #68 # CHECK-NEXT: 210004: 80 02 80 f2 movk x0, #20
# CHECK-NEXT: 210008: 00 00 a0 d2 movz x0, #0, lsl #16 # CHECK-NEXT: 210008: 00 00 a0 d2 movz x0, #0, lsl #16
# CHECK-NEXT: 21000c: 00 08 80 f2 movk x0, #64 # CHECK-NEXT: 21000c: 00 02 80 f2 movk x0, #16
.section .tdata .section .tdata
.align 2 .align 2

View File

@ -17,12 +17,12 @@ _start:
add x0, x0, :tprel_hi12:v2 add x0, x0, :tprel_hi12:v2
add x0, x0, :tprel_lo12_nc:v2 add x0, x0, :tprel_lo12_nc:v2
# TCB size = 64 and foo is first element from TLS register. # TCB size = 16 and foo is first element from TLS register.
#CHECK: Disassembly of section .text: #CHECK: Disassembly of section .text:
#CHECK: _start: #CHECK: _start:
#CHECK: 210000: 40 d0 3b d5 mrs x0, TPIDR_EL0 #CHECK: 210000: 40 d0 3b d5 mrs x0, TPIDR_EL0
#CHECK: 210004: 00 00 40 91 add x0, x0, #0, lsl #12 #CHECK: 210004: 00 00 40 91 add x0, x0, #0, lsl #12
#CHECK: 210008: 00 00 01 91 add x0, x0, #64 #CHECK: 210008: 00 40 00 91 add x0, x0, #16
#CHECK: 21000c: 40 d0 3b d5 mrs x0, TPIDR_EL0 #CHECK: 21000c: 40 d0 3b d5 mrs x0, TPIDR_EL0
#CHECK: 210010: 00 fc 7f 91 add x0, x0, #4095, lsl #12 #CHECK: 210010: 00 fc 7f 91 add x0, x0, #4095, lsl #12
#CHECK: 210014: 00 e0 3f 91 add x0, x0, #4088 #CHECK: 210014: 00 e0 3f 91 add x0, x0, #4088
@ -36,9 +36,9 @@ v1:
.word 0 .word 0
.size v1, 4 .size v1, 4
# The current offset from the thread pointer is 68. Raise it to just below the # The current offset from the thread pointer is 20. Raise it to just below the
# 24-bit limit. # 24-bit limit.
.space (0xfffff8 - 68) .space (0xfffff8 - 20)
.type v2,@object .type v2,@object
.globl v2 .globl v2

View File

@ -26,27 +26,27 @@ _start: mrs x8, TPIDR_EL0
// CHECK: _start: // CHECK: _start:
// CHECK-NEXT: 210000: 48 d0 3b d5 mrs x8, TPIDR_EL0 // CHECK-NEXT: 210000: 48 d0 3b d5 mrs x8, TPIDR_EL0
// 0x0 + c40 = 0xc40 = tcb (64-bytes) + var0 // 0x0 + c10 = 0xc10 = tcb (16-bytes) + var0
// CHECK-NEXT: 210004: 08 01 40 91 add x8, x8, #0, lsl #12 // CHECK-NEXT: 210004: 08 01 40 91 add x8, x8, #0, lsl #12
// CHECK-NEXT: 210008: 14 11 c3 3d ldr q20, [x8, #3136] // CHECK-NEXT: 210008: 14 05 c3 3d ldr q20, [x8, #3088]
// 0x1000 + 0x850 = 0x1850 = tcb + var1 // 0x1000 + 0x820 = 0x1820 = tcb + var1
// CHECK-NEXT: 21000c: 08 05 40 91 add x8, x8, #1, lsl #12 // CHECK-NEXT: 21000c: 08 05 40 91 add x8, x8, #1, lsl #12
// CHECK-NEXT: 210010: 00 29 44 f9 ldr x0, [x8, #2128] // CHECK-NEXT: 210010: 00 11 44 f9 ldr x0, [x8, #2080]
// 0x2000 + 0x458 = 0x2458 = tcb + var2 // 0x2000 + 0x428 = 0x2428 = tcb + var2
// CHECK-NEXT: 210014: 08 09 40 91 add x8, x8, #2, lsl #12 // CHECK-NEXT: 210014: 08 09 40 91 add x8, x8, #2, lsl #12
// CHECK-NEXT: 210018: 00 59 44 b9 ldr w0, [x8, #1112] // CHECK-NEXT: 210018: 00 29 44 b9 ldr w0, [x8, #1064]
// 0x3000 + 0x5c = 0x305c = tcb + var3 // 0x3000 + 0x2c = 0x302c = tcb + var3
// CHECK-NEXT: 21001c: 08 0d 40 91 add x8, x8, #3, lsl #12 // CHECK-NEXT: 21001c: 08 0d 40 91 add x8, x8, #3, lsl #12
// CHECK-NEXT: 210020: 00 b9 40 79 ldrh w0, [x8, #92] // CHECK-NEXT: 210020: 00 59 40 79 ldrh w0, [x8, #44]
// 0x3000 + 0xc5e = 0x3c5e = tcb + var4 // 0x3000 + 0xc2e = 0x3c2e = tcb + var4
// CHECK-NEXT: 210024: 08 0d 40 91 add x8, x8, #3, lsl #12 // CHECK-NEXT: 210024: 08 0d 40 91 add x8, x8, #3, lsl #12
// CHECK-NEXT: 210028: 00 79 71 39 ldrb w0, [x8, #3166] // CHECK-NEXT: 210028: 00 b9 70 39 ldrb w0, [x8, #3118]
// CHECK-SYMS: 0000000000000c00 16 TLS GLOBAL DEFAULT 2 var0 // CHECK-SYMS: 0000000000000c00 0 TLS GLOBAL DEFAULT 2 var0
// CHECK-SYMS-NEXT: 0000000000001810 8 TLS GLOBAL DEFAULT 2 var1 // CHECK-SYMS-NEXT: 0000000000001810 4 TLS GLOBAL DEFAULT 2 var1
// CHECK-SYMS-NEXT: 0000000000002418 4 TLS GLOBAL DEFAULT 2 var2 // CHECK-SYMS-NEXT: 0000000000002418 2 TLS GLOBAL DEFAULT 2 var2
// CHECK-SYMS-NEXT: 000000000000301c 2 TLS GLOBAL DEFAULT 2 var3 // CHECK-SYMS-NEXT: 000000000000301c 1 TLS GLOBAL DEFAULT 2 var3
// CHECK-SYMS-NEXT: 0000000000003c1e 1 TLS GLOBAL DEFAULT 2 var4 // CHECK-SYMS-NEXT: 0000000000003c1e 0 TLS GLOBAL DEFAULT 2 var4
.globl var0 .globl var0
.globl var1 .globl var1
@ -59,12 +59,12 @@ _start: mrs x8, TPIDR_EL0
.type var3,@object .type var3,@object
.section .tbss,"awT",@nobits .section .tbss,"awT",@nobits
.balign 64 .balign 16
.space 1024 * 3 .space 1024 * 3
var0: var0:
.quad 0 .quad 0
.quad 0 .quad 0
.size var0, 16 .size var1, 16
.space 1024 * 3 .space 1024 * 3
var1: var1:
.quad 0 .quad 0
@ -72,14 +72,14 @@ var1:
.space 1024 * 3 .space 1024 * 3
var2: var2:
.word 0 .word 0
.size var2, 4 .size var1, 4
.space 1024 * 3 .space 1024 * 3
var3: var3:
.hword 0 .hword 0
.size var3, 2 .size var2, 2
.space 1024 * 3 .space 1024 * 3
var4: var4:
.byte 0 .byte 0
.size var4, 1 .size var3, 1
.space 1024 * 3 .space 1024 * 3

View File

@ -70,9 +70,9 @@ x:
// CHECK: Disassembly of section .text: // CHECK: Disassembly of section .text:
// CHECK-EMPTY: // CHECK-EMPTY:
// CHECK-NEXT: _start: // CHECK-NEXT: _start:
// offset of x from Thread pointer = (TcbSize + 0x0 = 0x20) // offset of x from Thread pointer = (TcbSize + 0x0 = 0x8)
// CHECK-NEXT: 11000: 20 00 00 00 // CHECK-NEXT: 11000: 08 00 00 00
// offset of z from Thread pointer = (TcbSize + 0x8 = 0x28) // offset of z from Thread pointer = (TcbSize + 0x8 = 0x10)
// CHECK-NEXT: 11004: 28 00 00 00 // CHECK-NEXT: 11004: 10 00 00 00
// offset of y from Thread pointer = (TcbSize + 0x4 = 0x24) // offset of y from Thread pointer = (TcbSize + 0x4 = 0xc)
// CHECK-NEXT: 11008: 24 00 00 00 // CHECK-NEXT: 11008: 0c 00 00 00

View File

@ -37,5 +37,5 @@ x2:
.type x2, %object .type x2, %object
// CHECK: Contents of section .got: // CHECK: Contents of section .got:
// x1 at offset 0x20 from TP, x2 at offset 0x24 from TP. Offsets include TCB size of 0x20 // x1 at offset 8 from TP, x2 at offset 0xc from TP. Offsets include TCB size of 8
// CHECK-NEXT: 12064 20000000 24000000 // CHECK-NEXT: 12064 08000000 0c000000