// llvm-project/lld/test/ELF/ppc64-dtprel.s
//
// (Viewer metadata captured with this copy: blame view ignoring revisions in
// .git-blame-ignore-revs; 176 lines, 5.6 KiB, highlighted as ArmAsm.)

// REQUIRES: ppc
// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
// RUN: ld.lld -shared %t.o -o %t.so
// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
// RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=OutputRelocs %s
// RUN: llvm-readelf -x .got %t.so | FileCheck --check-prefix=HEX-LE %s
// RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=Dis %s
// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
// RUN: ld.lld -shared %t.o -o %t.so
// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
// RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=OutputRelocs %s
// RUN: llvm-readelf -x .got %t.so | FileCheck --check-prefix=HEX-BE %s
// RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=Dis %s
// test: ELFv2 (.abiversion 2) function that performs a local-dynamic TLS
// access to i using the 16-bit DTP-relative forms. The addi/lwa pair below is
// what produces the R_PPC64_DTPREL16 and R_PPC64_DTPREL16_DS relocations that
// the InputRelocs checks expect first.
.text
.abiversion 2
.globl test
.p2align 4
.type test,@function
test:
.Lfunc_gep0:
// Global entry: r2 = r12 + (.TOC. - .Lfunc_gep0).
addis 2, 12, .TOC.-.Lfunc_gep0@ha
addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
.localentry test, .Lfunc_lep0-.Lfunc_gep0
// Save LR at 16(r1), then push a 32-byte frame.
mflr 0
std 0, 16(1)
stdu 1, -32(1)
// r3 = address of the TLSLD GOT entry, passed to __tls_get_addr. The result in
// r3 is used as the base for the DTP-relative accesses that follow.
addis 3, 2, i@got@tlsld@ha
addi 3, 3, i@got@tlsld@l
bl __tls_get_addr(i@tlsld)
// Instruction slot following the call (conventionally the TOC-restore slot).
nop
// 16-bit DTP-relative offset of i as an addi immediate (R_PPC64_DTPREL16) and
// as a DS-form load displacement (R_PPC64_DTPREL16_DS).
addi 4, 3, i@dtprel
lwa 4, i@dtprel(3)
// Pop the 32-byte frame before reloading LR, so that 16(r1) is the slot that
// "std 0, 16(1)" wrote in the prologue. This matches the epilogues of
// test_adjusted and test_not_adjusted. The extra instruction is absorbed by
// the alignment padding in front of the next function (56 -> 60 bytes, still
// padded to 64), so it does not shift any later address.
addi 1, 1, 32
ld 0, 16(1)
mtlr 0
blr
// Declares test_64 as a global function symbol. No "test_64:" label follows
// these directives here, so they only declare the symbol without defining it.
// NOTE(review): this looks like a leftover declaration. Confirm before
// removing it, since dropping the symbol could change the symbol tables and
// shift the addresses hard-coded in the checks.
.globl test_64
.p2align 4
.type test_64,@function
// test_adjusted: local-dynamic TLS access to k that assembles the 64-bit
// DTP-relative offset from the carry-adjusted 16-bit pieces
// (@highesta / @highera / @ha, with @l consumed by addi). These four operands
// produce the R_PPC64_DTPREL16_HIGHESTA, _HIGHERA, _HA and _LO relocations, in
// that order, which the InputRelocs checks rely on.
.globl test_adjusted
.p2align 4
.type test_adjusted,@function
test_adjusted:
.Lfunc_gep1:
// Global entry: r2 = r12 + (.TOC. - .Lfunc_gep1).
addis 2, 12, .TOC.-.Lfunc_gep1@ha
addi 2, 2, .TOC.-.Lfunc_gep1@l
.Lfunc_lep1:
.localentry test_adjusted, .Lfunc_lep1-.Lfunc_gep1
// Save LR at 16(r1), then push a 32-byte frame.
mflr 0
std 0, 16(1)
stdu 1, -32(1)
// r3 = address of the TLSLD GOT entry, passed to __tls_get_addr.
addis 3, 2, k@got@tlsld@ha
addi 3, 3, k@got@tlsld@l
bl __tls_get_addr(k@tlsld)
// Instruction slot following the call (conventionally the TOC-restore slot).
nop
// r4 = upper 32 bits of the offset (highesta:highera), still unshifted.
lis 4, k@dtprel@highesta
ori 4, 4, k@dtprel@highera
// r5 = lower 32 bits: (ha << 16) + l. The @ha form pre-compensates for the
// sign extension of the 16-bit immediate in addi.
lis 5, k@dtprel@ha
addi 5, 5, k@dtprel@l
// Combine the halves into r4 and add the offset to the value in r3.
sldi 4, 4, 32
or 4, 4, 5
add 3, 3, 4
// Pop the frame, reload LR from 16(r1) and return.
addi 1, 1, 32
ld 0, 16(1)
mtlr 0
blr
// test_not_adjusted: local-dynamic TLS access to k that assembles the 64-bit
// DTP-relative offset from the non-adjusted 16-bit pieces
// (@highest / @higher / @h / @l), all merged with logical-OR immediates. These
// operands produce the R_PPC64_DTPREL16_HIGHEST, _HIGHER, _HI and _LO
// relocations, in that order, which the InputRelocs checks rely on.
.globl test_not_adjusted
.p2align 4
.type test_not_adjusted,@function
test_not_adjusted:
.Lfunc_gep2:
// Global entry: r2 = r12 + (.TOC. - .Lfunc_gep2).
addis 2, 12, .TOC.-.Lfunc_gep2@ha
addi 2, 2, .TOC.-.Lfunc_gep2@l
.Lfunc_lep2:
.localentry test_not_adjusted, .Lfunc_lep2-.Lfunc_gep2
// Save LR at 16(r1), then push a 32-byte frame.
mflr 0
std 0, 16(1)
stdu 1, -32(1)
// r3 = address of the TLSLD GOT entry, passed to __tls_get_addr. The GOT
// reference names k so that it agrees with the k@tlsld marker on the call and
// with every DTP-relative operand below; it previously named i.
addis 3, 2, k@got@tlsld@ha
addi 3, 3, k@got@tlsld@l
bl __tls_get_addr(k@tlsld)
// Instruction slot following the call (conventionally the TOC-restore slot).
nop
// r4 = upper 32 bits of the offset (highest:higher), then shifted into place.
lis 4, k@dtprel@highest
ori 4, 4, k@dtprel@higher
sldi 4, 4, 32
// OR in the lower 32 bits. oris/ori do not sign-extend their immediates, so
// the plain @h / @l pieces are used without carry adjustment.
oris 4, 4, k@dtprel@h
ori 4, 4, k@dtprel@l
// Add the offset to the value in r3.
add 3, 3, 4
// Pop the frame, reload LR from 16(r1) and return.
addi 1, 1, 32
ld 0, 16(1)
mtlr 0
blr
// .debug_addr carries one 64-bit DTP-relative reference to i with an explicit
// +32768 (0x8000) addend; the InputRelocs checks expect it to appear as
// R_PPC64_DTPREL64 against i + 8000 (hex).
.section .debug_addr,"",@progbits
.quad i@dtprel+32768
.type i,@object
// Thread-local initialized data. The padding sizes below fix the offsets from
// which the expected values in the checks are computed.
.section .tdata,"awT",@progbits
// 1024 bytes of padding, so i is placed 1024 bytes after this point.
.space 1024
.p2align 2
i:
.long 55
.size i, 4
// 4 MiB of padding, so k is at 1024 + 4 + 1024 * 1024 * 4 = 0x400404 and its
// DTP-relative offset no longer fits in a signed 16-bit field.
.space 1024 * 1024 * 4
.type k,@object
.p2align 2
k:
.long 128
.size k,4
// Verify the input has all the remaining DTPREL based relocations we want to
// test.
// InputRelocs: Relocation section '.rela.text'
// InputRelocs: R_PPC64_DTPREL16 {{[0-9a-f]+}} i + 0
// InputRelocs: R_PPC64_DTPREL16_DS {{[0-9a-f]+}} i + 0
// InputRelocs: R_PPC64_DTPREL16_HIGHESTA {{[0-9a-f]+}} k + 0
// InputRelocs: R_PPC64_DTPREL16_HIGHERA {{[0-9a-f]+}} k + 0
// InputRelocs: R_PPC64_DTPREL16_HA {{[0-9a-f]+}} k + 0
// InputRelocs: R_PPC64_DTPREL16_LO {{[0-9a-f]+}} k + 0
// InputRelocs: R_PPC64_DTPREL16_HIGHEST {{[0-9a-f]+}} k + 0
// InputRelocs: R_PPC64_DTPREL16_HIGHER {{[0-9a-f]+}} k + 0
// InputRelocs: R_PPC64_DTPREL16_HI {{[0-9a-f]+}} k + 0
// InputRelocs: R_PPC64_DTPREL16_LO {{[0-9a-f]+}} k + 0
// InputRelocs: Relocation section '.rela.debug_addr'
// InputRelocs: R_PPC64_DTPREL64 {{[0-9a-f]+}} i + 8000
// Expect a single dynamic relocation in the '.rela.dyn' section, for the module id.
// OutputRelocs: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 1 entries:
// OutputRelocs-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend
// OutputRelocs-NEXT: R_PPC64_DTPMOD64
// Blame annotation captured with this copy (commit message, not part of the
// test):
//
// [ELF][PPC] Allow PT_LOAD to have overlapping p_offset ranges
//
// This change affects the non-linker script case (precisely, when the
// `SECTIONS` command is not used). It deletes 3 alignments at PT_LOAD
// boundaries for the default case: the size of a powerpc64 binary can be
// decreased by at most 192kb. The technique can be ported to other targets.
//
// Let me demonstrate the idea with a maxPageSize=65536 example:
//
// When assigning the address to the first output section of a new PT_LOAD,
// if the end p_vaddr of the previous PT_LOAD is 0x10020, we advance to the
// next multiple of maxPageSize: 0x20000. The new PT_LOAD will thus have
// p_vaddr=0x20000. Because p_offset and p_vaddr are congruent modulo
// maxPageSize, p_offset will be 0x20000, leaving a p_offset gap
// [0x10020, 0x20000) in the output.
//
// Alternatively, if we advance to 0x20020, the new PT_LOAD will have
// p_vaddr=0x20020. We can pick either 0x10020 or 0x20020 for p_offset!
// Obviously 0x10020 is the choice because it leaves no gap. At runtime,
// p_vaddr will be rounded down by pagesize (65536 if pagesize=maxPageSize).
// This PT_LOAD will load additional initial contents from p_offset ranges
// [0x10000,0x10020), which will also be loaded by the previous PT_LOAD. This
// is fine if -z noseparate-code is in effect or if we are not transiting
// between executable and non-executable segments.
//
// ld.bfd -z noseparate-code leverages this technique to keep output small.
// This patch implements the technique in lld, which is mostly effective on
// targets with large defaultMaxPageSize (AArch64/MIPS/PPC: 65536). The 3
// removed alignments can save almost 3*65536 bytes.
//
// Two places that rely on p_vaddr%pagesize = 0 have to be updated.
//
// 1) We used to round p_memsz(PT_GNU_RELRO) up to commonPageSize (defaults to
//    4096 on all targets). Now p_vaddr%commonPageSize may be non-zero. The
//    updated formula takes account of that factor.
// 2) Our TP offsets formulae are only correct if p_vaddr%p_align = 0. Fix
//    them. See the updated comments in InputSection.cpp for details.
//
// On targets where we enable the technique (only PPC64 now), we can
// potentially make `p_vaddr(PT_TLS)%p_align(PT_TLS) != 0` if
// `sh_addralign(.tdata) < sh_addralign(.tbss)`.
//
// This exposes many problems in ld.so implementations, especially the
// offsets of dynamic TLS blocks. Known issues:
//
//   FreeBSD 13.0-CURRENT rtld-elf (i386/amd64/powerpc/arm64)
//   glibc (HEAD) i386 and x86_64
//     https://sourceware.org/bugzilla/show_bug.cgi?id=24606
//   musl<=1.1.22 on TLS Variant I architectures (aarch64/powerpc64/...)
//
// So, force p_vaddr%p_align = 0 by rounding dot up to p_align(PT_TLS).
//
// The technique will be enabled (with updated tests) for other targets in
// subsequent patches.
//
// Reviewed By: ruiu
// Differential Revision: https://reviews.llvm.org/D64906
// llvm-svn: 369343
// 2019-08-20 16:34:25 +08:00
// The got entry for i is at .got+8*1 = 0x4209d0
// i@dtprel = 1024 - 0x8000 = -31744 = 0xffffffffffff8400
// HEX-LE: section '.got':
// HEX-LE-NEXT: 4209c8 c8894200 00000000 00000000 00000000
// HEX-LE-NEXT: 4209d8 00000000 00000000
// HEX-BE: section '.got':
// HEX-BE-NEXT: 4209c8 00000000 004289c8 00000000 00000000
// HEX-BE-NEXT: 4209d8 00000000 00000000
// Dis: <test>:
// Dis: addi 4, 3, -31744
// Dis-NEXT: lwa 4, -31744(3)
// #k@dtprel(1024 + 4 + 1024 * 1024 * 4) = 0x400404
// #highesta(k@dtprel) --> ((0x400404 - 0x8000 + 0x8000) >> 48) & 0xffff = 0
// #highera(k@dtprel) --> ((0x400404 - 0x8000 + 0x8000) >> 32) & 0xffff = 0
// #ha(k@dtprel) --> ((0x400404 - 0x8000 + 0x8000) >> 16) & 0xffff = 64
// #lo(k@dtprel) --> (0x400404 - 0x8000) & 0xffff = 0x8404 = -31740 (as a signed 16-bit immediate)
// Dis: <test_adjusted>:
// Dis: lis 4, 0
// Dis: ori 4, 4, 0
// Dis: lis 5, 64
// Dis: addi 5, 5, -31740
// #highest(k@dtprel) --> ((0x400404 - 0x8000) >> 48) & 0xffff = 0
// #higher(k@dtprel) --> ((0x400404 - 0x8000) >> 32) & 0xffff = 0
// #hi(k@dtprel) --> ((0x400404 - 0x8000) >> 16) & 0xffff = 63
// #lo(k@dtprel) --> (0x400404 - 0x8000) & 0xffff = 0x8404 = 33796 (as an unsigned 16-bit immediate)
// Dis: <test_not_adjusted>:
// Dis: lis 4, 0
// Dis: ori 4, 4, 0
// Dis: oris 4, 4, 63
// Dis: ori 4, 4, 33796