llvm-project/lld/test/ELF/ppc64-tls-pcrel-ie.s

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

141 lines
4.1 KiB
ArmAsm
Raw Normal View History

# REQUIRES: ppc
## Split this file into its three parts (lds, defs, asm) and assemble the two
## assembly parts for little-endian 64-bit PowerPC.
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/asm -o %t.o
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t-defs.o
## Link the code under test twice with the same linker script: once against a
## shared library built from defs (%t-ie) and once against the defs object
## directly (%t-le).
# RUN: ld.lld --shared %t-defs.o --soname=t-defs -o %t-defs.so
# RUN: ld.lld -T %t/lds %t.o %t-defs.so -o %t-ie
# RUN: ld.lld -T %t/lds %t.o %t-defs.o -o %t-le
## Inspect relocations, symbols, .got contents and disassembly of %t-ie.
# RUN: llvm-readelf -r %t-ie | FileCheck %s --check-prefix=IE-RELOC
# RUN: llvm-readelf -s %t-ie | FileCheck %s --check-prefix=IE-SYM
# RUN: llvm-readelf -x .got %t-ie | FileCheck %s --check-prefix=IE-GOT
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-ie | FileCheck %s --check-prefix=IE
## Same inspections for %t-le. The .got dump also captures stderr because the
## text expected there is a diagnostic saying the section does not exist.
# RUN: llvm-readelf -r %t-le | FileCheck %s --check-prefix=LE-RELOC
# RUN: llvm-readelf -s %t-le | FileCheck %s --check-prefix=LE-SYM
# RUN: llvm-readelf -x .got %t-le 2>&1 | FileCheck %s --check-prefix=LE-GOT
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-le | FileCheck %s --check-prefix=LE
## This test checks the Initial Exec PC Relative TLS implementation.
## The IE version checks that the relocations are generated correctly.
## The LE version checks that the Initial Exec to Local Exec relaxation is
## done correctly.
#--- lds
/* Place each of the test's code sections at a fixed address. The
   displacements expected in the disassembly checks of the asm part are
   computed from these addresses. */
SECTIONS {
  .text_addr 0x1001000 : {
    *(.text_addr)
  }
  .text_val 0x1002000 : {
    *(.text_val)
  }
  .text_twoval 0x1003000 : {
    *(.text_twoval)
  }
  .text_incrval 0x1004000 : {
    *(.text_incrval)
  }
}
#--- defs
## Definitions of the two global thread-local variables used by the test.
## Each one is a single zero-initialised .long placed in .tbss, x first and
## y directly after it.
        .section .tbss,"awT",@nobits
        .globl x
x:
        .long 0
        .globl y
y:
        .long 0
#--- asm
## Expectations for the link against the shared library: x and y stay
## undefined TLS symbols in .dynsym, and each gets one R_PPC64_TPREL64 dynamic
## relocation (at 0x10040e0 for x and 0x10040e8 for y).
# IE-RELOC: Relocation section '.rela.dyn' at offset 0x10090 contains 2 entries:
# IE-RELOC: 00000000010040e0 0000000100000049 R_PPC64_TPREL64 0000000000000000 x + 0
# IE-RELOC: 00000000010040e8 0000000200000049 R_PPC64_TPREL64 0000000000000000 y + 0
# IE-SYM: Symbol table '.dynsym' contains 3 entries:
# IE-SYM: 1: 0000000000000000 0 TLS GLOBAL DEFAULT UND x
# IE-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND y
## Only the first 16 bytes of .got (starting at 0x10040d8) are checked; the
## slot for x at 0x10040e0 is expected to be zero in the file.
# IE-GOT: Hex dump of section '.got':
# IE-GOT-NEXT: 0x010040d8 d8c00001 00000000 00000000 00000000
## Expectation for the link against the defs object: nothing is left for the
## dynamic loader to relocate.
# LE-RELOC: There are no relocations in this file.
[LLD][PowerPC] Fix bug in PC-Relative initial exec There is a bug when initial exec is relaxed to local exec. In the following situation: InitExec.c ``` extern __thread unsigned TGlobal; unsigned getConst(unsigned*); unsigned addVal(unsigned, unsigned*); unsigned GetAddrT() { return addVal(getConst(&TGlobal), &TGlobal); } ``` Def.c ``` __thread unsigned TGlobal; unsigned getConst(unsigned* A) { return *A + 3; } unsigned addVal(unsigned A, unsigned* B) { return A + *B; } ``` The problem is in InitExec.c but Def.c is required if you want to link the example and see the problem. To compile everything: ``` clang -O3 -mcpu=pwr10 -c InitExec.c clang -O3 -mcpu=pwr10 -c Def.c ld.lld InitExec.o Def.o -o IeToLe ``` If you objdump the problem object file: ``` $ llvm-objdump -dr --mcpu=pwr10 InitExec.o ``` you will get the following assembly: ``` 0000000000000000 <GetAddrT>: 0: a6 02 08 7c mflr 0 4: f0 ff c1 fb std 30, -16(1) 8: 10 00 01 f8 std 0, 16(1) c: d1 ff 21 f8 stdu 1, -48(1) 10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal 18: 14 6a c3 7f add 30, 3, 13 0000000000000019: R_PPC64_TLS TGlobal 1c: 78 f3 c3 7f mr 3, 30 20: 01 00 00 48 bl 0x20 0000000000000020: R_PPC64_REL24_NOTOC getConst 24: 78 f3 c4 7f mr 4, 30 28: 30 00 21 38 addi 1, 1, 48 2c: 10 00 01 e8 ld 0, 16(1) 30: f0 ff c1 eb ld 30, -16(1) 34: a6 03 08 7c mtlr 0 38: 00 00 00 48 b 0x38 0000000000000038: R_PPC64_REL24_NOTOC addVal ``` The lines of interest are: ``` 10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal 18: 14 6a c3 7f add 30, 3, 13 0000000000000019: R_PPC64_TLS TGlobal 1c: 78 f3 c3 7f mr 3, 30 ``` Which once linked gets turned into: ``` 10010210: ff ff 03 06 00 90 6d 38 paddi 3, 13, -28672, 0 10010218: 00 00 00 60 nop 1001021c: 78 f3 c3 7f mr 3, 30 ``` The problem is that register 30 is never set after the optimization. 
Therefore it is not correct to relax the above instructions by replacing the add instruction with a nop. Instead the add instruction should be replaced with a copy (mr) instruction. If the add uses the same register as input and as output then it is safe to continue to replace the add with a nop. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D95262
2021-03-22 22:53:43 +08:00
## In the link against the defs object x and y are defined TLS symbols, at
## values 0 and 4 in section index 6, and the output has no .got section.
# LE-SYM: Symbol table '.symtab' contains 8 entries:
# LE-SYM: 6: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x
# LE-SYM: 7: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y
# LE-GOT: could not find section '.got'
## IEAddr forms the address of x in r3: a PC-relative pld of x's GOT TP-relative
## entry followed by an add that is marked with x@tls@pcrel.
## Initial-exec link: both instructions are kept, the pld displacement is
## 12512 (0x10040e0 - 0x1001000) and the add uses r13.
## Local-exec link: the pld becomes a paddi from r13 with offset -28672, and
## because the add has the same source and destination register it is
## replaced by a nop.
# IE-LABEL: <IEAddr>:
# IE-NEXT: pld 3, 12512(0), 1
# IE-NEXT: add 3, 3, 13
# IE-NEXT: blr
# LE-LABEL: <IEAddr>:
# LE-NEXT: paddi 3, 13, -28672, 0
# LE-NEXT: nop
# LE-NEXT: blr
.section .text_addr, "ax", %progbits
IEAddr:
pld 3, x@got@tprel@pcrel(0), 1
add 3, 3, x@tls@pcrel
blr
[LLD][PowerPC] Fix bug in PC-Relative initial exec There is a bug when initial exec is relaxed to local exec. In the following situation: InitExec.c ``` extern __thread unsigned TGlobal; unsigned getConst(unsigned*); unsigned addVal(unsigned, unsigned*); unsigned GetAddrT() { return addVal(getConst(&TGlobal), &TGlobal); } ``` Def.c ``` __thread unsigned TGlobal; unsigned getConst(unsigned* A) { return *A + 3; } unsigned addVal(unsigned A, unsigned* B) { return A + *B; } ``` The problem is in InitExec.c but Def.c is required if you want to link the example and see the problem. To compile everything: ``` clang -O3 -mcpu=pwr10 -c InitExec.c clang -O3 -mcpu=pwr10 -c Def.c ld.lld InitExec.o Def.o -o IeToLe ``` If you objdump the problem object file: ``` $ llvm-objdump -dr --mcpu=pwr10 InitExec.o ``` you will get the following assembly: ``` 0000000000000000 <GetAddrT>: 0: a6 02 08 7c mflr 0 4: f0 ff c1 fb std 30, -16(1) 8: 10 00 01 f8 std 0, 16(1) c: d1 ff 21 f8 stdu 1, -48(1) 10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal 18: 14 6a c3 7f add 30, 3, 13 0000000000000019: R_PPC64_TLS TGlobal 1c: 78 f3 c3 7f mr 3, 30 20: 01 00 00 48 bl 0x20 0000000000000020: R_PPC64_REL24_NOTOC getConst 24: 78 f3 c4 7f mr 4, 30 28: 30 00 21 38 addi 1, 1, 48 2c: 10 00 01 e8 ld 0, 16(1) 30: f0 ff c1 eb ld 30, -16(1) 34: a6 03 08 7c mtlr 0 38: 00 00 00 48 b 0x38 0000000000000038: R_PPC64_REL24_NOTOC addVal ``` The lines of interest are: ``` 10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal 18: 14 6a c3 7f add 30, 3, 13 0000000000000019: R_PPC64_TLS TGlobal 1c: 78 f3 c3 7f mr 3, 30 ``` Which once linked gets turned into: ``` 10010210: ff ff 03 06 00 90 6d 38 paddi 3, 13, -28672, 0 10010218: 00 00 00 60 nop 1001021c: 78 f3 c3 7f mr 3, 30 ``` The problem is that register 30 is never set after the optimization. 
Therefore it is not correct to relax the above instructions by replacing the add instruction with a nop. Instead the add instruction should be replaced with a copy (mr) instruction. If the add uses the same register as input and as output then it is safe to continue to replace the add with a nop. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D95262
2021-03-22 22:53:43 +08:00
# IE-LABEL: <IEAddrCopy>:
# IE-NEXT: pld 3, 12496(0), 1
[LLD][PowerPC] Fix bug in PC-Relative initial exec There is a bug when initial exec is relaxed to local exec. In the following situation: InitExec.c ``` extern __thread unsigned TGlobal; unsigned getConst(unsigned*); unsigned addVal(unsigned, unsigned*); unsigned GetAddrT() { return addVal(getConst(&TGlobal), &TGlobal); } ``` Def.c ``` __thread unsigned TGlobal; unsigned getConst(unsigned* A) { return *A + 3; } unsigned addVal(unsigned A, unsigned* B) { return A + *B; } ``` The problem is in InitExec.c but Def.c is required if you want to link the example and see the problem. To compile everything: ``` clang -O3 -mcpu=pwr10 -c InitExec.c clang -O3 -mcpu=pwr10 -c Def.c ld.lld InitExec.o Def.o -o IeToLe ``` If you objdump the problem object file: ``` $ llvm-objdump -dr --mcpu=pwr10 InitExec.o ``` you will get the following assembly: ``` 0000000000000000 <GetAddrT>: 0: a6 02 08 7c mflr 0 4: f0 ff c1 fb std 30, -16(1) 8: 10 00 01 f8 std 0, 16(1) c: d1 ff 21 f8 stdu 1, -48(1) 10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal 18: 14 6a c3 7f add 30, 3, 13 0000000000000019: R_PPC64_TLS TGlobal 1c: 78 f3 c3 7f mr 3, 30 20: 01 00 00 48 bl 0x20 0000000000000020: R_PPC64_REL24_NOTOC getConst 24: 78 f3 c4 7f mr 4, 30 28: 30 00 21 38 addi 1, 1, 48 2c: 10 00 01 e8 ld 0, 16(1) 30: f0 ff c1 eb ld 30, -16(1) 34: a6 03 08 7c mtlr 0 38: 00 00 00 48 b 0x38 0000000000000038: R_PPC64_REL24_NOTOC addVal ``` The lines of interest are: ``` 10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal 18: 14 6a c3 7f add 30, 3, 13 0000000000000019: R_PPC64_TLS TGlobal 1c: 78 f3 c3 7f mr 3, 30 ``` Which once linked gets turned into: ``` 10010210: ff ff 03 06 00 90 6d 38 paddi 3, 13, -28672, 0 10010218: 00 00 00 60 nop 1001021c: 78 f3 c3 7f mr 3, 30 ``` The problem is that register 30 is never set after the optimization. 
Therefore it is not correct to relax the above instructions by replacing the add instruction with a nop. Instead the add instruction should be replaced with a copy (mr) instruction. If the add uses the same register as input and as output then it is safe to continue to replace the add with a nop. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D95262
2021-03-22 22:53:43 +08:00
# IE-NEXT: add 4, 3, 13
# IE-NEXT: blr
## IEAddrCopy uses the same pld as IEAddr, but its add writes r4 instead of r3.
## After relaxation to local exec the add cannot just be dropped, because r4
## would then never be set; it is expected to turn into a register copy
## (mr 4, 3) of the address that the paddi produced in r3.
## The function is placed in .text_addr as well, so it follows IEAddr there.
# LE-LABEL: <IEAddrCopy>:
# LE-NEXT: paddi 3, 13, -28672, 0
# LE-NEXT: mr 4, 3
# LE-NEXT: blr
.section .text_addr, "ax", %progbits
IEAddrCopy:
pld 3, x@got@tprel@pcrel(0), 1
add 4, 3, x@tls@pcrel
blr
## IEVal loads the 32-bit value of x into r3: a PC-relative pld of x's GOT
## TP-relative entry followed by an indexed load marked with x@tls@pcrel.
## Initial-exec link: the pld displacement is 8416 (0x10040e0 - 0x1002000)
## and the lwzx indexes with r13.
## Local-exec link: the pld becomes a paddi from r13 with offset -28672 and
## the lwzx becomes an lwz with displacement 0 from the paddi result.
# IE-LABEL: <IEVal>:
# IE-NEXT: pld 3, 8416(0), 1
# IE-NEXT: lwzx 3, 3, 13
# IE-NEXT: blr
# LE-LABEL: <IEVal>:
# LE-NEXT: paddi 3, 13, -28672, 0
# LE-NEXT: lwz 3, 0(3)
# LE-NEXT: blr
.section .text_val, "ax", %progbits
IEVal:
pld 3, x@got@tprel@pcrel(0), 1
lwzx 3, 3, x@tls@pcrel
blr
## IETwoVal loads x into r3 and y into r4, with the two pld instructions
## issued before the two marked loads.
## Initial-exec link: both pld displacements are 4320, since the second pld
## is 8 bytes further on and y's GOT slot is 8 bytes after x's.
## Local-exec link: each pld becomes a paddi from r13 (-28672 for x, -28668
## for y, i.e. 4 bytes apart) and each lwzx becomes an lwz with displacement 0.
# IE-LABEL: <IETwoVal>:
# IE-NEXT: pld 3, 4320(0), 1
# IE-NEXT: pld 4, 4320(0), 1
# IE-NEXT: lwzx 3, 3, 13
# IE-NEXT: lwzx 4, 4, 13
# IE-NEXT: blr
# LE-LABEL: <IETwoVal>:
# LE-NEXT: paddi 3, 13, -28672, 0
# LE-NEXT: paddi 4, 13, -28668, 0
# LE-NEXT: lwz 3, 0(3)
# LE-NEXT: lwz 4, 0(4)
# LE-NEXT: blr
.section .text_twoval, "ax", %progbits
IETwoVal:
pld 3, x@got@tprel@pcrel(0), 1
pld 4, y@got@tprel@pcrel(0), 1
lwzx 3, 3, x@tls@pcrel
lwzx 4, 4, y@tls@pcrel
blr
## IEIncrementVal uses one pld result (r4) for two marked accesses to y: a
## load into r3 followed by a store of r3. Despite the name there is no
## arithmetic between the load and the store.
## Initial-exec link: the pld displacement is 232 (0x10040e8 - 0x1004000) and
## both accesses index with r13.
## Local-exec link: the pld becomes a paddi from r13 with offset -28668, and
## both the lwzx and the stwx become displacement-0 forms (lwz / stw) on r4.
# IE-LABEL: <IEIncrementVal>:
# IE-NEXT: pld 4, 232(0), 1
# IE-NEXT: lwzx 3, 4, 13
# IE-NEXT: stwx 3, 4, 13
# IE-NEXT: blr
# LE-LABEL: <IEIncrementVal>:
# LE-NEXT: paddi 4, 13, -28668, 0
# LE-NEXT: lwz 3, 0(4)
# LE-NEXT: stw 3, 0(4)
# LE-NEXT: blr
.section .text_incrval, "ax", %progbits
IEIncrementVal:
pld 4, y@got@tprel@pcrel(0), 1
lwzx 3, 4, y@tls@pcrel
stwx 3, 4, y@tls@pcrel
blr