forked from OSchip/llvm-project
[LLD][PowerPC] Fix bug in PC-Relative initial exec
There is a bug when initial exec is relaxed to local exec in the following situation:

InitExec.c
```
extern __thread unsigned TGlobal;
unsigned getConst(unsigned*);
unsigned addVal(unsigned, unsigned*);

unsigned GetAddrT() {
  return addVal(getConst(&TGlobal), &TGlobal);
}
```

Def.c
```
__thread unsigned TGlobal;

unsigned getConst(unsigned* A) {
  return *A + 3;
}

unsigned addVal(unsigned A, unsigned* B) {
  return A + *B;
}
```

The problem is in InitExec.c, but Def.c is required if you want to link the example and see the problem.

To compile everything:
```
clang -O3 -mcpu=pwr10 -c InitExec.c
clang -O3 -mcpu=pwr10 -c Def.c
ld.lld InitExec.o Def.o -o IeToLe
```

If you objdump the problem object file:
```
$ llvm-objdump -dr --mcpu=pwr10 InitExec.o
```
you will get the following assembly:
```
0000000000000000 <GetAddrT>:
   0: a6 02 08 7c                mflr 0
   4: f0 ff c1 fb                std 30, -16(1)
   8: 10 00 01 f8                std 0, 16(1)
   c: d1 ff 21 f8                stdu 1, -48(1)
  10: 00 00 10 04 00 00 60 e4    pld 3, 0(0), 1
      0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal
  18: 14 6a c3 7f                add 30, 3, 13
      0000000000000019: R_PPC64_TLS TGlobal
  1c: 78 f3 c3 7f                mr 3, 30
  20: 01 00 00 48                bl 0x20
      0000000000000020: R_PPC64_REL24_NOTOC getConst
  24: 78 f3 c4 7f                mr 4, 30
  28: 30 00 21 38                addi 1, 1, 48
  2c: 10 00 01 e8                ld 0, 16(1)
  30: f0 ff c1 eb                ld 30, -16(1)
  34: a6 03 08 7c                mtlr 0
  38: 00 00 00 48                b 0x38
      0000000000000038: R_PPC64_REL24_NOTOC addVal
```

The lines of interest are:
```
  10: 00 00 10 04 00 00 60 e4    pld 3, 0(0), 1
      0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal
  18: 14 6a c3 7f                add 30, 3, 13
      0000000000000019: R_PPC64_TLS TGlobal
  1c: 78 f3 c3 7f                mr 3, 30
```

Once linked, these get turned into:
```
10010210: ff ff 03 06 00 90 6d 38    paddi 3, 13, -28672, 0
10010218: 00 00 00 60                nop
1001021c: 78 f3 c3 7f                mr 3, 30
```

The problem is that register 30 is never set after the optimization. Therefore it is not correct to relax the above instructions by replacing the add instruction with a nop.
Instead, the add instruction should be replaced with a copy (mr) instruction. If the add uses the same register as input and as output then it is safe to continue to replace the add with a nop. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D95262
This commit is contained in:
parent
cec244354b
commit
f21704e080
|
@ -920,7 +920,15 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
|
|||
// that comes before it will already have computed the address of the
|
||||
// symbol.
|
||||
if (secondaryOp == 266) {
|
||||
write32(loc - 1, NOP);
|
||||
// Check if the add uses the same result register as the input register.
|
||||
uint32_t rt = (tlsInstr & 0x03E00000) >> 21; // bits 6-10
|
||||
uint32_t ra = (tlsInstr & 0x001F0000) >> 16; // bits 11-15
|
||||
if (ra == rt) {
|
||||
write32(loc - 1, NOP);
|
||||
} else {
|
||||
// mr rt, ra
|
||||
write32(loc - 1, 0x7C000378 | (rt << 16) | (ra << 21) | (ra << 11));
|
||||
}
|
||||
} else {
|
||||
uint32_t dFormOp = getPPCDFormOp(secondaryOp);
|
||||
if (dFormOp == 0)
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
# REQUIRES: ppc
|
||||
# RUN: split-file %s %t
|
||||
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/initexec -o %t/initexec.o
|
||||
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t/defs.o
|
||||
# RUN: ld.lld %t/initexec.o %t/defs.o -o %t/out
|
||||
# RUN: llvm-objdump -d --mcpu=pwr10 --no-show-raw-insn %t/out | FileCheck %s
|
||||
|
||||
# CHECK-LABEL: <GetAddrT>:
|
||||
# CHECK: mflr 0
|
||||
# CHECK-NEXT: std 30, -16(1)
|
||||
# CHECK-NEXT: std 0, 16(1)
|
||||
# CHECK-NEXT: stdu 1, -48(1)
|
||||
# CHECK-NEXT: paddi 3, 13, -28672, 0
|
||||
# CHECK-NEXT: mr 30, 3
|
||||
# CHECK-NEXT: mr 3, 30
|
||||
# CHECK-NEXT: bl
|
||||
# CHECK-NEXT: mr 4, 30
|
||||
# CHECK-NEXT: addi 1, 1, 48
|
||||
# CHECK-NEXT: ld 0, 16(1)
|
||||
# CHECK-NEXT: ld 30, -16(1)
|
||||
# CHECK-NEXT: mtlr 0
|
||||
# CHECK-NEXT: b
|
||||
|
||||
## Generated From:
|
||||
## extern __thread unsigned TGlobal;
|
||||
## unsigned getConst(unsigned*);
|
||||
## unsigned addVal(unsigned, unsigned*);
|
||||
##
|
||||
## unsigned GetAddrT() {
|
||||
## return addVal(getConst(&TGlobal), &TGlobal);
|
||||
## }
|
||||
|
||||
//--- initexec
|
||||
GetAddrT:
|
||||
mflr 0
|
||||
std 30, -16(1)
|
||||
std 0, 16(1)
|
||||
stdu 1, -48(1)
|
||||
pld 3, TGlobal@got@tprel@pcrel(0), 1
|
||||
add 30, 3, TGlobal@tls@pcrel
|
||||
mr 3, 30
|
||||
bl getConst@notoc
|
||||
mr 4, 30
|
||||
addi 1, 1, 48
|
||||
ld 0, 16(1)
|
||||
ld 30, -16(1)
|
||||
mtlr 0
|
||||
b addVal@notoc
|
||||
|
||||
## Generated From:
|
||||
## __thread unsigned TGlobal;
|
||||
##
|
||||
## unsigned getConst(unsigned* A) {
|
||||
## return *A + 3;
|
||||
## }
|
||||
##
|
||||
## unsigned addVal(unsigned A, unsigned* B) {
|
||||
## return A + *B;
|
||||
## }
|
||||
|
||||
//--- defs
|
||||
.globl getConst
|
||||
getConst:
|
||||
lwz 3, 0(3)
|
||||
addi 3, 3, 3
|
||||
clrldi 3, 3, 32
|
||||
blr
|
||||
|
||||
.globl addVal
|
||||
addVal:
|
||||
lwz 4, 0(4)
|
||||
add 3, 4, 3
|
||||
clrldi 3, 3, 32
|
||||
blr
|
||||
|
||||
.section .tbss,"awT",@nobits
|
||||
.globl TGlobal
|
||||
.p2align 2
|
||||
TGlobal:
|
||||
.long 0
|
||||
.size TGlobal, 4
|
|
@ -54,9 +54,9 @@ y:
|
|||
|
||||
# LE-RELOC: There are no relocations in this file.
|
||||
|
||||
# LE-SYM: Symbol table '.symtab' contains 7 entries:
|
||||
# LE-SYM: 5: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x
|
||||
# LE-SYM: 6: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y
|
||||
# LE-SYM: Symbol table '.symtab' contains 8 entries:
|
||||
# LE-SYM: 6: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x
|
||||
# LE-SYM: 7: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y
|
||||
|
||||
# LE-GOT: could not find section '.got'
|
||||
|
||||
|
@ -74,6 +74,20 @@ IEAddr:
|
|||
add 3, 3, x@tls@pcrel
|
||||
blr
|
||||
|
||||
# IE-LABEL: <IEAddrCopy>:
|
||||
# IE-NEXT: pld 3, 12488(0), 1
|
||||
# IE-NEXT: add 4, 3, 13
|
||||
# IE-NEXT: blr
|
||||
# LE-LABEL: <IEAddrCopy>:
|
||||
# LE-NEXT: paddi 3, 13, -28672, 0
|
||||
# LE-NEXT: mr 4, 3
|
||||
# LE-NEXT: blr
|
||||
.section .text_addr, "ax", %progbits
|
||||
IEAddrCopy:
|
||||
pld 3, x@got@tprel@pcrel(0), 1
|
||||
add 4, 3, x@tls@pcrel
|
||||
blr
|
||||
|
||||
# IE-LABEL: <IEVal>:
|
||||
# IE-NEXT: pld 3, 8408(0), 1
|
||||
# IE-NEXT: lwzx 3, 3, 13
|
||||
|
|
Loading…
Reference in New Issue