[LLD][PowerPC] Fix bug in PC-Relative initial exec
There is a bug when initial exec is relaxed to local exec.
In the following situation:
InitExec.c
```
extern __thread unsigned TGlobal;
unsigned getConst(unsigned*);
unsigned addVal(unsigned, unsigned*);
unsigned GetAddrT() {
return addVal(getConst(&TGlobal), &TGlobal);
}
```
Def.c
```
__thread unsigned TGlobal;
unsigned getConst(unsigned* A) {
return *A + 3;
}
unsigned addVal(unsigned A, unsigned* B) {
return A + *B;
}
```
The problem is in InitExec.c but Def.c is required if you want to link the example and see the problem.
To compile everything:
```
clang -O3 -mcpu=pwr10 -c InitExec.c
clang -O3 -mcpu=pwr10 -c Def.c
ld.lld InitExec.o Def.o -o IeToLe
```
If you objdump the problem object file:
```
$ llvm-objdump -dr --mcpu=pwr10 InitExec.o
```
you will get the following assembly:
```
0000000000000000 <GetAddrT>:
0: a6 02 08 7c mflr 0
4: f0 ff c1 fb std 30, -16(1)
8: 10 00 01 f8 std 0, 16(1)
c: d1 ff 21 f8 stdu 1, -48(1)
10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1
0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal
18: 14 6a c3 7f add 30, 3, 13
0000000000000019: R_PPC64_TLS TGlobal
1c: 78 f3 c3 7f mr 3, 30
20: 01 00 00 48 bl 0x20
0000000000000020: R_PPC64_REL24_NOTOC getConst
24: 78 f3 c4 7f mr 4, 30
28: 30 00 21 38 addi 1, 1, 48
2c: 10 00 01 e8 ld 0, 16(1)
30: f0 ff c1 eb ld 30, -16(1)
34: a6 03 08 7c mtlr 0
38: 00 00 00 48 b 0x38
0000000000000038: R_PPC64_REL24_NOTOC addVal
```
The lines of interest are:
```
10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1
0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal
18: 14 6a c3 7f add 30, 3, 13
0000000000000019: R_PPC64_TLS TGlobal
1c: 78 f3 c3 7f mr 3, 30
```
Which once linked gets turned into:
```
10010210: ff ff 03 06 00 90 6d 38 paddi 3, 13, -28672, 0
10010218: 00 00 00 60 nop
1001021c: 78 f3 c3 7f mr 3, 30
```
The problem is that register 30 is never set after the optimization.
Therefore it is not correct to relax the above instructions by replacing
the add instruction with a nop.
Instead the add instruction should be replaced with a copy (mr) instruction.
If the add uses the same register as both input and output, then it is still
safe to replace the add with a nop.
Reviewed By: MaskRay
Differential Revision: https://reviews.llvm.org/D95262
2021-03-22 22:53:43 +08:00
|
|
|
# REQUIRES: ppc
|
|
|
|
# RUN: split-file %s %t
|
|
|
|
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/initexec -o %t/initexec.o
|
|
|
|
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t/defs.o
|
|
|
|
# RUN: ld.lld %t/initexec.o %t/defs.o -o %t/out
|
2022-07-13 12:07:45 +08:00
|
|
|
# RUN: llvm-objdump -d --no-show-raw-insn %t/out | FileCheck %s
|
[LLD][PowerPC] Fix bug in PC-Relative initial exec
There is a bug when initial exec is relaxed to local exec.
In the following situation:
InitExec.c
```
extern __thread unsigned TGlobal;
unsigned getConst(unsigned*);
unsigned addVal(unsigned, unsigned*);
unsigned GetAddrT() {
return addVal(getConst(&TGlobal), &TGlobal);
}
```
Def.c
```
__thread unsigned TGlobal;
unsigned getConst(unsigned* A) {
return *A + 3;
}
unsigned addVal(unsigned A, unsigned* B) {
return A + *B;
}
```
The problem is in InitExec.c but Def.c is required if you want to link the example and see the problem.
To compile everything:
```
clang -O3 -mcpu=pwr10 -c InitExec.c
clang -O3 -mcpu=pwr10 -c Def.c
ld.lld InitExec.o Def.o -o IeToLe
```
If you objdump the problem object file:
```
$ llvm-objdump -dr --mcpu=pwr10 InitExec.o
```
you will get the following assembly:
```
0000000000000000 <GetAddrT>:
0: a6 02 08 7c mflr 0
4: f0 ff c1 fb std 30, -16(1)
8: 10 00 01 f8 std 0, 16(1)
c: d1 ff 21 f8 stdu 1, -48(1)
10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1
0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal
18: 14 6a c3 7f add 30, 3, 13
0000000000000019: R_PPC64_TLS TGlobal
1c: 78 f3 c3 7f mr 3, 30
20: 01 00 00 48 bl 0x20
0000000000000020: R_PPC64_REL24_NOTOC getConst
24: 78 f3 c4 7f mr 4, 30
28: 30 00 21 38 addi 1, 1, 48
2c: 10 00 01 e8 ld 0, 16(1)
30: f0 ff c1 eb ld 30, -16(1)
34: a6 03 08 7c mtlr 0
38: 00 00 00 48 b 0x38
0000000000000038: R_PPC64_REL24_NOTOC addVal
```
The lines of interest are:
```
10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1
0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal
18: 14 6a c3 7f add 30, 3, 13
0000000000000019: R_PPC64_TLS TGlobal
1c: 78 f3 c3 7f mr 3, 30
```
Which once linked gets turned into:
```
10010210: ff ff 03 06 00 90 6d 38 paddi 3, 13, -28672, 0
10010218: 00 00 00 60 nop
1001021c: 78 f3 c3 7f mr 3, 30
```
The problem is that register 30 is never set after the optimization.
Therefore it is not correct to relax the above instructions by replacing
the add instruction with a nop.
Instead the add instruction should be replaced with a copy (mr) instruction.
If the add uses the same register as both input and output, then it is still
safe to replace the add with a nop.
Reviewed By: MaskRay
Differential Revision: https://reviews.llvm.org/D95262
2021-03-22 22:53:43 +08:00
|
|
|
|
|
|
|
# CHECK-LABEL: <GetAddrT>:
|
|
|
|
# CHECK: mflr 0
|
|
|
|
# CHECK-NEXT: std 30, -16(1)
|
|
|
|
# CHECK-NEXT: std 0, 16(1)
|
|
|
|
# CHECK-NEXT: stdu 1, -48(1)
|
|
|
|
# CHECK-NEXT: paddi 3, 13, -28672, 0
|
|
|
|
# CHECK-NEXT: mr 30, 3
|
|
|
|
# CHECK-NEXT: mr 3, 30
|
|
|
|
# CHECK-NEXT: bl
|
|
|
|
# CHECK-NEXT: mr 4, 30
|
|
|
|
# CHECK-NEXT: addi 1, 1, 48
|
|
|
|
# CHECK-NEXT: ld 0, 16(1)
|
|
|
|
# CHECK-NEXT: ld 30, -16(1)
|
|
|
|
# CHECK-NEXT: mtlr 0
|
|
|
|
# CHECK-NEXT: b
|
|
|
|
|
|
|
|
## Generated From:
|
|
|
|
## extern __thread unsigned TGlobal;
|
|
|
|
## unsigned getConst(unsigned*);
|
|
|
|
## unsigned addVal(unsigned, unsigned*);
|
|
|
|
##
|
|
|
|
## unsigned GetAddrT() {
|
|
|
|
## return addVal(getConst(&TGlobal), &TGlobal);
|
|
|
|
## }
|
|
|
|
|
|
|
|
//--- initexec
|
|
|
|
## GetAddrT: computes &TGlobal once, keeps it in r30 across the call to
## getConst, then tail-calls addVal with the getConst result still in r3 and
## the saved address in r4. The pld/add pair below is the PC-relative
## initial-exec TLS sequence that this test expects the linker to relax.
GetAddrT:
|
|
|
|
## Prologue: save LR and r30, then allocate a 48-byte frame.
mflr 0
|
|
|
|
std 30, -16(1)
|
|
|
|
std 0, 16(1)
|
|
|
|
stdu 1, -48(1)
|
|
|
|
## Initial-exec access to TGlobal. The CHECK lines expect this instruction to
## be rewritten to `paddi 3, 13, -28672, 0` after linking.
pld 3, TGlobal@got@tprel@pcrel(0), 1
|
|
|
|
## The destination (r30) differs from the source (r3), so the relaxed form
## must be `mr 30, 3` rather than a nop; otherwise r30 would never be set.
add 30, 3, TGlobal@tls@pcrel
|
|
|
|
## First argument of getConst: the address held in r30.
mr 3, 30
|
|
|
|
bl getConst@notoc
|
|
|
|
## Second argument of addVal: the same address, still held in r30.
mr 4, 30
|
|
|
|
## Epilogue: release the frame and restore LR and r30.
addi 1, 1, 48
|
|
|
|
ld 0, 16(1)
|
|
|
|
ld 30, -16(1)
|
|
|
|
mtlr 0
|
|
|
|
## Tail call to addVal.
b addVal@notoc
|
|
|
|
|
|
|
|
## Generated From:
|
|
|
|
## __thread unsigned TGlobal;
|
|
|
|
##
|
|
|
|
## unsigned getConst(unsigned* A) {
|
|
|
|
## return *A + 3;
|
|
|
|
## }
|
|
|
|
##
|
|
|
|
## unsigned addVal(unsigned A, unsigned* B) {
|
|
|
|
## return A + *B;
|
|
|
|
## }
|
|
|
|
|
|
|
|
//--- defs
|
|
|
|
## getConst: assembly for `return *A + 3;` (see the C source quoted above).
## Only needed so that the initexec object can be linked.
.globl getConst
|
|
|
|
getConst:
|
|
|
|
## Load the 32-bit value that r3 points to.
lwz 3, 0(3)
|
|
|
|
addi 3, 3, 3
|
|
|
|
## Clear the upper 32 bits of the result.
clrldi 3, 3, 32
|
|
|
|
blr
|
|
|
|
|
|
|
|
## addVal: assembly for `return A + *B;` (see the C source quoted above).
## Only needed so that the initexec object can be linked.
.globl addVal
|
|
|
|
addVal:
|
|
|
|
## Load the 32-bit value that r4 points to.
lwz 4, 0(4)
|
|
|
|
add 3, 4, 3
|
|
|
|
## Clear the upper 32 bits of the result.
clrldi 3, 3, 32
|
|
|
|
blr
|
|
|
|
|
|
|
|
## Definition of the thread-local variable TGlobal: a global, 4-byte,
## zero-initialised object placed in .tbss.
.section .tbss,"awT",@nobits
|
|
|
|
.globl TGlobal
|
|
|
|
## Align to 2^2 = 4 bytes.
.p2align 2
|
|
|
|
TGlobal:
|
|
|
|
.long 0
|
|
|
|
.size TGlobal, 4
|