[ELF][PPC64] Detect missing R_PPC64_TLSGD/R_PPC64_TLSLD and disable TLS relaxation

Alternative to D91611.

The TLS General Dynamic/Local Dynamic code sequences need to mark
`__tls_get_addr` with R_PPC64_TLSGD or R_PPC64_TLSLD, e.g.

```
addis r3, r2, x@got@tlsgd@ha # R_PPC64_GOT_TLSGD16_HA
addi r3, r3, x@got@tlsgd@l   # R_PPC64_GOT_TLSGD16_LO
bl __tls_get_addr(x@tlsgd)   # R_PPC64_TLSGD followed by R_PPC64_REL24
nop
```

However, there are two deviations from the above:

1. direct call to `__tls_get_addr`. This is essential to implement ld.so in glibc/musl/FreeBSD.

```
bl __tls_get_addr
nop
```

This is only used in a -shared link, and thus not subject to the GD/LD to IE/LE
relaxation issue below.

2. Missing R_PPC64_TLSGD/R_PPC64_TLSLD for compiler generated TLS references

According to Stefan Pintilie, "In the early days of the transition from the
ELFv1 ABI that is used for big endian PowerPC Linux distributions to the ELFv2
ABI that is used for little endian PowerPC Linux distributions, there was some
ambiguity in the specification of the relocations for TLS. The GNU linker has
implemented support for correct handling of calls to __tls_get_addr with a
missing relocation.  Unfortunately, we didn't notice that the IBM XL compiler
did not handle TLS according to the updated ABI until we tried linking XL
compiled libraries with LLD."

In short, LLD needs to work around the old IBM XL compiler issue.
Otherwise, if the object file is linked in -no-pie or -pie mode,
the result will be incorrect because the 4 instructions are partially
rewritten (the latter 2 are not changed).

Work around the compiler bug by disabling General Dynamic/Local Dynamic to
Initial Exec/Local Exec relaxation. Note that we also disable Initial Exec
to Local Exec relaxation for implementation simplicity, though technically it can be kept.

ppc64-tls-missing-gdld.s demonstrates the updated behavior.

Reviewed By: #powerpc, stefanp, grimar

Differential Revision: https://reviews.llvm.org/D92959
This commit is contained in:
Fangrui Song 2020-12-21 08:45:41 -08:00
parent 554eb1f6dc
commit e25afcfa51
3 changed files with 126 additions and 9 deletions

View File

@ -130,6 +130,10 @@ public:
// [.got, .got + 0xFFFC].
bool ppc64SmallCodeModelTocRelocs = false;
// True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or
// R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation.
bool ppc64DisableTLSRelax = false;
// groupId is used for --warn-backrefs which is an optional error
// checking feature. All files within the same --{start,end}-group or
// --{start,end}-lib get the same group ID. Otherwise, each file gets a new

View File

@ -208,9 +208,13 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c,
return 1;
}
// ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. For
// PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
// relaxation as well.
bool toExecRelax = !config->shared && config->emachine != EM_ARM &&
config->emachine != EM_HEXAGON &&
config->emachine != EM_RISCV;
config->emachine != EM_RISCV &&
!c.file->ppc64DisableTLSRelax;
// If we are producing an executable and the symbol is non-preemptable, it
// must be defined and the code sequence can be relaxed to use Local-Exec.
@ -1527,6 +1531,43 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
processRelocAux<ELFT>(sec, expr, type, offset, sym, rel, addend);
}
// A PPC64 General Dynamic/Local Dynamic code sequence is expected to tag its
// `bl __tls_get_addr` with an R_PPC64_TLSGD/R_PPC64_TLSLD marker relocation.
// If a section carries GD/LD GOT relocations but not a single marker, assume
// the instructions were emitted by a very old IBM XL compiler and work around
// it by turning off GD/LD to IE/LE relaxation for the owning file.
template <class RelTy>
static void checkPPC64TLSRelax(InputSectionBase &sec, ArrayRef<RelTy> rels) {
  // Synthetic sections have no owning file, so there is nothing to flag.
  if (!sec.file)
    return;
  // The file is already flagged; skip the scan and do not warn again.
  if (sec.file->ppc64DisableTLSRelax)
    return;

  // True for the GOT-based relocations used by GD/LD code sequences.
  auto isGotTlsGdLd = [](RelType t) {
    return t == R_PPC64_GOT_TLSGD16 || t == R_PPC64_GOT_TLSGD16_HA ||
           t == R_PPC64_GOT_TLSGD16_HI || t == R_PPC64_GOT_TLSGD16_LO ||
           t == R_PPC64_GOT_TLSLD16 || t == R_PPC64_GOT_TLSLD16_HA ||
           t == R_PPC64_GOT_TLSLD16_HI || t == R_PPC64_GOT_TLSLD16_LO;
  };

  bool sawGotTls = false;
  for (const RelTy &r : rels) {
    const RelType relType = r.getType(false);
    // One marker anywhere in the section is enough to leave the file alone.
    if (relType == R_PPC64_TLSGD || relType == R_PPC64_TLSLD)
      return;
    if (isGotTlsGdLd(relType))
      sawGotTls = true;
  }

  // No GD/LD GOT relocations at all: nothing to work around.
  if (!sawGotTls)
    return;

  sec.file->ppc64DisableTLSRelax = true;
  warn(toString(sec.file) +
       ": disable TLS relaxation due to R_PPC64_GOT_TLS* relocations without "
       "R_PPC64_TLSGD/R_PPC64_TLSLD relocations");
}
template <class ELFT, class RelTy>
static void scanRelocs(InputSectionBase &sec, ArrayRef<RelTy> rels) {
OffsetGetter getOffset(sec);
@ -1534,6 +1575,9 @@ static void scanRelocs(InputSectionBase &sec, ArrayRef<RelTy> rels) {
// Not all relocations end up in Sec.Relocations, but a lot do.
sec.relocations.reserve(rels.size());
if (config->emachine == EM_PPC64)
checkPPC64TLSRelax<RelTy>(sec, rels);
for (auto i = rels.begin(), end = rels.end(); i != end;)
scanReloc<ELFT>(sec, getOffset, i, rels.begin(), end);

View File

@ -1,37 +1,106 @@
# REQUIRES: ppc
# RUN: llvm-mc --triple=powerpc64le %s --filetype=obj -o %t1.o
# RUN: llvm-mc --triple=powerpc64 %s --filetype=obj -o %t2.o
# RUN: ld.lld --shared --fatal-warnings %t1.o -o /dev/null
# RUN: ld.lld --shared --fatal-warnings %t2.o -o /dev/null
# RUN: split-file %s %t
# RUN: llvm-mc --triple=ppc64le %t/a.s --filetype=obj -o %t/a.o
# RUN: llvm-mc --triple=ppc64le %t/b.s --filetype=obj -o %t/b.o
# RUN: llvm-mc --triple=ppc64le %t/tga.s --filetype=obj -o %t/tga.o
## User code can call __tls_get_addr by specifying the tls_index parameter.
## We need to allow R_PPC64_REL24/R_PPC64_REL24_NOTOC referencing __tls_get_addr
## without a pairing R_PPC64_TLSGD/R_PPC64_TLSLD.
# RUN: ld.lld --shared --fatal-warnings %t/b.o -o /dev/null
## Warn missing R_PPC64_TLSGD/R_PPC64_TLSLD.
# RUN: ld.lld --shared %t/a.o -o %t.so 2>&1 | FileCheck %s --check-prefix=WARN
# RUN: llvm-objdump -d --no-leading-addr %t.so | FileCheck %s --check-prefix=DIS
# RUN: ld.lld %t/a.o %t/tga.o -o %t2 2>&1 | FileCheck %s --check-prefix=WARN
# RUN: llvm-readelf -x .got %t2 | FileCheck %s --check-prefix=HEX
# RUN: llvm-objdump -d --no-leading-addr %t2 | FileCheck %s --check-prefix=DIS
# WARN: warning: {{.*}}.o: disable TLS relaxation due to R_PPC64_GOT_TLS* relocations without R_PPC64_TLSGD/R_PPC64_TLSLD relocations
## .got+0: x is local - relaxed to LE - its DTPMOD/DTPREL slots are link-time constants.
## DTPMOD is 1. DTPREL is st_value-0x8000 = -0x8000.
## .got+16: DTPMOD/DTPREL for _TLS_MODULE_BASE_ is 1 and 0, respectively.
## .got+32: TPOFFSET for x = st_value-0x7000
# HEX: section '.got':
# HEX-NEXT: [[#%x,IGNORE:]] 01000000 00000000 0080ffff ffffffff
# HEX-NEXT: [[#%x,IGNORE:]] 01000000 00000000 00000000 00000000
# HEX-NEXT: [[#%x,IGNORE:]] 0090ffff ffffffff
## .TOC.-32768 = (.got+0x8000)-32768 = .got
# DIS-LABEL: <GeneralDynamic>:
# DIS-NEXT: addis 3, 2, 0
# DIS-NEXT: addi 3, 3, -32768
# DIS-NEXT: bl [[#%x,TGA:]]
# DIS-LABEL: <GeneralDynamic_NOTOC>:
# DIS-NEXT: addis 3, 2, 0
# DIS-NEXT: addi 3, 3, -32768
# DIS-NEXT: bl [[#TGA]]
## LocalDynamic references _TLS_MODULE_BASE_.
## .TOC.-32752 = (.got+0x8000)-32752 = .got+16
# DIS-LABEL: <LocalDynamic>:
# DIS-NEXT: addis 3, 2, 0
# DIS-NEXT: addi 3, 3, -32752
# DIS-NEXT: bl [[#TGA]]
# DIS-LABEL: <LocalDynamic_NOTOC>:
# DIS-NEXT: addis 3, 2, 0
# DIS-NEXT: addi 3, 3, -32752
# DIS-NEXT: bl [[#TGA]]
## Technically we don't have to disable IE to LE relaxation,
## but disabling it for implementation simplicity does not hurt.
# DIS-LABEL: <InitialExec>:
# DIS-NEXT: addis 3, 2, 0
# DIS-NEXT: ld 3, -32736(3)
# DIS-NEXT: add 3, 3, 13
#--- a.s
GeneralDynamic:
addis 3, 2, x@got@tlsgd@ha
addi 3, 3, x@got@tlsgd@l
bl __tls_get_addr
blr
nop
GeneralDynamic_NOTOC:
addis 3, 2, x@got@tlsgd@ha
addi 3, 3, x@got@tlsgd@l
bl __tls_get_addr@notoc
blr
nop
LocalDynamic:
addis 3, 2, x@got@tlsld@ha
addi 3, 3, x@got@tlsld@l
bl __tls_get_addr
blr
nop
LocalDynamic_NOTOC:
addis 3, 2, x@got@tlsld@ha
addi 3, 3, x@got@tlsld@l
bl __tls_get_addr@notoc
blr
nop
InitialExec:
addis 3, 2, x@got@tprel@ha
ld 3, x@got@tprel@l(3)
add 3, 3, x@tls
.globl _start
_start:
.section .tbss,"awT",@nobits
.globl x
x:
.quad 0
#--- b.s
CallOnly:
bl __tls_get_addr
nop
blr
#--- tga.s
.globl __tls_get_addr
__tls_get_addr:
blr