[ELF][PPC64] Implement IPLT code sequence for non-preemptible IFUNC

Non-preemptible IFUNCs are placed in in.iplt (.glink on EM_PPC64). If there is a non-GOT non-PLT relocation, for pointer equality, we change the type of the symbol from STT_IFUNC to STT_FUNC and bind it to the .glink entry.

On EM_386, EM_X86_64, EM_ARM, and EM_AARCH64, the PLT code sequence loads the address from its associated .got.plt slot. An IPLT also has an associated .got.plt slot and can use the same code sequence.

On EM_PPC64, the PLT code sequence is actually a bl instruction in .glink. It jumps to `__glink_PLTresolve` (the PLT header), and `__glink_PLTresolve` computes the .plt slot (relocated by R_PPC64_JUMP_SLOT). An IPLT does not have an associated R_PPC64_JUMP_SLOT, so we cannot use `bl` in .iplt. Instead, create a call stub which has a code sequence similar to PPC64PltCallStub. We don't save the TOC pointer, so the following scenario will not work: a function pointer to a non-preemptible ifunc, which resolves to a function defined in another DSO. This is the restriction described by https://sourceware.org/glibc/wiki/GNU_IFUNC (though on many architectures it works in practice):

  Requirement (a): Resolver must be defined in the same translation unit as the implementations.

If the address of an ifunc is taken but the ifunc is never called, technically we don't need an entry for it, but we currently create one anyway.

This patch makes

  // clang -fuse-ld=lld -fno-pie -no-pie a.c
  // clang -fuse-ld=lld -fPIE -pie a.c
  #include <stdio.h>
  static void impl(void) { puts("meow"); }
  void thefunc(void) __attribute__((ifunc("resolver")));
  void *resolver(void) { return &impl; }
  int main(void) {
    thefunc();
    void (*theptr)(void) = &thefunc;
    theptr();
  }

work on Linux glibc and FreeBSD. Calling a function pointer pointing to a non-preemptible IFUNC never worked before.

Differential Revision: https://reviews.llvm.org/D71509
2019-12-13 18:30:21 -08:00 · 2019-12-13 18:30:21 -08:00 · 45acc35ac2
parent 6f9b4c6826
commit 45acc35ac2
5 changed files with 84 additions and 33 deletions
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@ -9,6 +9,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
+#include "Thunks.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/Support/Endian.h"

@ -202,6 +203,8 @@ public:
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
+  void writeIplt(uint8_t *buf, const Symbol &sym,
+                 uint64_t pltEntryAddr) const override;
  void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;
  void writeGotHeader(uint8_t *buf) const override;
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
@ -298,7 +301,7 @@ PPC64::PPC64() {
  symbolicRel = R_PPC64_ADDR64;
  pltHeaderSize = 60;
  pltEntrySize = 4;
-  ipltEntrySize = 4;
+  ipltEntrySize = 16; // PPC64PltCallStub::size
  gotBaseSymInGotPlt = false;
  gotHeaderEntriesNum = 1;
  gotPltHeaderEntriesNum = 2;
@ -676,6 +679,11 @@ void PPC64::writePlt(uint8_t *buf, const Symbol &sym,
  write32(buf, 0x48000000 | ((-offset) & 0x03FFFFFc));
 }

+void PPC64::writeIplt(uint8_t *buf, const Symbol &sym,
+                      uint64_t /*pltEntryAddr*/) const {
+  writePPC64LoadAndBranch(buf, sym.getGotPltVA() - getPPC64TocBase());
+}
+
 static std::pair<RelType, uint64_t> toAddr16Rel(RelType type, uint64_t val) {
  // Relocations relative to the toc-base need to be adjusted by the Toc offset.
  uint64_t tocBiasedVal = val - ppc64TocOffset;
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@ -761,7 +761,7 @@ bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec,
  return !config->isPic || (isec.file == file && rel.addend == addend);
 }

-static void writePPCLoadAndBranch(uint8_t *buf, int64_t offset) {
+void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset) {
  uint16_t offHa = (offset + 0x8000) >> 16;
  uint16_t offLo = offset & 0xffff;

@ -775,7 +775,7 @@ void PPC64PltCallStub::writeTo(uint8_t *buf) {
  int64_t offset = destination.getGotPltVA() - getPPC64TocBase();
  // Save the TOC pointer to the save-slot reserved in the call frame.
  write32(buf + 0, 0xf8410018); // std     r2,24(r1)
-  writePPCLoadAndBranch(buf + 4, offset);
+  writePPC64LoadAndBranch(buf + 4, offset);
 }

 void PPC64PltCallStub::addSymbols(ThunkSection &isec) {
@ -787,7 +787,7 @@ void PPC64PltCallStub::addSymbols(ThunkSection &isec) {
 void PPC64LongBranchThunk::writeTo(uint8_t *buf) {
  int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) -
                   getPPC64TocBase();
-  writePPCLoadAndBranch(buf, offset);
+  writePPC64LoadAndBranch(buf, offset);
 }

 void PPC64LongBranchThunk::addSymbols(ThunkSection &isec) {
--- a/lld/ELF/Thunks.h
+++ b/lld/ELF/Thunks.h
@ -68,6 +68,8 @@ public:
 // ThunkSection.
 Thunk *addThunk(const InputSection &isec, Relocation &rel);

+void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset);
+
 } // namespace elf
 } // namespace lld

--- a/lld/test/ELF/ppc64-ifunc.s
+++ b/lld/test/ELF/ppc64-ifunc.s
@ -2,66 +2,94 @@

 # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
 # RUN: ld.lld %t.o -o %t
-# RUN: llvm-nm %t | FileCheck --check-prefix=NM %s
+# RUN: llvm-readelf -s %t | FileCheck --check-prefix=SYM %s
 # RUN: llvm-readelf -S %t | FileCheck --check-prefix=SECTIONS %s
 # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
 # RUN: llvm-readobj -r %t | FileCheck --check-prefix=REL %s

 # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
 # RUN: ld.lld %t.o -o %t
-# RUN: llvm-nm %t | FileCheck --check-prefix=NM %s
+# RUN: llvm-readelf -s %t | FileCheck --check-prefix=SYM %s
 # RUN: llvm-readelf -S %t | FileCheck --check-prefix=SECTIONS %s
 # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
 # RUN: llvm-readobj -r %t | FileCheck --check-prefix=REL %s

-# NM-DAG: 0000000010028248 d .TOC.
-# NM-DAG: 00000000100101f8 i ifunc
-# NM-DAG: 00000000100101fc i ifunc2
+# SYM: Value            Size Type   Bind   Vis     Ndx
+# SYM: 0000000010028298    0 NOTYPE LOCAL  HIDDEN    4 .TOC.
+# SYM: 0000000010010288    0 FUNC   GLOBAL DEFAULT   3 ifunc1
+# SYM: 0000000010010210    0 IFUNC  GLOBAL DEFAULT   2 ifunc2
+# SYM: 0000000010010278    0 FUNC   GLOBAL DEFAULT   3 ifunc3

-# SECTIONS: .plt NOBITS 0000000010030250 000250 000010 00 WA 0 0 8
+# SECTIONS: .plt NOBITS 00000000100302a0 0002a0 000018 00 WA 0 0 8

 # __plt_ifunc - . = 0x10010218 - 0x10010208 = 16
 # __plt_ifunc2 - . = 0x1001022c - 0x10010210 = 28
 # CHECK: _start:
 # CHECK-NEXT:                 addis 2, 12, 2
-# CHECK-NEXT:                 addi 2, 2, -32696
-# CHECK-NEXT: 10010208:       bl .+16
+# CHECK-NEXT:                 addi 2, 2, -32636
+# CHECK-NEXT: 1001021c:       bl .+36
 # CHECK-NEXT:                 ld 2, 24(1)
-# CHECK-NEXT: 10010210:       bl .+28
+# CHECK-NEXT: 10010224:       bl .+48
 # CHECK-NEXT:                 ld 2, 24(1)
+# CHECK-NEXT:                 addis 3, 2, -2
+# CHECK-NEXT:                 addi 3, 3, 32752
+# CHECK-NEXT:                 addis 3, 2, -2
+# CHECK-NEXT:                 addi 3, 3, 32736

-# .plt[0] - .TOC. = 0x10030250 - 0x10028248 = (1<<16) - 32760
-# CHECK: __plt_ifunc:
+# .plt[0] - .TOC. = 0x100302a0 - 0x10028298 = (1<<16) - 32760
+# CHECK: __plt_ifunc2:
 # CHECK-NEXT:     std 2, 24(1)
 # CHECK-NEXT:     addis 12, 2, 1
 # CHECK-NEXT:     ld 12, -32760(12)
 # CHECK-NEXT:     mtctr 12
 # CHECK-NEXT:     bctr

-# .plt[1] - .TOC. = 0x10030250+8 - 0x10028248 = (1<<16) - 32752
-# CHECK: __plt_ifunc2:
+# .plt[1] - .TOC. = 0x100302a0+8 - 0x10028298 = (1<<16) - 32752
+# CHECK: __plt_ifunc3:
 # CHECK-NEXT:     std 2, 24(1)
 # CHECK-NEXT:     addis 12, 2, 1
 # CHECK-NEXT:     ld 12, -32752(12)
 # CHECK-NEXT:     mtctr 12
 # CHECK-NEXT:     bctr
+# CHECK-EMPTY:

-## Check that we emit 2 R_PPC64_IRELATIVE in .rela.dyn.
-## glibc powerpc64 does not eagerly resolve R_PPC64_IRELATIVE if they are in .rela.plt.
+## .glink has 3 IPLT entries for ifunc1, ifunc2 and ifunc3.
+## ifunc2 and ifunc3 have the same code sequence as their PLT call stubs.
+# CHECK:      Disassembly of section .glink:
+# CHECK-EMPTY:
+# CHECK-NEXT: 0000000010010268 .glink:
+# CHECK-NEXT:     addis 12, 2, 1
+# CHECK-NEXT:     ld 12, -32760(12)
+# CHECK-NEXT:     mtctr 12
+# CHECK-NEXT:     bctr
+# CHECK-EMPTY:
+# CHECK-NEXT: 0000000010010278 ifunc3:
+# CHECK-NEXT:     addis 12, 2, 1
+# CHECK-NEXT:     ld 12, -32752(12)
+# CHECK-NEXT:     mtctr 12
+# CHECK-NEXT:     bctr
+# CHECK-EMPTY:
+# CHECK-NEXT: 0000000010010288 ifunc1:
+# CHECK-NEXT:     addis 12, 2, 1
+# CHECK-NEXT:     ld 12, -32744(12)
+# CHECK-NEXT:     mtctr 12
+# CHECK-NEXT:     bctr
+
+## Check that we emit 3 R_PPC64_IRELATIVE in .rela.dyn.
 # REL:      .rela.dyn {
-# REL-NEXT:   0x10030250 R_PPC64_IRELATIVE - 0x100101F8
-# REL-NEXT:   0x10030258 R_PPC64_IRELATIVE - 0x100101FC
+# REL-NEXT:   0x100302A0 R_PPC64_IRELATIVE - 0x10010210
+# REL-NEXT:   0x100302A8 R_PPC64_IRELATIVE - 0x10010210
+# REL-NEXT:   0x100302B0 R_PPC64_IRELATIVE - 0x10010210
 # REL-NEXT: }

-.type ifunc STT_GNU_IFUNC
-.globl ifunc
-ifunc:
-  nop
-
-.type ifunc2 STT_GNU_IFUNC
-.globl ifunc2
+.type ifunc1,@gnu_indirect_function
+.type ifunc2,@gnu_indirect_function
+.type ifunc3,@gnu_indirect_function
+.globl ifunc1, ifunc2, ifunc3
+ifunc1:
 ifunc2:
-  nop
+ifunc3:
+  blr

 .global _start
 .type   _start,@function
@ -72,7 +100,20 @@ _start:
  addi 2, 2, .TOC.-.Lfunc_gep0@l
 .Lfunc_lep0:
  .localentry     _start, .Lfunc_lep0-.Lfunc_gep0
-  bl ifunc
-  nop
+
+  ## ifunc1 is taken address.
+  ## ifunc2 is called.
+  ## ifunc3 is both taken address and called.
+  ## We need to create IPLT entries in .glink for ifunc1 and ifunc3, change
+  ## their types from STT_GNU_IFUNC to STT_FUNC, and set their st_shndx/st_value
+  ## to their .glink entries. Technically we don't need an entry for ifunc2 in
+  ## .glink, but we currently do that.
  bl ifunc2
  nop
+  bl ifunc3
+  nop
+
+  addis 3, 2, ifunc1@toc@ha
+  addi  3, 3, ifunc1@toc@l
+  addis 3, 2, ifunc3@toc@ha
+  addi  3, 3, ifunc3@toc@l
--- a/lld/test/ELF/ppc64-toc-relax-ifunc.s
+++ b/lld/test/ELF/ppc64-toc-relax-ifunc.s
@ -14,12 +14,12 @@
 ## to the address of the canonical PLT is fixed.

 # SEC: .text PROGBITS 00000000100101e0
-# SEC: .plt  NOBITS   00000000100301f8
+# SEC: .plt  NOBITS   0000000010030200
 # SEC: 00000000100101e8 0 FUNC GLOBAL DEFAULT 3 ifunc

 ## .toc[0] stores the address of the canonical PLT.
 # HEX:      section '.toc':
-# HEX-NEXT: 0x100201f0 e8010110 00000000
+# HEX-NEXT: 0x100201f8 e8010110 00000000

 # REL:      .rela.dyn {
 # REL-NEXT:   0x100301f8 R_PPC64_IRELATIVE - 0x100101e8