[LLD][ELF] Add the -z ifunc-noplt option

Patch by Mark Johnston!

Summary:
When the option is configured, ifunc calls do not go through the PLT;
rather, they appear as regular function calls with relocations
referencing the ifunc symbol, and the resolver is invoked when
applying the relocation.  This is intended for use in freestanding
environments where text relocations are permissible and is incompatible
with the -z text option.  The option is motivated by ifunc usage in the
FreeBSD kernel, where ifuncs are used to elide CPU feature flag bit
checks in hot paths.  Instead of replacing the cost of a branch with that
of an indirect function call, the -z ifunc-noplt option is used to ensure
that ifunc calls carry no hidden overhead relative to normal function
calls.

Test Plan:
I added a couple of regression tests and tested the FreeBSD kernel
build using the latest lld sources.

To demonstrate the effects of the change, I used a micro-benchmark
which results in frequent invocations of a FreeBSD kernel ifunc.  The
benchmark was run with and without IBRS enabled, and with and without
-z ifunc-noplt configured.  The observed speedup is small and consistent,
and is significantly larger with IBRS enabled:

https://people.freebsd.org/~markj/ifunc-noplt/noibrs.txt
https://people.freebsd.org/~markj/ifunc-noplt/ibrs.txt

Reviewed By: ruiu, MaskRay

Differential Revision: https://reviews.llvm.org/D61613

llvm-svn: 360685
This commit is contained in:
Fangrui Song 2019-05-14 15:25:21 +00:00
parent c2d9cfd925
commit e041d15f5e
7 changed files with 166 additions and 3 deletions

View File

@ -194,6 +194,7 @@ struct Configuration {
bool ZExecstack;
bool ZGlobal;
bool ZHazardplt;
bool ZIfuncNoplt;
bool ZInitfirst;
bool ZInterpose;
bool ZKeepTextSectionPrefix;

View File

@ -309,6 +309,9 @@ static void checkOptions() {
if (!Config->Relocatable && !Config->DefineCommon)
error("-no-define-common not supported in non relocatable output");
if (Config->ZText && Config->ZIfuncNoplt)
error("-z text and -z ifunc-noplt may not be used together");
if (Config->Relocatable) {
if (Config->Shared)
error("-r and -shared may not be used together");
@ -358,7 +361,7 @@ static bool getZFlag(opt::InputArgList &Args, StringRef K1, StringRef K2,
static bool isKnownZFlag(StringRef S) {
return S == "combreloc" || S == "copyreloc" || S == "defs" ||
S == "execstack" || S == "global" || S == "hazardplt" ||
S == "initfirst" || S == "interpose" ||
S == "ifunc-noplt" || S == "initfirst" || S == "interpose" ||
S == "keep-text-section-prefix" || S == "lazy" || S == "muldefs" ||
S == "nocombreloc" || S == "nocopyreloc" || S == "nodefaultlib" ||
S == "nodelete" || S == "nodlopen" || S == "noexecstack" ||
@ -896,6 +899,7 @@ static void readConfigs(opt::InputArgList &Args) {
Config->ZExecstack = getZFlag(Args, "execstack", "noexecstack", false);
Config->ZGlobal = hasZOption(Args, "global");
Config->ZHazardplt = hasZOption(Args, "hazardplt");
Config->ZIfuncNoplt = hasZOption(Args, "ifunc-noplt");
Config->ZInitfirst = hasZOption(Args, "initfirst");
Config->ZInterpose = hasZOption(Args, "interpose");
Config->ZKeepTextSectionPrefix = getZFlag(

View File

@ -1066,7 +1066,7 @@ static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
// be resolved within the executable will actually be resolved that way at
// runtime, because the main executable is always at the beginning of a search
// list. We can leverage that fact.
if (!Sym.IsPreemptible && !Sym.isGnuIFunc()) {
if (!Sym.IsPreemptible && (!Sym.isGnuIFunc() || Config->ZIfuncNoplt)) {
if (Expr == R_GOT_PC && !isAbsoluteValue(Sym))
Expr = Target->adjustRelaxExpr(Type, RelocatedAddr, Expr);
else
@ -1094,6 +1094,14 @@ static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
return;
}
// We were asked not to generate PLT entries for ifuncs. Instead, pass the
// direct relocation on through.
if (Sym.isGnuIFunc() && Config->ZIfuncNoplt) {
Sym.ExportDynamic = true;
In.RelaDyn->addReloc(Type, &Sec, Offset, &Sym, Addend, R_ADDEND, Type);
return;
}
// Non-preemptible ifuncs require special handling. First, handle the usual
// case where the symbol isn't one of these.
if (!Sym.isGnuIFunc() || Sym.IsPreemptible) {

View File

@ -4,7 +4,7 @@
.\"
.\" This man page documents only lld's ELF linking support, obtained originally
.\" from FreeBSD.
.Dd February 26, 2019
.Dd May 12, 2019
.Dt LD.LLD 1
.Os
.Sh NAME
@ -552,6 +552,14 @@ Sets the
.Dv DYNAMIC
section.
Different loaders can decide how to handle this flag on their own.
.It Cm ifunc-noplt
Do not emit PLT entries for ifunc symbols.
Instead, emit text relocations referencing the resolver.
This is an experimental optimization and only suitable for standalone
environments where text relocations do not have the usual drawbacks.
This option must be combined with the
.Fl z Li notext
option.
.It Cm initfirst
Sets the
.Dv DF_1_INITFIRST

View File

@ -252,3 +252,6 @@ _start:
# RUN: not ld.lld %t --thinlto-jobs=0 2>&1 | FileCheck --check-prefix=NOTHREADSTHIN %s
# RUN: not ld.lld %t --plugin-opt=jobs=0 2>&1 | FileCheck --check-prefix=NOTHREADSTHIN %s
# NOTHREADSTHIN: --thinlto-jobs: number of threads must be > 0
# RUN: not ld.lld %t -z ifunc-noplt -z text 2>&1 | FileCheck --check-prefix=NOIFUNCPLTNOTEXTREL %s
# NOIFUNCPLTNOTEXTREL: -z text and -z ifunc-noplt may not be used together

View File

@ -0,0 +1,71 @@
// REQUIRES: x86
// RUN: llvm-mc -filetype=obj -triple=i686-pc-freebsd %S/Inputs/shared2-x86-64.s -o %t1.o
// RUN: ld.lld %t1.o --shared -o %t.so
// RUN: llvm-mc -filetype=obj -triple=i686-pc-freebsd %s -o %t.o
// RUN: ld.lld -z ifunc-noplt -z notext --hash-style=sysv %t.so %t.o -o %tout
// RUN: llvm-objdump -d --no-show-raw-insn %tout | FileCheck %s --check-prefix=DISASM
// RUN: llvm-readobj -r --dynamic-table %tout | FileCheck %s
// Check that we emitted relocations for the ifunc calls
// CHECK: Relocations [
// CHECK-NEXT: Section (4) .rel.dyn {
// CHECK-NEXT: 0x401008 R_386_PLT32 bar
// CHECK-NEXT: 0x401003 R_386_PLT32 foo
// CHECK-NEXT: }
// CHECK-NEXT: Section (5) .rel.plt {
// CHECK-NEXT: 0x40300C R_386_JUMP_SLOT bar2
// CHECK-NEXT: 0x403010 R_386_JUMP_SLOT zed2
// CHECK-NEXT: }
// Check that ifunc call sites still require relocation
// DISASM: Disassembly of section .text:
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000401000 foo:
// DISASM-NEXT: 401000: retl
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000401001 bar:
// DISASM-NEXT: 401001: retl
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000401002 _start:
// DISASM-NEXT: 401002: calll -4 <_start+0x1>
// DISASM-NEXT: 401007: calll -4 <_start+0x6>
// DISASM-NEXT: 40100c: calll 31 <bar2@plt>
// DISASM-NEXT: 401011: calll 42 <zed2@plt>
// DISASM-EMPTY:
// DISASM-NEXT: Disassembly of section .plt:
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000401020 .plt:
// DISASM-NEXT: 401020: pushl 4206596
// DISASM-NEXT: 401026: jmpl *4206600
// DISASM-NEXT: 40102c: nop
// DISASM-NEXT: 40102d: nop
// DISASM-NEXT: 40102e: nop
// DISASM-NEXT: 40102f: nop
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000401030 bar2@plt:
// DISASM-NEXT: 401030: jmpl *4206604
// DISASM-NEXT: 401036: pushl $0
// DISASM-NEXT: 40103b: jmp -32 <.plt>
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000401040 zed2@plt:
// DISASM-NEXT: 401040: jmpl *4206608
// DISASM-NEXT: 401046: pushl $8
// DISASM-NEXT: 40104b: jmp -48 <.plt>
// Test input (i386): two global ifunc symbols and an entry point that calls
// them, plus two symbols that are not defined in this file.
.text
// foo is typed STT_GNU_IFUNC and exported; its body is a bare return.
.type foo STT_GNU_IFUNC
.globl foo
foo:
ret
// bar is a second global STT_GNU_IFUNC symbol with the same trivial body.
.type bar STT_GNU_IFUNC
.globl bar
bar:
ret
.globl _start
_start:
// Calls to the two ifuncs. With -z ifunc-noplt the expectations above require
// these call sites to keep R_386_PLT32 dynamic relocations in .rel.dyn and to
// be left unresolved in the disassembly, instead of going through PLT entries.
call foo@plt
call bar@plt
// bar2 and zed2 are not defined in this file; the expectations above require
// them to get regular PLT entries and R_386_JUMP_SLOT relocations.
// NOTE(review): presumably they are provided by the shared library built from
// Inputs/shared2-x86-64.s - confirm against that input.
call bar2@plt
call zed2@plt

View File

@ -0,0 +1,68 @@
// REQUIRES: x86
// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-freebsd %S/Inputs/shared2-x86-64.s -o %t1.o
// RUN: ld.lld %t1.o --shared -o %t.so
// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-freebsd %s -o %t.o
// RUN: ld.lld -z ifunc-noplt -z notext --hash-style=sysv %t.so %t.o -o %tout
// RUN: llvm-objdump -d --no-show-raw-insn %tout | FileCheck %s --check-prefix=DISASM
// RUN: llvm-readobj -r --dynamic-table %tout | FileCheck %s
// Check that we emitted relocations for the ifunc calls
// CHECK: Relocations [
// CHECK-NEXT: Section (4) .rela.dyn {
// CHECK-NEXT: 0x201008 R_X86_64_PLT32 bar 0xFFFFFFFFFFFFFFFC
// CHECK-NEXT: 0x201003 R_X86_64_PLT32 foo 0xFFFFFFFFFFFFFFFC
// CHECK-NEXT: }
// CHECK-NEXT: Section (5) .rela.plt {
// CHECK-NEXT: 0x203018 R_X86_64_JUMP_SLOT bar2 0x0
// CHECK-NEXT: 0x203020 R_X86_64_JUMP_SLOT zed2 0x0
// CHECK-NEXT: }
// Check that ifunc call sites still require relocation
// DISASM: Disassembly of section .text:
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000201000 foo:
// DISASM-NEXT: 201000: retq
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000201001 bar:
// DISASM-NEXT: 201001: retq
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000201002 _start:
// DISASM-NEXT: 201002: callq 0 <_start+0x5>
// DISASM-NEXT: 201007: callq 0 <_start+0xa>
// DISASM-NEXT: 20100c: callq 31 <bar2@plt>
// DISASM-NEXT: 201011: callq 42 <zed2@plt>
// DISASM-EMPTY:
// DISASM-NEXT: Disassembly of section .plt:
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000201020 .plt:
// DISASM-NEXT: 201020: pushq 8162(%rip)
// DISASM-NEXT: 201026: jmpq *8164(%rip)
// DISASM-NEXT: 20102c: nopl (%rax)
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000201030 bar2@plt:
// DISASM-NEXT: 201030: jmpq *8162(%rip)
// DISASM-NEXT: 201036: pushq $0
// DISASM-NEXT: 20103b: jmp -32 <.plt>
// DISASM-EMPTY:
// DISASM-NEXT: 0000000000201040 zed2@plt:
// DISASM-NEXT: 201040: jmpq *8154(%rip)
// DISASM-NEXT: 201046: pushq $1
// DISASM-NEXT: 20104b: jmp -48 <.plt>
// Test input (x86-64): two global ifunc symbols and an entry point that calls
// them, plus two symbols that are not defined in this file.
.text
// foo is typed STT_GNU_IFUNC and exported; its body is a bare return.
.type foo STT_GNU_IFUNC
.globl foo
foo:
ret
// bar is a second global STT_GNU_IFUNC symbol with the same trivial body.
.type bar STT_GNU_IFUNC
.globl bar
bar:
ret
.globl _start
_start:
// Calls to the two ifuncs. With -z ifunc-noplt the expectations above require
// these call sites to keep R_X86_64_PLT32 dynamic relocations in .rela.dyn
// (addend -4) and to be left unresolved in the disassembly, instead of going
// through PLT entries.
call foo
call bar
// bar2 and zed2 are not defined in this file; the expectations above require
// them to get regular PLT entries and R_X86_64_JUMP_SLOT relocations.
// NOTE(review): presumably they are provided by the shared library built from
// Inputs/shared2-x86-64.s - confirm against that input.
call bar2
call zed2