[PPC64] toc-indirect to toc-relative relaxation

This is based on D54720 by Sean Fertile.

When accessing a global symbol which is not defined in the translation unit,
compilers will generate instructions that load the address from the toc entry.

If the symbol is defined, non-preemptable, and addressable with a 32-bit
signed offset from the toc pointer, the address can be computed
directly. e.g.

    addis 3, 2, .LC0@toc@ha  # R_PPC64_TOC16_HA
    ld    3, .LC0@toc@l(3)   # R_PPC64_TOC16_LO_DS, load the address from a .toc entry
    ld/lwa 3, 0(3)           # load the value from the address

    .section .toc,"aw",@progbits
    .LC0: .tc var[TC],var

can be relaxed to

    addis 3,2,var@toc@ha     # this may be relaxed to a nop,
    addi  3,3,var@toc@l      # then this becomes addi 3,2,var@toc
    ld/lwa 3, 0(3)           # load the value from the address

We can delete the test ppc64-got-indirect.s as its purpose is covered by
newly added ppc64-toc-relax.s and ppc64-toc-relax-constants.s

Reviewed By: ruiu, sfertile

Differential Revision: https://reviews.llvm.org/D60958

llvm-svn: 360112
This commit is contained in:
Fangrui Song 2019-05-07 04:26:05 +00:00
parent c6d445f9c1
commit 912251e82f
15 changed files with 403 additions and 147 deletions

View File

@ -103,6 +103,88 @@ bool elf::isPPC64SmallCodeModelTocReloc(RelType Type) {
return Type == R_PPC64_TOC16 || Type == R_PPC64_TOC16_DS;
}
// Look up the .rela.toc entry (expected to be an R_PPC64_ADDR64) whose
// r_offset equals Offset.
//
// Returns the relocation's target symbol (null if it is not a Defined) paired
// with the relocation addend. A default-constructed pair is returned when the
// section has no relocations or no entry has the requested r_offset.
template <typename ELFT>
static std::pair<Defined *, int64_t>
getRelaTocSymAndAddend(InputSectionBase *TocSec, uint64_t Offset) {
  // Guard first: the index clamp below computes size() - 1.
  if (TocSec->NumRelocations == 0)
    return {};

  // .rela.toc is expected to hold only R_PPC64_ADDR64 relocations ordered by
  // r_offset (0, 8, 16, ...), so Offset / 8 is normally the exact index.
  //
  // Occasionally a TOC slot holds a plain constant and therefore has no
  // relocation. Every later entry is then shifted down, and Offset / 8 lands
  // on an entry with a larger r_offset. Step backwards until we are no longer
  // past Offset. Constants in .toc are very uncommon, so the number of extra
  // steps is effectively a small constant.
  ArrayRef<typename ELFT::Rela> Entries = TocSec->template relas<ELFT>();
  uint64_t Pos = std::min<uint64_t>(Offset / 8, Entries.size() - 1);
  while (Entries[Pos].r_offset > Offset && Pos != 0)
    --Pos;

  // Either we hit the entry exactly, or there is no relocation for this slot.
  if (Entries[Pos].r_offset != Offset)
    return {};

  const typename ELFT::Rela &Match = Entries[Pos];
  Symbol &Referent = TocSec->getFile<ELFT>()->getRelocTargetSym(Match);
  return {dyn_cast<Defined>(&Referent), getAddend<ELFT>(Match)};
}
// For a symbol defined in another translation unit, compilers reserve a .toc
// entry, give it a local label and emit toc-indirect instructions:
//
// addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA
// ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry
// ld/lwa 3, 0(3) # load the value from the address
//
// .section .toc,"aw",@progbits
// .LC0: .tc var[TC],var
//
// When var turns out to be defined, non-preemptable and reachable with a
// 32-bit signed offset from the toc base, its address can be formed by adding
// that offset to the toc base, which saves the load from the .toc entry:
//
// addis 3,2,var@toc@ha # this may be relaxed to a nop,
// addi 3,3,var@toc@l # then this becomes addi 3,2,var@toc
// ld/lwa 3, 0(3) # load the value from the address
//
// Type and BufLoc are forwarded to Target->relaxGot(); Rel supplies the
// referenced symbol and the offset into .toc. Returns true if the relaxation
// was performed, false if the caller must apply the relocation unrelaxed.
bool elf::tryRelaxPPC64TocIndirection(RelType Type, const Relocation &Rel,
                                      uint8_t *BufLoc) {
  assert(Config->TocOptimize);
  if (Rel.Addend < 0)
    return false;

  // A toc-indirection refers to the .toc section symbol; anything else is
  // left alone.
  auto *TocSym = dyn_cast<Defined>(Rel.Sym);
  const bool RefersToToc =
      TocSym && TocSym->isSection() && TocSym->Section->Name == ".toc";
  if (!RefersToToc)
    return false;

  // Find what the .toc slot at Rel.Addend points to, using the ELF flavour
  // that matches the configured endianness.
  auto *TocSec = cast<InputSectionBase>(TocSym->Section);
  std::pair<Defined *, int64_t> Slot =
      Config->IsLE ? getRelaTocSymAndAddend<ELF64LE>(TocSec, Rel.Addend)
                   : getRelaTocSymAndAddend<ELF64BE>(TocSec, Rel.Addend);

  // Relaxation is only valid for defined symbols that cannot be preempted.
  Defined *Referent = Slot.first;
  if (!Referent || Referent->IsPreemptible)
    return false;

  // The two-instruction sequence can encode a 32-bit signed offset from the
  // toc base; give up if the target is further away than that.
  uint64_t Delta = Referent->getVA(Slot.second) - getPPC64TocBase();
  if (!isInt<32>(Delta))
    return false;

  // relocateOne() subtracts PPC64TocOffset, so add it back in beforehand.
  Target->relaxGot(BufLoc, Type, Delta + PPC64TocOffset);
  return true;
}
namespace {
class PPC64 final : public TargetInfo {
public:
@ -121,6 +203,7 @@ public:
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
@ -270,6 +353,27 @@ uint32_t PPC64::calcEFlags() const {
return 2;
}
// Rewrite one instruction of a toc-indirect sequence into its toc-relative
// form. Loc is the relocated location, Type selects which instruction of the
// sequence is being handled, and Val is the value passed on to relocateOne().
// Only R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS are expected here.
void PPC64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const {
  if (Type == R_PPC64_TOC16_HA) {
    // "addis reg, 2, .LC0@toc@ha" becomes "addis reg, 2, var@toc@ha" or "nop".
    // The instruction itself is kept; only the relocated value changes.
    relocateOne(Loc, Type, Val);
    return;
  }

  if (Type == R_PPC64_TOC16_LO_DS) {
    // "ld reg, .LC0@toc@l(reg)" becomes "addi reg, reg, var@toc@l" or
    // "addi reg, 2, var@toc".
    const uint32_t Load = readInstrFromHalf16(Loc);
    if (getPrimaryOpCode(Load) != LD)
      error("expected a 'ld' for got-indirect to toc-relative relaxing");

    // Keep the low 26 bits and replace the top six bits with 0x38000000 to
    // turn the instruction into an addi.
    const uint32_t AddImm = (Load & 0x03FFFFFF) | 0x38000000;
    writeInstrFromHalf16(Loc, AddImm);

    // The rewritten instruction is relocated as R_PPC64_TOC16_LO rather than
    // the original R_PPC64_TOC16_LO_DS.
    relocateOne(Loc, R_PPC64_TOC16_LO, Val);
    return;
  }

  llvm_unreachable("unexpected relocation type");
}
void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement.
// The general dynamic code sequence for a global `x` will look like:
@ -439,11 +543,12 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
return R_GOT_OFF;
case R_PPC64_TOC16:
case R_PPC64_TOC16_DS:
case R_PPC64_TOC16_HA:
case R_PPC64_TOC16_HI:
case R_PPC64_TOC16_LO:
case R_PPC64_TOC16_LO_DS:
return R_GOTREL;
case R_PPC64_TOC16_HA:
case R_PPC64_TOC16_LO_DS:
return Config->TocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL;
case R_PPC64_TOC:
return R_PPC_TOC;
case R_PPC64_REL14:

View File

@ -38,7 +38,7 @@ public:
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
void relaxGot(uint8_t *Loc, uint64_t Val) const override;
void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
@ -453,7 +453,7 @@ static void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
write32le(Loc, Val);
}
void X86_64::relaxGot(uint8_t *Loc, uint64_t Val) const {
void X86_64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const {
const uint8_t Op = Loc[-2];
const uint8_t ModRm = Loc[-1];

View File

@ -631,6 +631,7 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
case R_GOTPLTONLY_PC:
return In.GotPlt->getVA() + A - P;
case R_GOTREL:
case R_PPC64_RELAX_TOC:
return Sym.getVA(A) - In.Got->getVA();
case R_GOTPLTREL:
return Sym.getVA(A) - In.GotPlt->getVA();
@ -894,7 +895,11 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
switch (Expr) {
case R_RELAX_GOT_PC:
case R_RELAX_GOT_PC_NOPIC:
Target->relaxGot(BufLoc, TargetVA);
Target->relaxGot(BufLoc, Type, TargetVA);
break;
case R_PPC64_RELAX_TOC:
if (!tryRelaxPPC64TocIndirection(Type, Rel, BufLoc))
Target->relocateOne(BufLoc, Type, TargetVA);
break;
case R_RELAX_TLS_IE_TO_LE:
Target->relaxTlsIeToLe(BufLoc, Type, TargetVA);

View File

@ -383,7 +383,7 @@ static bool needsGot(RelExpr Expr) {
// file (PC, or GOT for example).
static bool isRelExpr(RelExpr Expr) {
return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC_CALL,
R_PPC_CALL_PLT, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC>(Expr);
R_PPC64_RELAX_TOC, R_PPC_CALL_PLT, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC>(Expr);
}
// Returns true if a given relocation can be computed at link-time.
@ -403,7 +403,7 @@ static bool isStaticLinkTimeConstant(RelExpr E, RelType Type, const Symbol &Sym,
R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD,
R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC_CALL_PLT,
R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT,
R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT,
R_TLSIE_HINT>(E))
return true;
@ -1079,7 +1079,7 @@ static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
// The 4 types that relative GOTPLT are all x86 and x86-64 specific.
if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_TLSGD_GOTPLT>(Expr)) {
In.GotPlt->HasGotPltOffRel = true;
} else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC_TOC>(Expr)) {
} else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC_TOC, R_PPC64_RELAX_TOC>(Expr)) {
In.Got->HasGotOffRel = true;
}
@ -1240,8 +1240,10 @@ static void scanRelocs(InputSectionBase &Sec, ArrayRef<RelTy> Rels) {
for (auto I = Rels.begin(), End = Rels.end(); I != End;)
scanReloc<ELFT>(Sec, GetOffset, I, End);
// Sort relocations by offset to binary search for R_RISCV_PCREL_HI20
if (Config->EMachine == EM_RISCV)
// Sort relocations by offset for more efficient searching for
// R_RISCV_PCREL_HI20 and R_PPC64_ADDR64.
if (Config->EMachine == EM_RISCV ||
(Config->EMachine == EM_PPC64 && Sec.Name == ".toc"))
llvm::stable_sort(Sec.Relocations,
[](const Relocation &LHS, const Relocation &RHS) {
return LHS.Offset < RHS.Offset;

View File

@ -93,6 +93,7 @@ enum RelExpr {
R_PPC_CALL,
R_PPC_CALL_PLT,
R_PPC_TOC,
R_PPC64_RELAX_TOC,
R_RISCV_PC_INDIRECT,
};

View File

@ -149,7 +149,7 @@ RelExpr TargetInfo::adjustRelaxExpr(RelType Type, const uint8_t *Data,
return Expr;
}
void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const {
void TargetInfo::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const {
llvm_unreachable("Should not have claimed to be relaxable");
}

View File

@ -124,7 +124,7 @@ public:
virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const;
virtual void relaxGot(uint8_t *Loc, uint64_t Val) const;
virtual void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const;
virtual void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const;
virtual void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const;
virtual void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const;
@ -164,8 +164,11 @@ static inline std::string getErrorLocation(const uint8_t *Loc) {
return getErrorPlace(Loc).Loc;
}
// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first is
// a global entry point (GEP) which typically is used to intiailzie the TOC
bool tryRelaxPPC64TocIndirection(RelType Type, const Relocation &Rel,
uint8_t *BufLoc);
// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first
// is a global entry point (GEP) which typically is used to initialize the TOC
// pointer in general purpose register 2. The second is a local entry
// point (LEP) which bypasses the TOC pointer initialization code. The
// offset between GEP and LEP is encoded in a function's st_other flags.

View File

@ -0,0 +1,7 @@
# Test input: defines the global data object `shared`.
# The toc-relax tests assemble this file and link it with -shared so that
# `shared` is provided by a shared object.
.data
.type shared,@object
.globl shared
shared:
# 4-byte object holding the value 8.
.long 8
.size shared, 4

View File

@ -0,0 +1,15 @@
# Test input: data symbols referenced through .toc entries by the toc-relax
# tests.
.data
.globl default, hidden
.hidden hidden
# `default` (default visibility) and `hidden` (hidden visibility) label the
# same 4-byte word.
default:
hidden:
.long 0
# Pad so that hidden2 starts 65536 bytes (4 + 65532) after hidden.
.space 65532
.globl hidden2
.hidden hidden2
hidden2:
.long 0

View File

@ -75,6 +75,6 @@ glob:
// CHECK: foo_external_diff:
// CHECK-NEXT: 10010080: {{.*}} addis 2, 12, 1
// CHECK-NEXT: 10010084: {{.*}} addi 2, 2, 32640
// CHECK-NEXT: 10010088: {{.*}} nop
// CHECK-NEXT: 10010088: {{.*}} addis 5, 2, 1
// CHECK: foo_external_same:
// CHECK-NEXT: 100100b0: {{.*}} add 3, 4, 3

View File

@ -1,121 +0,0 @@
# REQUIRES: ppc
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
# RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=RELOCS-LE %s
# RUN: ld.lld %t.o -o %t2
# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-LE
# RUN: llvm-objdump -D %t2 | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
# RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=RELOCS-BE %s
# RUN: ld.lld %t.o -o %t2
# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-BE
# RUN: llvm-objdump -D %t2 | FileCheck %s
# Make sure we calculate the offset correctly for a got-indirect access to a
# global variable as described by the PPC64 ELF V2 abi.
.text
.abiversion 2
.globl _start # -- Begin function _start
.p2align 4
.type _start,@function
_start: # @_start
.Lfunc_begin0:
.Lfunc_gep0:
addis 2, 12, .TOC.-.Lfunc_gep0@ha
addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
.localentry _start, .Lfunc_lep0-.Lfunc_gep0
# %bb.0: # %entry
addis 3, 2, .LC0@toc@ha
ld 3, .LC0@toc@l(3)
li 4, 0
stw 4, -12(1)
li 0,1
lwa 3, 0(3)
sc
.long 0
.quad 0
.Lfunc_end0:
.size _start, .Lfunc_end0-.Lfunc_begin0
# -- End function
.section .toc,"aw",@progbits
.LC0:
.tc glob[TC],glob
.type glob,@object # @glob
.data
.globl glob
.p2align 2
glob:
.long 55 # 0x37
.size glob, 4
# Verify the relocations emitted for glob are through the .toc
# RELOCS-LE: Relocations [
# RELOCS-LE: .rela.text {
# RELOCS-LE: 0x0 R_PPC64_REL16_HA .TOC. 0x0
# RELOCS-LE: 0x4 R_PPC64_REL16_LO .TOC. 0x4
# RELOCS-LE: 0x8 R_PPC64_TOC16_HA .toc 0x0
# RELOCS-LE: 0xC R_PPC64_TOC16_LO_DS .toc 0x0
# RELOCS-LE: }
# RELOCS-LE: .rela.toc {
# RELOCS-LE: 0x0 R_PPC64_ADDR64 glob 0x0
# RELOCS-LE: }
# RELOCS-BE: Relocations [
# RELOCS-BE: .rela.text {
# RELOCS-BE: 0x2 R_PPC64_REL16_HA .TOC. 0x2
# RELOCS-BE: 0x6 R_PPC64_REL16_LO .TOC. 0x6
# RELOCS-BE: 0xA R_PPC64_TOC16_HA .toc 0x0
# RELOCS-BE: 0xE R_PPC64_TOC16_LO_DS .toc 0x0
# RELOCS-BE: }
# RELOCS-BE: .rela.toc {
# RELOCS-BE: 0x0 R_PPC64_ADDR64 glob 0x0
# RELOCS-BE: }
# RELOCS-BE:]
# Verify that the global variable access is done through the correct
# toc entry:
# r2 = .TOC. = 0x10038000.
# r3 = r2 - 32760 = 0x10030008 -> .toc entry for glob.
# CHECK: _start:
# CHECK-NEXT: 10010000: {{.*}} addis 2, 12, 2
# CHECK-NEXT: 10010004: {{.*}} addi 2, 2, -32768
# CHECK-NEXT: 10010008: {{.*}} nop
# CHECK-NEXT: 1001000c: {{.*}} ld 3, -32760(2)
# CHECK: 1001001c: {{.*}} lwa 3, 0(3)
# CHECK-LE: Disassembly of section .got:
# CHECK-LE-EMPTY:
# CHECK-LE-NEXT: .got:
# CHECK-LE-NEXT: 10020000: 00 80 02 10
# CHECK-LE-NEXT: 10020004: 00 00 00 00
# Verify that .toc comes right after .got
# CHECK-LE: Disassembly of section .toc:
# CHECK-LE-EMPTY:
# CHECK-LE: 10020008: 00 00 03 10
# CHECK-LE: Disassembly of section .data:
# CHECK-LE-EMPTY:
# CHECK-LE-NEXT: glob:
# CHECK-LE-NEXT: 10030000: 37 00 00 00
# CHECK-BE: Disassembly of section .got:
# CHECK-BE-EMPTY:
# CHECK-BE-NEXT: .got:
# CHECK-BE-NEXT: 10020000: 00 00 00 00
# CHECK-BE-NEXT: 10020004: 10 02 80 00
# Verify that .toc comes right after .got
# CHECK-BE: Disassembly of section .toc:
# CHECK-BE-EMPTY:
# CHECK-BE: 10020008: 00 00 00 00
# CHECK-BE: 1002000c: 10 03 00 00
# CHECK-BE: Disassembly of section .data:
# CHECK-BE-EMPTY:
# CHECK-BE-NEXT: glob:
# CHECK-BE-NEXT: 10030000: 00 00 00 37

View File

@ -1,14 +1,14 @@
# REQUIRES: ppc
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t
# RUN: ld.lld %t -o %t2
# RUN: llvm-readelf -x .rodata -x .eh_frame %t2 | FileCheck %s --check-prefix=DATALE
# RUN: llvm-objdump -d --no-show-raw-insn %t2 | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
# RUN: ld.lld --no-toc-optimize %t.o -o %t
# RUN: llvm-readelf -x .rodata -x .eh_frame %t | FileCheck %s --check-prefix=DATALE
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t
# RUN: ld.lld %t -o %t2
# RUN: llvm-readelf -x .rodata -x .eh_frame %t2 | FileCheck %s --check-prefix=DATABE
# RUN: llvm-objdump -d --no-show-raw-insn %t2 | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
# RUN: ld.lld --no-toc-optimize %t.o -o %t
# RUN: llvm-readelf -x .rodata -x .eh_frame %t | FileCheck %s --check-prefix=DATABE
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
.text
.global _start
@ -67,7 +67,7 @@ _start:
# CHECK: Disassembly of section .R_PPC64_TOC16_HA:
# CHECK-EMPTY:
# CHECK: .FR_PPC64_TOC16_HA:
# CHECK: 10010018: nop
# CHECK: 10010018: addis 1, 2, 0
.section .R_PPC64_REL24,"ax",@progbits
.globl .FR_PPC64_REL24
@ -183,8 +183,8 @@ _start:
# CHECK: Disassembly of section .R_PPC64_REL32:
# CHECK-EMPTY:
# CHECK: .FR_PPC64_REL32:
# CHECK: 10010040: nop
# CHECK: 10010044: ld 5, -32736(2)
# CHECK: 10010040: addis 5, 2, 0
# CHECK: 10010044: ld 5, -32736(5)
# CHECK: 10010048: add 3, 3, 4
.section .R_PPC64_REL64, "ax",@progbits

View File

@ -0,0 +1,61 @@
# REQUIRES: ppc
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unkown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o
# RUN: ld.lld -shared %t.o -o %t.so
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o
# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefix=RELOCS %s
# RUN: ld.lld %t1.o %t2.o %t.so -o %t
# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SECTIONS %s
# RUN: llvm-nm %t | FileCheck --check-prefix=NM %s
# RUN: llvm-objdump -D %t | FileCheck %s
# In most cases, .toc contains exclusively addresses relocated by R_PPC64_ADDR64.
# Rarely .toc contains constants or variables.
# Test we can still perform toc-indirect to toc-relative relaxation.
# RELOCS: .rela.text {
# RELOCS-NEXT: 0x0 R_PPC64_TOC16_HA .toc 0x0
# RELOCS-NEXT: 0x4 R_PPC64_TOC16_LO_DS .toc 0x0
# RELOCS-NEXT: 0x8 R_PPC64_TOC16_HA .toc 0x8
# RELOCS-NEXT: 0xC R_PPC64_TOC16_LO_DS .toc 0x8
# RELOCS-NEXT: 0x10 R_PPC64_TOC16_HA .toc 0x10
# RELOCS-NEXT: 0x14 R_PPC64_TOC16_LO_DS .toc 0x10
# RELOCS-NEXT: }
# SECTIONS: .got PROGBITS 0000000010020090
# SECTIONS: .toc PROGBITS 0000000010020090
# NM: 0000000010030000 D default
# .LCONST1 is .toc[0].
# .LCONST1 - (.got+0x8000) = 0x10020090 - (0x10020090+0x8000) = -32768
# CHECK: nop
# CHECK: lwa 3, -32768(2)
addis 3, 2, .LCONST1@toc@ha
lwa 3, .LCONST1@toc@l(3)
# .LCONST2 is .toc[1]
# .LCONST2 - (.got+0x8000) = 0x10020098 - (0x10020090+0x8000) = -32760
# CHECK: nop
# CHECK: ld 4, -32760(2)
addis 4, 2, .LCONST2@toc@ha
ld 4, .LCONST2@toc@l(4)
# .Ldefault is .toc[2]. `default` is not preemptable when producing an executable.
# After toc-indirection to toc-relative relaxation, it is loaded using an
# offset relative to r2.
# CHECK: nop
# CHECK: addi 5, 2, 32624
# CHECK: lwa 5, 0(5)
addis 5, 2, .Ldefault@toc@ha
ld 5, .Ldefault@toc@l(5)
lwa 5, 0(5)
.section .toc,"aw",@progbits
.LCONST1:
.quad 11
.LCONST2:
.quad 22
.Ldefault:
.tc default[TC],default

View File

@ -0,0 +1,73 @@
# REQUIRES: ppc
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
# RUN: ld.lld %t.o -o %t
# RUN: llvm-readelf -S %t | FileCheck --check-prefixes=SECTIONS %s
# RUN: llvm-readelf -x .toc %t | FileCheck --check-prefixes=HEX-LE %s
# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=CHECK %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
# RUN: ld.lld %t.o -o %t
# RUN: llvm-readelf -S %t | FileCheck --check-prefixes=SECTIONS %s
# RUN: llvm-readelf -x .toc %t | FileCheck --check-prefixes=HEX-BE %s
# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=CHECK %s
# .LJT is a local symbol (non-preemptable).
# Test we can perform the toc-indirect to toc-relative relaxation.
# SECTIONS: .rodata PROGBITS 00000000100001c8
# HEX-LE: section '.toc':
# HEX-LE-NEXT: 10020008 c8010010 00000000
# HEX-BE: section '.toc':
# HEX-BE-NEXT: 10020008 00000000 100001c8
# CHECK-LABEL: _start
# CHECK: clrldi 3, 3, 62
# CHECK-NEXT: addis 4, 2, -2
# CHECK-NEXT: addi 4, 4, -32312
# CHECK-NEXT: sldi 3, 3, 2
.text
.global _start
.type _start, @function
_start:
.Lstart_gep:
addis 2, 12, .TOC.-.Lstart_gep@ha
addi 2, 2, .TOC.-.Lstart_gep@l
.Lstart_lep:
.localentry _start, .Lstart_lep-.Lstart_gep
rldicl 3, 3, 0, 62
addis 4, 2, .LJTI_TE@toc@ha
ld 4, .LJTI_TE@toc@l(4)
sldi 3, 3, 2
lwax 3, 3, 4
add 3, 3, 4
mtctr 3
bctr
.LBB1:
li 3, 0
blr
.LBB2:
li 3, 10
blr
.LBB3:
li 3, 55
blr
.LBB4:
li 3, 255
blr
.section .rodata,"a",@progbits
.p2align 2
.LJT:
.long .LBB1-.LJT
.long .LBB2-.LJT
.long .LBB3-.LJT
.long .LBB4-.LJT
.section .toc,"aw",@progbits
# TOC entry for the jumptable address.
.LJTI_TE:
.tc .LJT[TC],.LJT

View File

@ -0,0 +1,105 @@
# REQUIRES: ppc
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o
# RUN: ld.lld -shared %t.o -o %t.so
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o
# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefixes=RELOCS-LE,RELOCS %s
# RUN: ld.lld %t1.o %t2.o %t.so -o %t
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=COMMON,EXE %s
# RUN: ld.lld -shared %t1.o %t2.o %t.so -o %t2.so
# RUN: llvm-objdump -d --no-show-raw-insn %t2.so | FileCheck --check-prefixes=COMMON,SHARED %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o
# RUN: ld.lld -shared %t.o -o %t.so
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t1.o
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o
# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefixes=RELOCS-BE,RELOCS %s
# RUN: ld.lld %t1.o %t2.o %t.so -o %t
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=COMMON,EXE %s
# RUN: ld.lld -shared %t1.o %t2.o %t.so -o %t2.so
# RUN: llvm-objdump -d --no-show-raw-insn %t2.so | FileCheck --check-prefixes=COMMON,SHARED %s
# RELOCS-LE: .rela.text {
# RELOCS-LE-NEXT: 0x0 R_PPC64_TOC16_HA .toc 0x0
# RELOCS-LE-NEXT: 0x4 R_PPC64_TOC16_LO_DS .toc 0x0
# RELOCS-LE-NEXT: 0xC R_PPC64_TOC16_HA .toc 0x8
# RELOCS-LE-NEXT: 0x10 R_PPC64_TOC16_LO_DS .toc 0x8
# RELOCS-LE-NEXT: 0x18 R_PPC64_TOC16_HA .toc 0x10
# RELOCS-LE-NEXT: 0x1C R_PPC64_TOC16_LO_DS .toc 0x10
# RELOCS-LE-NEXT: 0x24 R_PPC64_TOC16_HA .toc 0x18
# RELOCS-LE-NEXT: 0x28 R_PPC64_TOC16_LO_DS .toc 0x18
# RELOCS-LE-NEXT: }
# RELOCS-BE: .rela.text {
# RELOCS-BE-NEXT: 0x2 R_PPC64_TOC16_HA .toc 0x0
# RELOCS-BE-NEXT: 0x6 R_PPC64_TOC16_LO_DS .toc 0x0
# RELOCS-BE-NEXT: 0xE R_PPC64_TOC16_HA .toc 0x8
# RELOCS-BE-NEXT: 0x12 R_PPC64_TOC16_LO_DS .toc 0x8
# RELOCS-BE-NEXT: 0x1A R_PPC64_TOC16_HA .toc 0x10
# RELOCS-BE-NEXT: 0x1E R_PPC64_TOC16_LO_DS .toc 0x10
# RELOCS-BE-NEXT: 0x26 R_PPC64_TOC16_HA .toc 0x18
# RELOCS-BE-NEXT: 0x2A R_PPC64_TOC16_LO_DS .toc 0x18
# RELOCS-BE-NEXT: }
# RELOCS: .rela.toc {
# RELOCS-NEXT: 0x0 R_PPC64_ADDR64 hidden 0x0
# RELOCS-NEXT: 0x8 R_PPC64_ADDR64 hidden2 0x0
# RELOCS-NEXT: 0x10 R_PPC64_ADDR64 shared 0x0
# RELOCS-NEXT: 0x18 R_PPC64_ADDR64 default 0x0
# RELOCS-NEXT: }
# NM-DAG: 0000000010030000 D default
# NM-DAG: 0000000010030000 d hidden
# NM-DAG: 0000000010040000 d hidden2
# 'hidden' is non-preemptable. It is relaxed.
# address(hidden) - (.got+0x8000) = 0x10030000 - (0x100200c0+0x8000) = 32576
# COMMON: nop
# COMMON: addi 3, 2, 32576
# COMMON: lwa 3, 0(3)
addis 3, 2, .Lhidden@toc@ha
ld 3, .Lhidden@toc@l(3)
lwa 3, 0(3)
# address(hidden2) - (.got+0x8000) = 0x10040000 - (0x100200c0+0x8000) = (1<<16)+32576
# COMMON: addis 3, 2, 1
# COMMON: addi 3, 3, 32576
# COMMON: lwa 3, 0(3)
addis 3, 2, .Lhidden2@toc@ha
ld 3, .Lhidden2@toc@l(3)
lwa 3, 0(3)
# 'shared' is not defined in an object file. Its definition is determined at
# runtime by the dynamic linker, so the extra indirection cannot be relaxed.
# The first addis can still be relaxed to nop, though.
# COMMON: nop
# COMMON: ld 4, -32752(2)
# COMMON: lwa 4, 0(4)
addis 4, 2, .Lshared@toc@ha
ld 4, .Lshared@toc@l(4)
lwa 4, 0(4)
# 'default' has default visibility. It is non-preemptable when producing an executable.
# address(default) - (.got+0x8000) = 0x10030000 - (0x100200c0+0x8000) = 32576
# EXE: nop
# EXE: addi 5, 2, 32576
# EXE: lwa 5, 0(5)
# SHARED: nop
# SHARED: ld 5, -32744(2)
# SHARED: lwa 5, 0(5)
addis 5, 2, .Ldefault@toc@ha
ld 5, .Ldefault@toc@l(5)
lwa 5, 0(5)
.section .toc,"aw",@progbits
.Lhidden:
.tc hidden[TC], hidden
.Lhidden2:
.tc hidden2[TC], hidden2
.Lshared:
.tc shared[TC], shared
.Ldefault:
.tc default[TC], default