[LLD][PowerPC] Add a pc-rel based long branch thunk

In this patch, a pc-rel based long branch thunk is added for the local
call protocol that caller and callee does not use TOC.

Reviewed By: sfertile, nemanjai

Differential Revision: https://reviews.llvm.org/D86706
This commit is contained in:
Victor Huang 2020-08-28 10:40:48 -05:00
parent b9b954b8bb
commit bfc7636612
3 changed files with 136 additions and 5 deletions

View File

@ -326,7 +326,7 @@ public:
// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
// alignment. This gives a possible 26 bits of 'reach'. If the call offset is
// larger then that we need to emit a long-branch thunk. The target address
// larger than that we need to emit a long-branch thunk. The target address
// of the callee is stored in a table to be accessed TOC-relative. Since the
// call must be local (a non-local call will have a PltCallStub instead) the
// table stores the address of the callee's local entry point. For
@ -337,6 +337,8 @@ public:
uint32_t size() override { return 16; }
void writeTo(uint8_t *buf) override;
void addSymbols(ThunkSection &isec) override;
bool isCompatibleWith(const InputSection &isec,
const Relocation &rel) const override;
protected:
PPC64LongBranchThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
@ -365,6 +367,24 @@ public:
}
};
// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
// alignment. This gives a possible 26 bits of 'reach'. If the caller and
// callee do not use toc and the call offset is larger than 26 bits,
// we need to emit a pc-rel based long-branch thunk. The target address of
// the callee is computed with a PC-relative offset.
class PPC64PCRelLongBranchThunk final : public Thunk {
public:
PPC64PCRelLongBranchThunk(Symbol &dest, int64_t addend)
: Thunk(dest, addend) {
alignment = 16;
}
uint32_t size() override { return 16; }
void writeTo(uint8_t *buf) override;
void addSymbols(ThunkSection &isec) override;
bool isCompatibleWith(const InputSection &isec,
const Relocation &rel) const override;
};
} // end anonymous namespace
Defined *Thunk::addSymbol(StringRef name, uint8_t type, uint64_t value,
@ -937,6 +957,33 @@ void PPC64LongBranchThunk::addSymbols(ThunkSection &isec) {
isec);
}
bool PPC64LongBranchThunk::isCompatibleWith(const InputSection &isec,
const Relocation &rel) const {
return rel.type == R_PPC64_REL24 || rel.type == R_PPC64_REL14;
}
void PPC64PCRelLongBranchThunk::writeTo(uint8_t *buf) {
int64_t offset = destination.getVA() - getThunkTargetSym()->getVA();
if (!isInt<34>(offset))
fatal("offset overflow 34 bits, please compile using the large code model");
uint64_t paddi = PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) |
(offset & 0xffff);
writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1
write32(buf + 8, MTCTR_R12); // mtctr r12
write32(buf + 12, BCTR); // bctr
}
void PPC64PCRelLongBranchThunk::addSymbols(ThunkSection &isec) {
addSymbol(saver.save("__long_branch_pcrel_" + destination.getName()),
STT_FUNC, 0, isec);
}
bool PPC64PCRelLongBranchThunk::isCompatibleWith(const InputSection &isec,
const Relocation &rel) const {
return rel.type == R_PPC64_REL24_NOTOC;
}
Thunk::Thunk(Symbol &d, int64_t a) : destination(d), addend(a), offset(0) {}
Thunk::~Thunk() = default;
@ -1057,12 +1104,15 @@ static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
: (Thunk *)make<PPC64PltCallStub>(s);
// This check looks at the st_other bits of the callee. If the value is 1
// then the callee clobbers the TOC and we need an R2 save stub.
if ((s.stOther >> 5) == 1)
// then the callee clobbers the TOC and we need an R2 save stub when RelType
// is R_PPC64_REL14 or R_PPC64_REL24.
if ((type == R_PPC64_REL14 || type == R_PPC64_REL24) && (s.stOther >> 5) == 1)
return make<PPC64R2SaveStub>(s);
if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
return make<PPC64R12SetupStub>(s);
if (type == R_PPC64_REL24_NOTOC)
return (s.stOther >> 5) > 1
? (Thunk *)make<PPC64R12SetupStub>(s)
: (Thunk *)make<PPC64PCRelLongBranchThunk>(s, a);
if (config->picThunk)
return make<PPC64PILongBranchThunk>(s, a);

View File

@ -0,0 +1,31 @@
# REQUIRES: ppc
# RUN: echo 'SECTIONS { \
# RUN: .text_low 0x2000: { *(.text_low) } \
# RUN: .text_high 0x800002000 : { *(.text_high) } \
# RUN: }' > %t.script
# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t.o
# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 >/dev/null | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=ppc64le -defsym HIDDEN=1 %s -o %t.o
# RUN: not ld.lld -shared -T %t.script %t.o -o %t.so 2>&1 >/dev/null | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=ppc64 %s -o %t.o
# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 >/dev/null | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=ppc64 -defsym HIDDEN=1 %s -o %t.o
# RUN: not ld.lld -shared -T %t.script %t.o -o %t.so 2>&1 >/dev/null | FileCheck %s
# CHECK: error: offset overflow 34 bits, please compile using the large code model
.section .text_low, "ax", %progbits
.globl _start
_start:
bl high@notoc
blr
.section .text_high, "ax", %progbits
.ifdef HIDDEN
.hidden high
.endif
.globl high
high:
blr

View File

@ -0,0 +1,50 @@
# REQUIRES: ppc
# RUN: echo 'SECTIONS { \
# RUN: .text_low 0x2000: { *(.text_low) } \
# RUN: .text_high 0x2002000 : { *(.text_high) } \
# RUN: }' > %t.script
# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t.o
# RUN: ld.lld -T %t.script %t.o -o %t
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=ppc64le -defsym HIDDEN=1 %s -o %t.o
# RUN: ld.lld -shared -T %t.script %t.o -o %t.so
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t.so | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=ppc64 %s -o %t.o
# RUN: ld.lld -T %t.script %t.o -o %t
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=ppc64 -defsym HIDDEN=1 %s -o %t.o
# RUN: ld.lld -shared -T %t.script %t.o -o %t.so
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t.so | FileCheck %s
# CHECK-LABEL: <_start>:
# CHECK-NEXT: 2000: bl 0x2010
# CHECK-NEXT: blr
# CHECK-NEXT: trap
# CHECK-NEXT: trap
## Callee address - program counter = 0x2002000 - 0x2010 = 33554416
# CHECK-LABEL: <__long_branch_pcrel_high>:
# CHECK-NEXT: 2010: paddi 12, 0, 33554416, 1
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr
# CHECK-LABEL: <high>:
# CHECK-NEXT: 2002000: blr
.section .text_low, "ax", %progbits
.globl _start
_start:
bl high@notoc
blr
.section .text_high, "ax", %progbits
.ifdef HIDDEN
.hidden high
.endif
.globl high
high:
blr