[LLD][PowerPC] Implement GOT to PC-Rel relaxation

This patch implements the handling for the R_PPC64_PCREL_OPT relocation as well
as the GOT-indirect to PC-relative relaxation for the associated
R_PPC64_GOT_PCREL34 relocation.

On Power10 targets with PC-Relative addressing, the linker can relax
GOT-relative accesses to PC-Relative under some conditions. Since the sequence
consists of a prefixed load, followed by a non-prefixed access (load or store),
the linker needs to replace the first instruction (as the replacement
instruction will be prefixed). The compiler communicates to the linker that
this optimization is safe by placing the two aforementioned relocations on the
GOT load (of the address).
The linker then does two things:

- Convert the load from the got into a PC-Relative add to compute the address
  relative to the PC
- Find the instruction referred to by the second relocation (R_PPC64_PCREL_OPT)
  and replace the first instruction with the PC-Relative version of it; the
  referenced instruction itself is turned into a nop

It is important to synchronize the mapping from legacy memory instructions to
their PC-Relative form. Hence, this patch adds a file to be included by both
the compiler and the linker so they're always in agreement.

Differential revision: https://reviews.llvm.org/D84360
This commit is contained in:
Nemanja Ivanovic 2020-08-17 09:30:14 -05:00
parent 25d759c143
commit cddb0dbcef
10 changed files with 744 additions and 1 deletions

View File

@ -62,6 +62,90 @@ enum DFormOpcd {
ADDI = 14
};
// Encodings of the non-prefixed ("legacy") loads and stores that the
// R_PPC64_PCREL_OPT handling knows how to convert. A value is the primary
// opcode (top six bits) plus, where several instructions share a primary
// opcode, the low extended-opcode bits that tell them apart.
enum class PPCLegacyInsn : uint32_t {
  // Sentinel: not an instruction this table knows about.
  NOINSN = 0,

  // Loads.
  LBZ = 0x88000000,
  LHZ = 0xa0000000,
  LWZ = 0x80000000,
  LHA = 0xa8000000,
  LWA = 0xe8000002,
  LD = 0xe8000000,
  LFS = 0xc0000000,
  LXSSP = 0xe4000003,
  LFD = 0xc8000000,
  LXSD = 0xe4000002,
  LXV = 0xf4000001,
  LXVP = 0x18000000,

  // Stores.
  STB = 0x98000000,
  STH = 0xb0000000,
  STW = 0x90000000,
  STD = 0xf8000000,
  STFS = 0xd0000000,
  STXSSP = 0xf4000003,
  STFD = 0xd8000000,
  STXSD = 0xf4000002,
  STXV = 0xf4000005,
  STXVP = 0x18000001
};
// Base encodings of the prefixed (64-bit) loads and stores. The upper 32 bits
// hold the prefix word and the lower 32 bits hold the suffix opcode. Entries
// equal to plain PREFIX_MLS carry no suffix opcode of their own; for those the
// suffix opcode is taken from the legacy instruction when the final encoding
// is assembled (see LegacyToPrefixMask::OPC_AND_RST).
enum class PPCPrefixedInsn : uint64_t {
  // Sentinel: no prefixed counterpart.
  NOINSN = 0,

  // The two prefix words used below.
  PREFIX_MLS = UINT64_C(0x0610000000000000),
  PREFIX_8LS = UINT64_C(0x0410000000000000),

  // Loads.
  PLBZ = PREFIX_MLS,
  PLHZ = PREFIX_MLS,
  PLWZ = PREFIX_MLS,
  PLHA = PREFIX_MLS,
  PLWA = PREFIX_8LS | 0xa4000000,
  PLD = PREFIX_8LS | 0xe4000000,
  PLFS = PREFIX_MLS,
  PLXSSP = PREFIX_8LS | 0xac000000,
  PLFD = PREFIX_MLS,
  PLXSD = PREFIX_8LS | 0xa8000000,
  PLXV = PREFIX_8LS | 0xc8000000,
  PLXVP = PREFIX_8LS | 0xe8000000,

  // Stores.
  PSTB = PREFIX_MLS,
  PSTH = PREFIX_MLS,
  PSTW = PREFIX_MLS,
  PSTD = PREFIX_8LS | 0xf4000000,
  PSTFS = PREFIX_MLS,
  PSTXSSP = PREFIX_8LS | 0xbc000000,
  PSTFD = PREFIX_MLS,
  PSTXSD = PREFIX_8LS | 0xb8000000,
  PSTXV = PREFIX_8LS | 0xd8000000,
  PSTXVP = PREFIX_8LS | 0xf8000000
};
// Returns true when `encoding` is exactly the value of one of the instruction
// enumerators listed in PPCInsns.def. The NOINSN sentinel is listed there as
// well but is deliberately reported as "not an instruction".
static bool checkPPCLegacyInsn(uint32_t encoding) {
  switch (static_cast<PPCLegacyInsn>(encoding)) {
// Every table row becomes one case label; the row's trailing semicolon in the
// .def file terminates the return statement.
#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
  case PPCLegacyInsn::Legacy:                                                  \
    return PPCLegacyInsn::Legacy != PPCLegacyInsn::NOINSN
#include "PPCInsns.def"
#undef PCREL_OPT
  }
  // Not an enumerator at all.
  return false;
}
// Selects which bits of a legacy instruction are carried over into the suffix
// of its prefixed, pc-relative replacement. Usually the primary opcode is the
// same in the legacy instruction and in the prefixed suffix, so it is copied
// along with the register field. DS-Form and DQ-Form instructions are the
// exception: their suffix opcode differs, so only the register field is kept.
enum class LegacyToPrefixMask : uint64_t {
  // Keep nothing.
  NOMASK = 0x0,
  // Primary opc (0-5) and R[ST] (6-10).
  OPC_AND_RST = 0xffe00000,
  // [RS]T (6-10).
  ONLY_RST = 0x3e00000,
  // S/T (6-10) - The [S/T]X bit moves from 28 to 5. The top bit is outside
  // any 32-bit encoding, so it never selects an instruction bit; it only makes
  // this value distinguishable from ONLY_RST.
  ST_STX28_TO5 = UINT64_C(0x8000000003e00000),
};
uint64_t elf::getPPC64TocBase() {
// The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
// TOC starts where the first of these sections starts. We always create a
@ -333,6 +417,7 @@ static bool isDQFormInstruction(uint32_t encoding) {
switch (getPrimaryOpCode(encoding)) {
default:
return false;
case 6: // Power10 paired loads/stores (lxvp, stxvp).
case 56:
// The only instruction with a primary opcode of 56 is `lq`.
return true;
@ -344,6 +429,78 @@ static bool isDQFormInstruction(uint32_t encoding) {
}
}
// Returns true for the legacy loads/stores in PPCLegacyInsn whose two low
// displacement bits hold extended-opcode bits (DS-Form), which callers must
// mask out before using the displacement.
static bool isDSFormInstruction(PPCLegacyInsn insn) {
  const bool isDSFormLoad =
      insn == PPCLegacyInsn::LWA || insn == PPCLegacyInsn::LD ||
      insn == PPCLegacyInsn::LXSD || insn == PPCLegacyInsn::LXSSP;
  const bool isDSFormStore = insn == PPCLegacyInsn::STD ||
                             insn == PPCLegacyInsn::STXSD ||
                             insn == PPCLegacyInsn::STXSSP;
  return isDSFormLoad || isDSFormStore;
}
// Classifies a 32-bit instruction word as one of the PPCLegacyInsn
// enumerators, or PPCLegacyInsn::NOINSN when it is not one of them.
static PPCLegacyInsn getPPCLegacyInsn(uint32_t encoding) {
  const uint32_t primaryOpcMask = 0xfc000000;
  const uint32_t primaryOpc = encoding & primaryOpcMask;

  // Several instructions can share one primary opcode. Work out how many of
  // the low extended-opcode bits must be kept to identify the instruction.
  const bool mayBeDSForm = primaryOpc == 0xe4000000 ||
                           primaryOpc == 0xe8000000 ||
                           primaryOpc == 0xf4000000 ||
                           primaryOpc == 0xf8000000;
  uint32_t extOpcMask = 0x0;
  if (mayBeDSForm && !isDQFormInstruction(encoding))
    extOpcMask = 0x3;
  else if (primaryOpc == 0xf4000000)
    extOpcMask = 0x7;
  else if (primaryOpc == 0x18000000)
    extOpcMask = 0xf;

  const uint32_t candidate = encoding & (primaryOpcMask | extOpcMask);

  // Only hand back values that really are enumerators of PPCLegacyInsn.
  return checkPPCLegacyInsn(candidate) ? static_cast<PPCLegacyInsn>(candidate)
                                       : PPCLegacyInsn::NOINSN;
}
// Maps a legacy load/store to the base encoding of its prefixed counterpart
// using the table in PPCInsns.def. Yields PPCPrefixedInsn::NOINSN when the
// table has no instruction entry for `insn`.
static PPCPrefixedInsn getPCRelativeForm(PPCLegacyInsn insn) {
// Every table row becomes one comparison; the row's trailing semicolon in the
// .def file terminates the return statement.
#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
  if (insn == PPCLegacyInsn::Legacy)                                           \
    return PPCPrefixedInsn::PCRel
#include "PPCInsns.def"
#undef PCREL_OPT
  return PPCPrefixedInsn::NOINSN;
}
// Looks up, in PPCInsns.def, which bits of legacy instruction `insn` are
// carried over into its prefixed form. Yields LegacyToPrefixMask::NOMASK when
// the table has no instruction entry for `insn`.
static LegacyToPrefixMask getInsnMask(PPCLegacyInsn insn) {
// Every table row becomes one comparison; the row's trailing semicolon in the
// .def file terminates the return statement.
#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
  if (insn == PPCLegacyInsn::Legacy)                                           \
    return LegacyToPrefixMask::InsnMask
#include "PPCInsns.def"
#undef PCREL_OPT
  return LegacyToPrefixMask::NOMASK;
}
static uint64_t getPCRelativeForm(uint32_t encoding) {
PPCLegacyInsn origInsn = getPPCLegacyInsn(encoding);
PPCPrefixedInsn pcrelInsn = getPCRelativeForm(origInsn);
if (pcrelInsn == PPCPrefixedInsn::NOINSN)
return UINT64_C(-1);
LegacyToPrefixMask origInsnMask = getInsnMask(origInsn);
uint64_t pcrelEncoding =
(uint64_t)pcrelInsn | (encoding & (uint64_t)origInsnMask);
// If the mask requires moving bit 28 to bit 5, do that now.
if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5)
pcrelEncoding |= (encoding & 0x8) << 23;
return pcrelEncoding;
}
static bool isInstructionUpdateForm(uint32_t encoding) {
switch (getPrimaryOpCode(encoding)) {
default:
@ -368,6 +525,25 @@ static bool isInstructionUpdateForm(uint32_t encoding) {
}
}
// Adds the displacement encoded in a prefixed instruction (which reaches the
// start of the data) to the displacement encoded in the load/store that
// indexes into that data.
// For example:
// paddi 3, 0, 1000, 1
// lwz 3, 20(3)
// yields a total displacement of 1020.
static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) {
  // The 34-bit immediate is split: 18 bits come from the low end of the
  // prefix word (the upper 32 bits of prefixedInsn) and 16 bits from the low
  // end of the suffix word.
  const uint64_t immHigh18 = (prefixedInsn & 0x3ffff00000000) >> 16;
  const uint64_t immLow16 = prefixedInsn & 0xffff;
  const int64_t disp34 = llvm::SignExtend64(immHigh18 | immLow16, 34);

  int32_t disp16 = llvm::SignExtend32(accessInsn & 0xffff, 16);
  // For DS and DQ form instructions the low displacement bits are really XO
  // bits, so they are cleared.
  if (isDQFormInstruction(accessInsn))
    disp16 &= ~0xf;
  else if (isDSFormInstruction(getPPCLegacyInsn(accessInsn)))
    disp16 &= ~0x3;

  return disp34 + disp16;
}
// There are a number of places when we either want to read or write an
// instruction when handling a half16 relocation type. On big-endian the buffer
// pointer is pointing into the middle of the word we want to extract, and on
@ -475,6 +651,49 @@ void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const {
relocateNoSym(loc, R_PPC64_TOC16_LO, val);
break;
}
case R_PPC64_GOT_PCREL34: {
// Clear the first 8 bits of the prefix and the first 6 bits of the
// instruction (the primary opcode).
uint64_t insn = readPrefixedInstruction(loc);
if ((insn & 0xfc000000) != 0xe4000000)
error("expected a 'pld' for got-indirect to pc-relative relaxing");
insn &= ~0xff000000fc000000;
// Replace the cleared bits with the values for PADDI (0x600000038000000);
insn |= 0x600000038000000;
writePrefixedInstruction(loc, insn);
relocate(loc, rel, val);
break;
}
case R_PPC64_PCREL_OPT: {
// We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
// be relaxed. The eligibility for the relaxation needs to be determined
// on that relocation since this one does not relocate a symbol.
uint64_t insn = readPrefixedInstruction(loc);
uint32_t accessInsn = read32(loc + rel.addend);
uint64_t pcRelInsn = getPCRelativeForm(accessInsn);
// This error is not necessary for correctness but is emitted for now
// to ensure we don't miss these opportunities in real code. It can be
// removed at a later date.
if (pcRelInsn == UINT64_C(-1)) {
errorOrWarn(
"unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" +
Twine::utohexstr(accessInsn));
break;
}
int64_t totalDisp = getTotalDisp(insn, accessInsn);
if (!isInt<34>(totalDisp))
break; // Displacement doesn't fit.
// Convert the PADDI to the prefixed version of accessInsn and convert
// accessInsn to a nop.
writePrefixedInstruction(loc, pcRelInsn |
((totalDisp & 0x3ffff0000) << 16) |
(totalDisp & 0xffff));
write32(loc + rel.addend, 0x60000000); // nop accessInsn.
break;
}
default:
llvm_unreachable("unexpected relocation type");
}
@ -668,6 +887,7 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s,
case R_PPC64_TOC16_LO:
return R_GOTREL;
case R_PPC64_GOT_PCREL34:
case R_PPC64_PCREL_OPT:
return R_GOT_PC;
case R_PPC64_TOC16_HA:
case R_PPC64_TOC16_LO_DS:
@ -1024,6 +1244,9 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
(val & si1Mask));
break;
}
// A PCREL_OPT relocation that we won't optimize requires no changes here.
case R_PPC64_PCREL_OPT:
break;
default:
llvm_unreachable("unknown relocation");
}
@ -1080,6 +1303,14 @@ bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data,
RelExpr expr) const {
if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) &&
config->pcRelOptimize) {
// It only makes sense to optimize pld since paddi means that the address
// of the object in the GOT is required rather than the object itself.
assert(data && "Expecting an instruction encoding here");
if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000)
return R_PPC64_RELAX_GOT_PC;
}
if (expr == R_RELAX_TLS_GD_TO_IE)
return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
if (expr == R_RELAX_TLS_LD_TO_LE)

27
lld/ELF/Arch/PPCInsns.def Normal file
View File

@ -0,0 +1,27 @@
// Table of legacy load/store instructions and their prefixed, PC-relative
// counterparts. Each row is
//   PCREL_OPT(<PPCLegacyInsn>, <PPCPrefixedInsn>, <LegacyToPrefixMask>);
// The includer defines PCREL_OPT as a function-style macro before including
// this file and undefines it afterwards.
//
// Choosing the mask: OPC_AND_RST copies the legacy primary opcode into the
// prefixed suffix, so it is only valid when the suffix uses that same opcode
// (the prefixed enumerator is then just a prefix word). Whenever the prefixed
// enumerator carries its own suffix opcode, the legacy opcode must not be
// copied and ONLY_RST (or ST_STX28_TO5) has to be used instead.
#ifndef PCREL_OPT
#error "Need to define function-style macro PCREL_OPT"
#endif
PCREL_OPT(NOINSN, NOINSN, NOMASK);
PCREL_OPT(LBZ, PLBZ, OPC_AND_RST);
PCREL_OPT(LHZ, PLHZ, OPC_AND_RST);
PCREL_OPT(LWZ, PLWZ, OPC_AND_RST);
PCREL_OPT(LHA, PLHA, OPC_AND_RST);
PCREL_OPT(LWA, PLWA, ONLY_RST);
PCREL_OPT(LD, PLD , ONLY_RST);
PCREL_OPT(LFS, PLFS, OPC_AND_RST);
PCREL_OPT(LXSSP, PLXSSP, ONLY_RST);
PCREL_OPT(LFD, PLFD, OPC_AND_RST);
PCREL_OPT(LXSD, PLXSD, ONLY_RST);
PCREL_OPT(LXV, PLXV, ST_STX28_TO5);
// LXVP's primary opcode (0x18000000) differs from the PLXVP suffix opcode
// (0xe8000000). Copying it would OR the two into 0xf8000000, which is the
// PSTXVP suffix opcode, so only the register field is carried over.
PCREL_OPT(LXVP, PLXVP, ONLY_RST);
PCREL_OPT(STB, PSTB, OPC_AND_RST);
PCREL_OPT(STH, PSTH, OPC_AND_RST);
PCREL_OPT(STW, PSTW, OPC_AND_RST);
PCREL_OPT(STD, PSTD, ONLY_RST);
PCREL_OPT(STFS, PSTFS, OPC_AND_RST);
PCREL_OPT(STXSSP, PSTXSSP, ONLY_RST);
PCREL_OPT(STFD, PSTFD, OPC_AND_RST);
PCREL_OPT(STXSD, PSTXSD, ONLY_RST);
PCREL_OPT(STXV, PSTXV, ST_STX28_TO5);
// Same reasoning as LXVP: the suffix opcode already comes from PSTXVP.
PCREL_OPT(STXVP, PSTXVP, ONLY_RST);

View File

@ -206,6 +206,7 @@ struct Configuration {
bool thinLTOIndexOnly;
bool timeTraceEnabled;
bool tocOptimize;
bool pcRelOptimize;
bool undefinedVersion;
bool unique;
bool useAndroidRelrTags = false;

View File

@ -309,6 +309,9 @@ static void checkOptions() {
if (config->tocOptimize && config->emachine != EM_PPC64)
error("--toc-optimize is only supported on the PowerPC64 target");
// --pcrel-optimize only has meaning for PowerPC64. The diagnostic must spell
// the flag exactly as the user types it.
if (config->pcRelOptimize && config->emachine != EM_PPC64)
  error("--pcrel-optimize is only supported on the PowerPC64 target");
if (config->pie && config->shared)
error("-shared and -pie may not be used together");
@ -1288,6 +1291,8 @@ static void setConfigs(opt::InputArgList &args) {
config->tocOptimize =
args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, m == EM_PPC64);
config->pcRelOptimize =
args.hasFlag(OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64);
}
// Returns a value of "-format" option.

View File

@ -807,6 +807,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
case R_PPC64_TOCBASE:
return getPPC64TocBase() + a;
case R_RELAX_GOT_PC:
case R_PPC64_RELAX_GOT_PC:
return sym.getVA(a) - p;
case R_RELAX_TLS_GD_TO_LE:
case R_RELAX_TLS_IE_TO_LE:
@ -1004,6 +1005,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) {
void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
assert(flags & SHF_ALLOC);
const unsigned bits = config->wordsize * 8;
uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
for (const Relocation &rel : relocations) {
if (rel.expr == R_NONE)
@ -1025,6 +1027,20 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
case R_RELAX_GOT_PC_NOPIC:
target->relaxGot(bufLoc, rel, targetVA);
break;
case R_PPC64_RELAX_GOT_PC: {
// The R_PPC64_PCREL_OPT relocation must appear immediately after
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
// We can only relax R_PPC64_PCREL_OPT if we have also relaxed
// the associated R_PPC64_GOT_PCREL34 since only the latter has an
// associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34
// and only relax the other if the saved offset matches.
if (type == R_PPC64_GOT_PCREL34)
lastPPCRelaxedRelocOff = offset;
if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff)
break;
target->relaxGot(bufLoc, rel, targetVA);
break;
}
case R_PPC64_RELAX_TOC:
// rel.sym refers to the STT_SECTION symbol associated to the .toc input
// section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC

View File

@ -404,6 +404,10 @@ defm toc_optimize : B<"toc-optimize",
"(PowerPC64) Enable TOC related optimizations (default)",
"(PowerPC64) Disable TOC related optimizations">;
defm pcrel_optimize : B<"pcrel-optimize",
"(PowerPC64) Enable PC-relative optimizations (default)",
"(PowerPC64) Disable PC-relative optimizations">;
def trace: F<"trace">, HelpText<"Print the names of the input files">;
defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">;

View File

@ -376,7 +376,7 @@ static bool needsGot(RelExpr expr) {
static bool isRelExpr(RelExpr expr) {
return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC64_CALL,
R_PPC64_RELAX_TOC, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC,
R_RISCV_PC_INDIRECT>(expr);
R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC>(expr);
}
// Returns true if a given relocation can be computed at link-time.

View File

@ -96,6 +96,7 @@ enum RelExpr {
R_PPC64_CALL_PLT,
R_PPC64_RELAX_TOC,
R_PPC64_TOCBASE,
R_PPC64_RELAX_GOT_PC,
R_RISCV_ADD,
R_RISCV_PC_INDIRECT,
};

View File

@ -0,0 +1,66 @@
.section ".text"
.comm storeVal_vector,8,8
.comm useVal_vector,8,8
# Export every data symbol defined below so that the test file can reference
# it from another object or from the shared library built out of this file.
.globl storeVal_longlong, useAddr_longlong, useVal_longlong, storeVal_sshort
.globl useAddr_sshort, useVal_sshort, storeVal_sint, useAddr_sint, useVal_sint
.globl storeVal_double, useAddr_double, useVal_double, storeVal_float
.globl useAddr_float, useVal_float, storeVal_uint, useAddr_uint
.globl useVal_uint, storeVal_ushort, useAddr_ushort, useVal_ushort
.globl storeVal, useAddr, useVal
.section ".data"
.align 3
.type storeVal_longlong, @object
.size storeVal_longlong, 8
storeVal_longlong:
.quad 18
useAddr_longlong:
.quad 17
useVal_longlong:
.quad 16
storeVal_sshort:
.short -15
useAddr_sshort:
.short -14
useVal_sshort:
.short -13
.zero 2
storeVal_sint:
.long -12
useAddr_sint:
.long -11
useVal_sint:
.long -10
.zero 4
storeVal_double:
.long 858993459
.long 1076966195
useAddr_double:
.long -1717986918
.long -1070589543
useVal_double:
.long 0
.long 1076756480
storeVal_float:
.long 1045220557
useAddr_float:
.long -1050568294
useVal_float:
.long 1095761920
storeVal_uint:
.long 12
useAddr_uint:
.long 11
useVal_uint:
.long 10
storeVal_ushort:
.short 1
useAddr_ushort:
.short 10
useVal_ushort:
.short 5
storeVal:
.byte -1
useAddr:
.byte 10
useVal:
.byte 5

View File

@ -0,0 +1,392 @@
# REQUIRES: ppc
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t1.o
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2
# RUN: ld.lld %t1.o %t2.o -o %t
# RUN: ld.lld %t1.o %t2.so -o %ts
# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-optimize
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D
# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t1.o
# RUN: llvm-mc -filetype=obj -triple=powerpc64 %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2
# RUN: ld.lld %t1.o %t2.o -o %t
# RUN: ld.lld %t1.o %t2.so -o %ts
# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-optimize
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D
# CHECK-S-LABEL: <check_LBZ_STB>:
# CHECK-S-NEXT: plbz 10
# CHECK-S-NEXT: paddi 9
# CHECK-S-NEXT: li 3, 0
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: rldicl 9, 9, 9, 60
# CHECK-S-NEXT: add 9, 9, 10
# CHECK-S-NEXT: pstb 9
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LBZ_STB>:
# CHECK-D-NEXT: pld 8
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: li 3, 0
# CHECK-D-NEXT: lbz 10, 0(8)
# CHECK-D-NEXT: rldicl 9, 9, 9, 60
# CHECK-D-NEXT: add 9, 9, 10
# CHECK-D-NEXT: pld 10
# CHECK-D-NEXT: stb 9, 0(10)
# CHECK-D-NEXT: blr
check_LBZ_STB:
pld 8,useVal@got@pcrel(0),1
.Lpcrel1:
pld 9,useAddr@got@pcrel(0),1
li 3,0
.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
lbz 10,0(8)
rldicl 9,9,9,60
add 9,9,10
pld 10,storeVal@got@pcrel(0),1
.Lpcrel2:
.reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
stb 9,0(10)
blr
# CHECK-S-LABEL: <check_LHZ_STH>:
# CHECK-S-NEXT: plhz 3
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: psth 3
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LHZ_STH>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: lhz 3, 0(9)
# CHECK-D-NEXT: nop
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: sth 3, 0(9)
# CHECK-D-NEXT: blr
check_LHZ_STH:
pld 9,useVal_ushort@got@pcrel(0),1
.Lpcrel3:
.reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
lhz 3,0(9)
pld 9,storeVal_ushort@got@pcrel(0),1
.Lpcrel4:
.reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
sth 3,0(9)
blr
# CHECK-S-LABEL: <check_LWZ_STW>:
# CHECK-S-NEXT: plwz 3
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: pstw 3
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LWZ_STW>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: lwz 3, 0(9)
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: stw 3, 0(9)
# CHECK-D-NEXT: blr
check_LWZ_STW:
pld 9,useVal_uint@got@pcrel(0),1
.Lpcrel5:
.reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
lwz 3,0(9)
pld 9,storeVal_uint@got@pcrel(0),1
.Lpcrel6:
.reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
stw 3,0(9)
blr
# CHECK-S-LABEL: <check_LFS_STFS>:
# CHECK-S-NEXT: plfs 1
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: pstfs 1
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LFS_STFS>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: lfs 1, 0(9)
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: stfs 1, 0(9)
# CHECK-D-NEXT: blr
check_LFS_STFS:
pld 9,useVal_float@got@pcrel(0),1
.Lpcrel7:
.reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
lfs 1,0(9)
pld 9,storeVal_float@got@pcrel(0),1
.Lpcrel8:
.reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
stfs 1,0(9)
blr
# CHECK-S-LABEL: <check_LFD_STFD>:
# CHECK-S-NEXT: plfd 1
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: pstfd 1
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LFD_STFD>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: lfd 1, 0(9)
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: stfd 1, 0(9)
# CHECK-D-NEXT: blr
check_LFD_STFD:
pld 9,useVal_double@got@pcrel(0),1
.Lpcrel9:
.reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
lfd 1,0(9)
pld 9,storeVal_double@got@pcrel(0),1
.Lpcrel10:
.reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
stfd 1,0(9)
blr
# CHECK-S-LABEL: <check_LWA_STW>:
# CHECK-S-NEXT: mr 9, 3
# CHECK-S-NEXT: plwa 3
# CHECK-S-NEXT: pstw 9
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LWA_STW>:
# CHECK-D-NEXT: mr 9, 3
# CHECK-D-NEXT: pld 8
# CHECK-D-NEXT: pld 10
# CHECK-D-NEXT: lwa 3, 0(8)
# CHECK-D-NEXT: stw 9, 0(10)
# CHECK-D-NEXT: blr
check_LWA_STW:
mr 9,3
pld 8,useVal_sint@got@pcrel(0),1
.Lpcrel11:
pld 10,storeVal_sint@got@pcrel(0),1
.Lpcrel12:
.reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
lwa 3,0(8)
.reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
stw 9,0(10)
blr
# CHECK-S-LABEL: <check_LHA_STH>:
# CHECK-S-NEXT: mr 9, 3
# CHECK-S-NEXT: plha 3
# CHECK-S-NEXT: psth 9
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LHA_STH>:
# CHECK-D-NEXT: mr 9, 3
# CHECK-D-NEXT: pld 8
# CHECK-D-NEXT: pld 10
# CHECK-D-NEXT: lha 3, 0(8)
# CHECK-D-NEXT: sth 9, 0(10)
# CHECK-D-NEXT: blr
check_LHA_STH:
mr 9,3
pld 8,useVal_sshort@got@pcrel(0),1
.Lpcrel13:
pld 10,storeVal_sshort@got@pcrel(0),1
.Lpcrel14:
.reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
lha 3,0(8)
.reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
sth 9,0(10)
blr
# CHECK-S-LABEL: <check_LD_STD>:
# CHECK-S-NEXT: pld 3
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: pstd 3
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LD_STD>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: ld 3, 0(9)
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: std 3, 0(9)
# CHECK-D-NEXT: blr
check_LD_STD:
pld 9,useVal_longlong@got@pcrel(0),1
.Lpcrel15:
.reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
ld 3,0(9)
pld 9,storeVal_longlong@got@pcrel(0),1
.Lpcrel16:
.reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
std 3,0(9)
blr
# CHECK-S-LABEL: <check_LXV_STXV>:
# CHECK-S-NEXT: plxv 34
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: pstxv 34
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LXV_STXV>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: lxv 34, 0(9)
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: stxv 34, 0(9)
# CHECK-D-NEXT: blr
check_LXV_STXV:
pld 9,useVal_vector@got@pcrel(0),1
.Lpcrel17:
.reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8)
lxv 34,0(9)
pld 9,storeVal_vector@got@pcrel(0),1
.Lpcrel18:
.reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8)
stxv 34,0(9)
blr
# CHECK-S-LABEL: <check_LXSSP_STXSSP>:
# CHECK-S-NEXT: plxssp 1
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: pstxssp 1
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LXSSP_STXSSP>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: lxssp 1, 0(9)
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: stxssp 1, 0(9)
# CHECK-D-NEXT: blr
check_LXSSP_STXSSP:
pld 9,useVal_float@got@pcrel(0),1
.Lpcrel19:
.reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8)
lxssp 1,0(9)
pld 9,storeVal_float@got@pcrel(0),1
.Lpcrel20:
.reloc .Lpcrel20-8,R_PPC64_PCREL_OPT,.-(.Lpcrel20-8)
stxssp 1,0(9)
blr
# CHECK-S-LABEL: <check_LXSD_STXSD>:
# CHECK-S-NEXT: plxsd 1, [[#ADDR1:]]
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: pstxsd 1, [[#ADDR2:]]
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LXSD_STXSD>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: lxsd 1, 0(9)
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: stxsd 1, 0(9)
# CHECK-D-NEXT: blr
check_LXSD_STXSD:
pld 9,useVal_double@got@pcrel(0),1
.Lpcrel21:
.reloc .Lpcrel21-8,R_PPC64_PCREL_OPT,.-(.Lpcrel21-8)
lxsd 1,0(9)
pld 9,storeVal_double@got@pcrel(0),1
.Lpcrel22:
.reloc .Lpcrel22-8,R_PPC64_PCREL_OPT,.-(.Lpcrel22-8)
stxsd 1,0(9)
blr
# The respective displacements are computed relative to the PC, which has
# advanced by 28 bytes in this function. The displacements in the two access
# instructions are 8 and 32, so the resulting displacements are those computed
# above minus 20 and plus 4 (+8 - 28 and +32 - 28) respectively.
# CHECK-S-LABEL: <check_LXSD_STXSD_aggr>:
# CHECK-S-NEXT: plxsd 1, [[#ADDR1-20]]
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: pstxsd 1, [[#ADDR2+4]]
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LXSD_STXSD_aggr>:
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: lxsd 1, 8(9)
# CHECK-D-NEXT: pld 9
# CHECK-D-NEXT: stxsd 1, 32(9)
# CHECK-D-NEXT: blr
check_LXSD_STXSD_aggr:
pld 9,useVal_double@got@pcrel(0),1
.Lpcrel23:
.reloc .Lpcrel23-8,R_PPC64_PCREL_OPT,.-(.Lpcrel23-8)
lxsd 1,8(9)
pld 9,storeVal_double@got@pcrel(0),1
.Lpcrel24:
.reloc .Lpcrel24-8,R_PPC64_PCREL_OPT,.-(.Lpcrel24-8)
stxsd 1,32(9)
blr
# This includes a nop but that is not emitted by the linker.
# It is an alignment nop to prevent the prefixed instruction from
# crossing a 64-byte boundary.
# CHECK-S-LABEL: <check_LD_STD_W_PADDI>:
# CHECK-S-NEXT: paddi 9
# CHECK-S-NEXT: ld 3, 0(9)
# CHECK-S-NEXT: nop
# CHECK-S-NEXT: paddi 9
# CHECK-S-NEXT: std 3, 0(9)
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LD_STD_W_PADDI>:
# CHECK-D-NEXT: paddi 9
# CHECK-D-NEXT: ld 3, 0(9)
# CHECK-D-NEXT: nop
# CHECK-D-NEXT: paddi 9
# CHECK-D-NEXT: std 3, 0(9)
# CHECK-D-NEXT: blr
check_LD_STD_W_PADDI:
paddi 9,0,useVal_longlong@got@pcrel,1
.Lpcrel25:
.reloc .Lpcrel25-8,R_PPC64_PCREL_OPT,.-(.Lpcrel25-8)
ld 3,0(9)
paddi 9,0,storeVal_longlong@got@pcrel,1
.Lpcrel26:
.reloc .Lpcrel26-8,R_PPC64_PCREL_OPT,.-(.Lpcrel26-8)
std 3,0(9)
blr
# CHECK-S-LABEL: <check_LXSD_STXSD_aggr_notoc>:
# CHECK-S-NEXT: paddi 3, 0, -12, 1
# CHECK-S-NEXT: lwz 4, 8(3)
# CHECK-S-NEXT: paddi 3, 0, -24, 1
# CHECK-S-NEXT: stw 4, 32(3)
# CHECK-S-NEXT: blr
# CHECK-D-LABEL: <check_LXSD_STXSD_aggr_notoc>:
# CHECK-D-NEXT: paddi 3, 0, -12, 1
# CHECK-D-NEXT: lwz 4, 8(3)
# CHECK-D-NEXT: paddi 3, 0, -24, 1
# CHECK-D-NEXT: stw 4, 32(3)
# CHECK-D-NEXT: blr
.type Arr,@object # @Arr
.globl Arr
.p2align 2
Arr:
.long 11 # 0xb
.long 22 # 0x16
.long 33 # 0x21
check_LXSD_STXSD_aggr_notoc:
paddi 3, 0, Arr@PCREL, 1
.Lpcrel27:
.reloc .Lpcrel27-8,R_PPC64_PCREL_OPT,.-(.Lpcrel27-8)
lwz 4,8(3)
paddi 3, 0, Arr@PCREL, 1
.Lpcrel28:
.reloc .Lpcrel28-8,R_PPC64_PCREL_OPT,.-(.Lpcrel28-8)
stw 4,32(3)
blr