[PowerPC][PCRelative] Thread Local Storage Support for Local Dynamic

This patch adds initial support for the Local Dynamic Thread Local Storage
model, producing the code sequence and relocations that the ABI requires for
this model when PC-relative memory operations are in use.

Differential Revision: https://reviews.llvm.org/D87721
This commit is contained in:
Victor Huang 2020-09-23 13:43:33 -05:00
parent 6ada9e516f
commit 652a8f150d
16 changed files with 171 additions and 12 deletions

View File

@ -101,7 +101,9 @@
#undef R_PPC64_PCREL34
#undef R_PPC64_GOT_PCREL34
#undef R_PPC64_TPREL34
#undef R_PPC64_DTPREL34
#undef R_PPC64_GOT_TLSGD_PCREL34
#undef R_PPC64_GOT_TLSLD_PCREL34
#undef R_PPC64_GOT_TPREL_PCREL34
#undef R_PPC64_IRELATIVE
#undef R_PPC64_REL16
@ -202,7 +204,9 @@ ELF_RELOC(R_PPC64_PCREL_OPT, 123)
ELF_RELOC(R_PPC64_PCREL34, 132)
ELF_RELOC(R_PPC64_GOT_PCREL34, 133)
ELF_RELOC(R_PPC64_TPREL34, 146)
ELF_RELOC(R_PPC64_DTPREL34, 147)
ELF_RELOC(R_PPC64_GOT_TLSGD_PCREL34, 148)
ELF_RELOC(R_PPC64_GOT_TLSLD_PCREL34, 149)
ELF_RELOC(R_PPC64_GOT_TPREL_PCREL34, 150)
ELF_RELOC(R_PPC64_IRELATIVE, 248)
ELF_RELOC(R_PPC64_REL16, 249)

View File

@ -300,6 +300,7 @@ public:
VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha
VK_PPC_GOT_PCREL, // symbol@got@pcrel
VK_PPC_GOT_TLSGD_PCREL, // symbol@got@tlsgd@pcrel
VK_PPC_GOT_TLSLD_PCREL, // symbol@got@tlsld@pcrel
VK_PPC_GOT_TPREL_PCREL, // symbol@got@tprel@pcrel
VK_PPC_TLS_PCREL, // symbol@tls@pcrel
VK_PPC_TLSLD, // symbol@tlsld

View File

@ -324,6 +324,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
return "got@pcrel";
case VK_PPC_GOT_TLSGD_PCREL:
return "got@tlsgd@pcrel";
case VK_PPC_GOT_TLSLD_PCREL:
return "got@tlsld@pcrel";
case VK_PPC_GOT_TPREL_PCREL:
return "got@tprel@pcrel";
case VK_PPC_TLS_PCREL:
@ -461,6 +463,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
.Case("got@pcrel", VK_PPC_GOT_PCREL)
.Case("got@tlsgd@pcrel", VK_PPC_GOT_TLSGD_PCREL)
.Case("got@tlsld@pcrel", VK_PPC_GOT_TLSLD_PCREL)
.Case("got@tprel@pcrel", VK_PPC_GOT_TPREL_PCREL)
.Case("tls@pcrel", VK_PPC_TLS_PCREL)
.Case("notoc", VK_PPC_NOTOC)

View File

@ -141,6 +141,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL:
Type = ELF::R_PPC64_GOT_TLSGD_PCREL34;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL:
Type = ELF::R_PPC64_GOT_TLSLD_PCREL34;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL:
Type = ELF::R_PPC64_GOT_TPREL_PCREL34;
break;
@ -422,6 +425,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
switch (Modifier) {
default:
report_fatal_error("Unsupported Modifier for fixup_ppc_imm34.");
case MCSymbolRefExpr::VK_DTPREL:
Type = ELF::R_PPC64_DTPREL34;
break;
case MCSymbolRefExpr::VK_TPREL:
Type = ELF::R_PPC64_TPREL34;
break;

View File

@ -233,9 +233,11 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL ||
SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL ||
SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL ||
SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL ||
SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL) &&
"VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL or "
"VK_PPC_GOT_TLSGD_PCREL or VK_PPC_GOT_TPREL_PCREL");
"VK_PPC_GOT_TLSGD_PCREL or VK_PPC_GOT_TLSLD_PCREL or "
"VK_PPC_GOT_TPREL_PCREL.");
// Generate the fixup for the relocation.
Fixups.push_back(
MCFixup::create(0, Expr,

View File

@ -123,11 +123,20 @@ FunctionPass *createPPCCTRLoops();
/// TLS Initial Exec model.
MO_TPREL_FLAG = 64,
/// MO_TLSLD_FLAG - If this bit is set, the symbol reference is relative to
/// the TLS Local Dynamic model.
MO_TLSLD_FLAG = 128,
/// MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set
/// they should produce the relocation @got@tlsgd@pcrel.
/// Fix up is VK_PPC_GOT_TLSGD_PCREL
MO_GOT_TLSGD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSGD_FLAG,
/// MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set
/// they should produce the relocation @got@tlsld@pcrel.
/// Fix up is VK_PPC_GOT_TLSLD_PCREL
MO_GOT_TLSLD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSLD_FLAG,
/// MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set
/// they should produce the relocation @got@tprel@pcrel.
/// Fix up is VK_PPC_GOT_TPREL_PCREL

View File

@ -491,7 +491,8 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
unsigned Opcode = PPC::BL8_NOP_TLS;
assert(MI->getNumOperands() >= 3 && "Expecting at least 3 operands from MI");
if (MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG) {
if (MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG) {
Kind = MCSymbolRefExpr::VK_PPC_NOTOC;
Opcode = PPC::BL8_NOTOC_TLS;
}
@ -1146,6 +1147,21 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addExpr(SymDtprel));
return;
}
case PPC::PADDIdtprel: {
// Expand the PADDIdtprel pseudo into a PADDI8 whose immediate operand is a
// DTPREL reference to the symbol carried by the pseudo.
// Transform: %rd = PADDIdtprel %rs, @sym
// Into: %rd = PADDI8 %rs, sym@dtprel
// Operand 2 holds the symbol; getGlobal() is called unconditionally, so it
// is expected to be a global-address operand.
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
// Build the sym@dtprel expression that replaces the pseudo's symbol operand.
const MCExpr *SymDtprel = MCSymbolRefExpr::create(
MOSymbol, MCSymbolRefExpr::VK_DTPREL, OutContext);
// Operands 0 and 1 (destination and source registers) are forwarded
// unchanged to the PADDI8.
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::PADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
// The pseudo has been fully handled here; nothing further is emitted for it.
return;
}
case PPC::ADDIdtprelL:
// Transform: %xd = ADDIdtprelL %xs, @sym
// Into: %xd = ADDI8 %xs, sym@dtprel@l

View File

@ -1499,6 +1499,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::PADDI_DTPREL:
return "PPCISD::PADDI_DTPREL";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
@ -3098,6 +3100,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::LocalDynamic) {
if (Subtarget.isUsingPCRelativeCalls()) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_GOT_TLSLD_PCREL_FLAG);
SDValue MatPCRel =
DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
}
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {

View File

@ -381,6 +381,10 @@ namespace llvm {
/// sym\@got\@dtprel\@l.
ADDI_DTPREL_L,
/// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS
/// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.
PADDI_DTPREL,
/// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
/// during instruction selection to optimize a BUILD_VECTOR into
/// operations on splats. This is necessary to avoid losing these

View File

@ -1332,6 +1332,11 @@ def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
// PADDIdtprel - Emit-time pseudo selected for
// (PPCpaddiDtprel i64:$reg, tglobaltlsaddr:$disp). It has no encoding of its
// own; PPCAsmPrinter::emitInstruction rewrites it to PADDI8 with a sym@dtprel
// operand.
// NOTE(review): $disp is declared s16imm64, mirroring ADDIdtprelL, while the
// expanded instruction is relocated with a 34-bit DTPREL relocation - confirm
// the operand type is intentional.
def PADDIdtprel : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#PADDIdtprel",
[(set i64:$rD,
(PPCpaddiDtprel i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {

View File

@ -2380,8 +2380,10 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_GOT_FLAG, "ppc-got"},
{MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
{MO_TLSGD_FLAG, "ppc-tlsgd"},
{MO_TLSLD_FLAG, "ppc-tlsld"},
{MO_TPREL_FLAG, "ppc-tprel"},
{MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
{MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
{MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"}};
return makeArrayRef(TargetFlags);
}

View File

@ -222,6 +222,7 @@ def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCpaddiDtprel : SDNode<"PPCISD::PADDI_DTPREL", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;

View File

@ -90,6 +90,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_TPREL;
else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL;
else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL;
else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL;

View File

@ -50,17 +50,17 @@ protected:
bool Changed = false;
bool NeedFence = true;
bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
bool IsTLSGDPCREL = false;
bool IsPCREL = false;
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE;) {
MachineInstr &MI = *I;
IsTLSGDPCREL = isTLSGDPCREL(MI);
IsPCREL = isPCREL(MI);
if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
MI.getOpcode() != PPC::ADDItlsldLADDR32 && !IsTLSGDPCREL) {
MI.getOpcode() != PPC::ADDItlsldLADDR32 && !IsPCREL) {
// Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
// as scheduling fences, we skip creating fences if we already
// have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@ -80,7 +80,7 @@ protected:
Register InReg = PPC::NoRegister;
Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
SmallVector<Register, 3> OrigRegs = {OutReg, GPR3};
if (!IsTLSGDPCREL) {
if (!IsPCREL) {
InReg = MI.getOperand(1).getReg();
OrigRegs.push_back(InReg);
}
@ -107,9 +107,12 @@ protected:
Opc2 = PPC::GETtlsldADDR32;
break;
case PPC::PADDI8pc:
assert(IsTLSGDPCREL && "Expecting General Dynamic PCRel");
assert(IsPCREL && "Expecting General/Local Dynamic PCRel");
Opc1 = PPC::PADDI8pc;
Opc2 = PPC::GETtlsADDR;
Opc2 = MI.getOperand(2).getTargetFlags() ==
PPCII::MO_GOT_TLSGD_PCREL_FLAG
? PPC::GETtlsADDR
: PPC::GETtlsldADDR;
}
// We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr
@ -123,7 +126,7 @@ protected:
.addImm(0);
MachineInstr *Addi;
if (IsTLSGDPCREL) {
if (IsPCREL) {
Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0);
} else {
// Expand into two ops built prior to the existing instruction.
@ -140,7 +143,7 @@ protected:
MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
.addReg(GPR3));
if (IsTLSGDPCREL)
if (IsPCREL)
Call->addOperand(MI.getOperand(2));
else
Call->addOperand(MI.getOperand(3));
@ -168,11 +171,14 @@ protected:
}
public:
bool isTLSGDPCREL(const MachineInstr &MI) {
bool isPCREL(const MachineInstr &MI) {
return (MI.getOpcode() == PPC::PADDI8pc) &&
(MI.getOperand(2).getTargetFlags() ==
PPCII::MO_GOT_TLSGD_PCREL_FLAG);
PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
MI.getOperand(2).getTargetFlags() ==
PPCII::MO_GOT_TLSLD_PCREL_FLAG);
}
bool runOnMachineFunction(MachineFunction &MF) override {
TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();

View File

@ -0,0 +1,55 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \
; RUN: -ppc-asm-full-reg-names --relocation-model=pic -enable-ppc-pcrel-tls < %s | FileCheck %s --check-prefix=CHECK-S
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \
; RUN: -ppc-asm-full-reg-names --relocation-model=pic -enable-ppc-pcrel-tls --filetype=obj < %s | \
; RUN: llvm-objdump --mcpu=pwr10 --no-show-raw-insn -dr - | FileCheck %s --check-prefix=CHECK-O
; These test cases ensure that, when PC-relative memory operations are used,
; ABI-correct code and relocations are produced for the Local Dynamic TLS model.
@x = hidden thread_local global i32 0, align 4
define nonnull i32* @LocalDynamicAddressLoad() {
; CHECK-S-LABEL: LocalDynamicAddressLoad:
; CHECK-S: paddi r3, 0, x@got@tlsld@pcrel, 1
; CHECK-S-NEXT: bl __tls_get_addr@notoc(x@tlsld)
; CHECK-S-NEXT: paddi r3, r3, x@DTPREL, 0
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-O-LABEL: <LocalDynamicAddressLoad>:
; CHECK-O: c: paddi 3, 0, 0, 1
; CHECK-O-NEXT: 000000000000000c: R_PPC64_GOT_TLSLD_PCREL34 x
; CHECK-O-NEXT: 14: bl 0x14
; CHECK-O-NEXT: 0000000000000014: R_PPC64_TLSLD x
; CHECK-O-NEXT: 0000000000000014: R_PPC64_REL24_NOTOC __tls_get_addr
; CHECK-O-NEXT: 18: paddi 3, 3, 0, 0
; CHECK-O-NEXT: 0000000000000018: R_PPC64_DTPREL34 x
entry:
ret i32* @x
}
define i32 @LocalDynamicValueLoad() {
; CHECK-S-LABEL: LocalDynamicValueLoad:
; CHECK-S: paddi r3, 0, x@got@tlsld@pcrel, 1
; CHECK-S-NEXT: bl __tls_get_addr@notoc(x@tlsld)
; CHECK-S-NEXT: paddi r3, r3, x@DTPREL, 0
; CHECK-S-NEXT: lwz r3, 0(r3)
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-O-LABEL: <LocalDynamicValueLoad>:
; CHECK-O: 4c: paddi 3, 0, 0, 1
; CHECK-O-NEXT: 000000000000004c: R_PPC64_GOT_TLSLD_PCREL34 x
; CHECK-O-NEXT: 54: bl 0x54
; CHECK-O-NEXT: 0000000000000054: R_PPC64_TLSLD x
; CHECK-O-NEXT: 0000000000000054: R_PPC64_REL24_NOTOC __tls_get_addr
; CHECK-O-NEXT: 58: paddi 3, 3, 0, 0
; CHECK-O-NEXT: 0000000000000058: R_PPC64_DTPREL34 x
; CHECK-O-NEXT: 60: lwz 3, 0(3)
entry:
%0 = load i32, i32* @x, align 4
ret i32 %0
}

View File

@ -0,0 +1,33 @@
# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s 2>&1 | \
# RUN: FileCheck %s -check-prefix=MC
# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s | \
# RUN: llvm-readobj -r - | FileCheck %s -check-prefix=READOBJ
# This test checks that on PowerPC we can correctly convert x@got@tlsld@pcrel,
# x@tlsld, __tls_get_addr@notoc and x@DTPREL into R_PPC64_GOT_TLSLD_PCREL34,
# R_PPC64_TLSLD, R_PPC64_REL24_NOTOC and R_PPC64_DTPREL34 for local dynamic
# relocations, both when the address is loaded and when the value is loaded.
# MC-NOT: error: invalid variant
# READOBJ: 0x0 R_PPC64_GOT_TLSLD_PCREL34 x 0x0
# READOBJ-NEXT: 0x8 R_PPC64_TLSLD x 0x0
# READOBJ-NEXT: 0x8 R_PPC64_REL24_NOTOC __tls_get_addr 0x0
# READOBJ-NEXT: 0xC R_PPC64_DTPREL34 x 0x0
# READOBJ-NEXT: 0x18 R_PPC64_GOT_TLSLD_PCREL34 x 0x0
# READOBJ-NEXT: 0x20 R_PPC64_TLSLD x 0x0
# READOBJ-NEXT: 0x20 R_PPC64_REL24_NOTOC __tls_get_addr 0x0
# READOBJ-NEXT: 0x24 R_PPC64_DTPREL34 x 0x0
LocalDynamicAddrLoad:
paddi 3, 0, x@got@tlsld@pcrel, 1
bl __tls_get_addr@notoc(x@tlsld)
paddi 3, 3, x@DTPREL, 0
blr
LocalDynamicValueLoad:
paddi 3, 0, x@got@tlsld@pcrel, 1
bl __tls_get_addr@notoc(x@tlsld)
paddi 3, 3, x@DTPREL, 0
lwz 3, 0(3)
blr