From c0f199e5667a862819d333847059cfaa95354111 Mon Sep 17 00:00:00 2001 From: Kamau Bridgeman Date: Fri, 11 Sep 2020 10:33:33 -0400 Subject: [PATCH] [PowerPC] Implement Thread Local Storage Support for Local Exec This patch is the initial support for the Local Exec Thread Local Storage model to produce code sequence and relocations correct to the ABI for the model when using PC relative memory operations. Patch by: Kamau Bridgeman Differential Revision: https://reviews.llvm.org/D83404 --- .../llvm/BinaryFormat/ELFRelocs/PowerPC64.def | 2 + .../MCTargetDesc/PPCELFObjectWriter.cpp | 8 +- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2 + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 11 +++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 5 ++ llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2 + llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 4 + llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 2 + .../CodeGen/PowerPC/pcrel-tls-local-exec.ll | 74 +++++++++++++++++++ .../pcrel-tls-local-exec-address-load-reloc.s | 15 ++++ .../pcrel-tls-local-exec-value-load-reloc.s | 16 ++++ 11 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/PowerPC/pcrel-tls-local-exec.ll create mode 100644 llvm/test/MC/PowerPC/pcrel-tls-local-exec-address-load-reloc.s create mode 100644 llvm/test/MC/PowerPC/pcrel-tls-local-exec-value-load-reloc.s diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def index 2cf021a4cf6f..901af679b915 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def @@ -100,6 +100,7 @@ #undef R_PPC64_PCREL_OPT #undef R_PPC64_PCREL34 #undef R_PPC64_GOT_PCREL34 +#undef R_PPC64_TPREL34 #undef R_PPC64_GOT_TLSGD_PCREL34 #undef R_PPC64_GOT_TPREL_PCREL34 #undef R_PPC64_IRELATIVE @@ -200,6 +201,7 @@ ELF_RELOC(R_PPC64_REL24_NOTOC, 116) ELF_RELOC(R_PPC64_PCREL_OPT, 123) ELF_RELOC(R_PPC64_PCREL34, 132) ELF_RELOC(R_PPC64_GOT_PCREL34, 
133) +ELF_RELOC(R_PPC64_TPREL34, 146) ELF_RELOC(R_PPC64_GOT_TLSGD_PCREL34, 148) ELF_RELOC(R_PPC64_GOT_TPREL_PCREL34, 150) ELF_RELOC(R_PPC64_IRELATIVE, 248) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 006cd57f517e..601e11d4ee8e 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -419,7 +419,13 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, } break; case PPC::fixup_ppc_imm34: - report_fatal_error("Unsupported Modifier for fixup_ppc_imm34."); + switch (Modifier) { + default: + report_fatal_error("Unsupported Modifier for fixup_ppc_imm34."); + case MCSymbolRefExpr::VK_TPREL: + Type = ELF::R_PPC64_TPREL34; + break; + } break; case FK_Data_8: switch (Modifier) { diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 62bb5cc1e806..a70e7468a15b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -691,6 +691,8 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { SDValue Offset = LD->getOffset(); if (!Offset.isUndef()) return false; + if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) + return false; SDLoc dl(LD); EVT MemVT = LD->getMemoryVT(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 469fe9701d06..66711f69a645 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1512,6 +1512,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; + case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: + return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; case 
PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; case PPCISD::STRICT_FADDRTZ: @@ -3015,6 +3017,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, TLSModel::Model Model = TM.getTLSModel(GV); if (Model == TLSModel::LocalExec) { + if (Subtarget.isUsingPCRelativeCalls()) { + SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64); + SDValue TGA = DAG.getTargetGlobalAddress( + GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)); + SDValue MatAddr = + DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr); + } + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 05c9a5d31413..3e900e2ce299 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -441,6 +441,11 @@ namespace llvm { /// through an add like PADDI. TLS_DYNAMIC_MAT_PCREL_ADDR, + /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address + /// when using local exec access models, and when prefixed instructions are + /// available. This is used with ADD_TLS to produce an add like PADDI. 
+ TLS_LOCAL_EXEC_MAT_ADDR, + // Constrained conversion from floating point to int STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCTIWZ, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index bf7ad639ab6e..30605a22ea39 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -368,6 +368,8 @@ def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>; def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>; def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR", SDTIntUnaryOp, []>; +def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR", + SDTIntUnaryOp, []>; //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 73321dec99d3..55872a493dd6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -829,6 +829,10 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in { // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize // tls global address with paddi instruction. def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; + // PPCtlslocalexecmataddr node is used for TLS local exec models to + // materialize tls global address with paddi instruction. 
+ def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)), + (PADDI8 $in, $addr)>; } let Predicates = [PrefixInstrs] in { diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 795abed413e0..1358bec8e36f 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -86,6 +86,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, RefKind = MCSymbolRefExpr::VK_PCREL; else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG)) RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL; + else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)) + RefKind = MCSymbolRefExpr::VK_TPREL; else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG) RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL; else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG) diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tls-local-exec.ll b/llvm/test/CodeGen/PowerPC/pcrel-tls-local-exec.ll new file mode 100644 index 000000000000..47245991d82f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-tls-local-exec.ll @@ -0,0 +1,74 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -enable-ppc-pcrel-tls -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-S +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -enable-ppc-pcrel-tls -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: --filetype=obj < %s | llvm-objdump --no-show-raw-insn --mcpu=pwr10 -dr - \ +; RUN: | FileCheck %s --check-prefix=CHECK-O + +; These test cases are to ensure that when using pc relative memory operations +; ABI correct code and relocations are produced for the Local Exec TLS Model. 
+ +@x = thread_local global i32 0, align 4 +@y = thread_local global [5 x i32] [i32 0, i32 0, i32 0, i32 0, i32 0], align 4 + +define i32* @LocalExecAddressLoad() { +; CHECK-S-LABEL: LocalExecAddressLoad: +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: paddi r3, r13, x@TPREL, 0 +; CHECK-S-NEXT: blr +; CHECK-O-LABEL: <LocalExecAddressLoad>: +; CHECK-O: 0: paddi 3, 13, 0, 0 +; CHECK-O-NEXT: 0000000000000000: R_PPC64_TPREL34 x +; CHECK-O-NEXT: 8: blr +entry: + ret i32* @x +} + +define i32 @LocalExecValueLoad() { +; CHECK-S-LABEL: LocalExecValueLoad: +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: paddi r3, r13, x@TPREL, 0 +; CHECK-S-NEXT: lwz r3, 0(r3) +; CHECK-S-NEXT: blr +; CHECK-O-LABEL: <LocalExecValueLoad>: +; CHECK-O: 20: paddi 3, 13, 0, 0 +; CHECK-O-NEXT: 0000000000000020: R_PPC64_TPREL34 x +; CHECK-O-NEXT: 28: lwz 3, 0(3) +; CHECK-O-NEXT: 2c: blr +entry: + %0 = load i32, i32* @x, align 4 + ret i32 %0 +} + +define i32 @LocalExecValueLoadOffset() { +; CHECK-S-LABEL: LocalExecValueLoadOffset: +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: paddi r3, r13, y@TPREL, 0 +; CHECK-S-NEXT: lwz r3, 12(r3) +; CHECK-S-NEXT: blr +; CHECK-O-LABEL: <LocalExecValueLoadOffset>: +; CHECK-O: 40: paddi 3, 13, 0, 0 +; CHECK-O-NEXT: 0000000000000040: R_PPC64_TPREL34 y +; CHECK-O-NEXT: 48: lwz 3, 12(3) +; CHECK-O-NEXT: 4c: blr +entry: + %0 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @y, i64 0, i64 3), align 4 + ret i32 %0 +} + + +define i32* @LocalExecValueLoadOffsetNoLoad() { +; CHECK-S-LABEL: LocalExecValueLoadOffsetNoLoad: +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: paddi r3, r13, y@TPREL, 0 +; CHECK-S-NEXT: addi r3, r3, 12 +; CHECK-S-NEXT: blr +; CHECK-O-LABEL: <LocalExecValueLoadOffsetNoLoad>: +; CHECK-O: 60: paddi 3, 13, 0, 0 +; CHECK-O-NEXT: 0000000000000060: R_PPC64_TPREL34 y +; CHECK-O-NEXT: 68: addi 3, 3, 12 +; CHECK-O-NEXT: 6c: blr +entry: + ret i32* getelementptr inbounds ([5 x i32], [5 x i32]* @y, i64 0, i64 3) +} diff --git a/llvm/test/MC/PowerPC/pcrel-tls-local-exec-address-load-reloc.s 
b/llvm/test/MC/PowerPC/pcrel-tls-local-exec-address-load-reloc.s new file mode 100644 index 000000000000..ae3eb8b88662 --- /dev/null +++ b/llvm/test/MC/PowerPC/pcrel-tls-local-exec-address-load-reloc.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s 2>&1 | \ +# RUN: FileCheck %s -check-prefix=MC +# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s | \ +# RUN: llvm-readobj -r - | FileCheck %s -check-prefix=READOBJ + +# This test checks that on PowerPC we can correctly convert x@TPREL +# into R_PPC64_TPREL34 for local exec relocations with the address loaded. + +# MC-NOT: error: invalid variant + +# READOBJ: 0x0 R_PPC64_TPREL34 x 0x0 + +LocalExec: + paddi 3, 13, x@TPREL, 0 + blr diff --git a/llvm/test/MC/PowerPC/pcrel-tls-local-exec-value-load-reloc.s b/llvm/test/MC/PowerPC/pcrel-tls-local-exec-value-load-reloc.s new file mode 100644 index 000000000000..6ebee2ff9cff --- /dev/null +++ b/llvm/test/MC/PowerPC/pcrel-tls-local-exec-value-load-reloc.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s 2>&1 | \ +# RUN: FileCheck %s -check-prefix=MC +# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s | \ +# RUN: llvm-readobj -r - | FileCheck %s -check-prefix=READOBJ + +# This test checks that on PowerPC we can correctly convert x@TPREL +# into R_PPC64_TPREL34 for local exec relocations with the value loaded. + +# MC-NOT: error: invalid variant + +# READOBJ: 0x0 R_PPC64_TPREL34 x 0x0 + +LocalExecLoad: + paddi 3, 13, x@TPREL, 0 + lwz 3, 0(3) + blr