diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index d64d8515bd9a..5367f90f7c16 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64MCExpr.h" #include "AArch64.h" #include "AArch64MCInstLower.h" #include "AArch64MachineFunctionInfo.h" @@ -489,24 +491,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(OutStreamer, TmpInst); return; } - case AArch64::TLSDESC_BLR: { - MCOperand Callee, Sym; - MCInstLowering.lowerOperand(MI->getOperand(0), Callee); - MCInstLowering.lowerOperand(MI->getOperand(1), Sym); + case AArch64::TLSDESC_CALLSEQ: { + /// lower this to: + /// adrp x0, :tlsdesc:var + /// ldr x1, [x0, #:tlsdesc_lo12:var] + /// add x0, x0, #:tlsdesc_lo12:var + /// .tlsdesccall var + /// blr x1 + /// (TPIDR_EL0 offset now in x0) + const MachineOperand &MO_Sym = MI->getOperand(0); + MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym); + MCOperand Sym, SymTLSDescLo12, SymTLSDesc; + MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); + MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE); + MCInstLowering.lowerOperand(MO_Sym, Sym); + MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12); + MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc); - // First emit a relocation-annotation. This expands to no code, but requests + MCInst Adrp; + Adrp.setOpcode(AArch64::ADRP); + Adrp.addOperand(MCOperand::CreateReg(AArch64::X0)); + Adrp.addOperand(SymTLSDesc); + EmitToStreamer(OutStreamer, Adrp); + + MCInst Ldr; + Ldr.setOpcode(AArch64::LDRXui); + Ldr.addOperand(MCOperand::CreateReg(AArch64::X1)); + Ldr.addOperand(MCOperand::CreateReg(AArch64::X0)); + Ldr.addOperand(SymTLSDescLo12); + Ldr.addOperand(MCOperand::CreateImm(0)); + EmitToStreamer(OutStreamer, Ldr); + + MCInst Add; + Add.setOpcode(AArch64::ADDXri); + Add.addOperand(MCOperand::CreateReg(AArch64::X0)); + Add.addOperand(MCOperand::CreateReg(AArch64::X0)); + Add.addOperand(SymTLSDescLo12); + Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0))); + EmitToStreamer(OutStreamer, Add); + + // Emit a relocation-annotation. This expands to no code, but requests // the following instruction gets an R_AARCH64_TLSDESC_CALL. MCInst TLSDescCall; TLSDescCall.setOpcode(AArch64::TLSDESCCALL); TLSDescCall.addOperand(Sym); EmitToStreamer(OutStreamer, TLSDescCall); - // Other than that it's just a normal indirect call to the function loaded - // from the descriptor. - MCInst BLR; - BLR.setOpcode(AArch64::BLR); - BLR.addOperand(Callee); - EmitToStreamer(OutStreamer, BLR); + MCInst Blr; + Blr.setOpcode(AArch64::BLR); + Blr.addOperand(MCOperand::CreateReg(AArch64::X1)); + EmitToStreamer(OutStreamer, Blr); return; } diff --git a/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index 3b74481f3c8c..06ff9af37fd7 100644 --- a/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { switch (I->getOpcode()) { - case AArch64::TLSDESC_BLR: + case AArch64::TLSDESC_CALLSEQ: // Make sure it's a local dynamic access. - if (!I->getOperand(1).isSymbol() || - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) + if (!I->getOperand(0).isSymbol() || + strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_")) break; if (TLSBaseAddrReg) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index acc89fb8f710..d108c09c1625 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden, static cl::opt EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, - cl::desc("Allow AArch64 SLI/SRI formation"), - cl::init(false)); + cl::desc("Allow AArch64 SLI/SRI formation"), + cl::init(false)); + +// FIXME: The necessary dtprel relocations don't seem to be supported +// well in the GNU bfd and gold linkers at the moment. Therefore, by +// default, for now, fall back to GeneralDynamic code generation. +cl::opt EnableAArch64ELFLocalDynamicTLSGeneration( + "aarch64-elf-ldtls-generation", cl::Hidden, + cl::desc("Allow AArch64 Local Dynamic TLS code generation"), + cl::init(false)); AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) @@ -752,7 +760,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; - case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL"; + case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ"; case AArch64ISD::ADC: return "AArch64ISD::ADC"; case AArch64ISD::SBC: return "AArch64ISD::SBC"; case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; @@ -3027,58 +3035,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, /// When accessing thread-local variables under either the general-dynamic or /// local-dynamic system, we make a "TLS-descriptor" call. The variable will /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry -/// is a function pointer to carry out the resolution. This function takes the -/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All -/// other registers (except LR, NZCV) are preserved. +/// is a function pointer to carry out the resolution. /// -/// Thus, the ideal call sequence on AArch64 is: +/// The sequence is: +/// adrp x0, :tlsdesc:var +/// ldr x1, [x0, #:tlsdesc_lo12:var] +/// add x0, x0, #:tlsdesc_lo12:var +/// .tlsdesccall var +/// blr x1 +/// (TPIDR_EL0 offset now in x0) /// -/// adrp x0, :tlsdesc:thread_var -/// ldr x8, [x0, :tlsdesc_lo12:thread_var] -/// add x0, x0, :tlsdesc_lo12:thread_var -/// .tlsdesccall thread_var -/// blr x8 -/// (TPIDR_EL0 offset now in x0). -/// -/// The ".tlsdesccall" directive instructs the assembler to insert a particular -/// relocation to help the linker relax this sequence if it turns out to be too -/// conservative. -/// -/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this -/// is harmless. -SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, - SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const { +/// The above sequence must be produced unscheduled, to enable the linker to +/// optimize/relax this sequence. +/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the +/// above sequence, and expanded really late in the compilation flow, to ensure +/// the sequence is produced as per above. +SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL, + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); - // The function we need to call is simply the first entry in the GOT for this - // descriptor, load it in preparation. - SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr); - - // TLS calls preserve all registers except those that absolutely must be - // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be - // silly). - const uint32_t *Mask = - Subtarget->getRegisterInfo()->getTLSCallPreservedMask(); - - // The function takes only one argument: the address of the descriptor itself - // in X0. - SDValue Glue, Chain; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); - Glue = Chain.getValue(1); - - // We're now ready to populate the argument list, as with a normal call: - SmallVector Ops; - Ops.push_back(Chain); - Ops.push_back(Func); - Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); - Ops.push_back(DAG.getRegisterMask(Mask)); - Ops.push_back(Glue); - + SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops); - Glue = Chain.getValue(1); + + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(SymAddr); + + Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops); + SDValue Glue = Chain.getValue(1); return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); } @@ -3089,9 +3073,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, assert(Subtarget->isTargetELF() && "This function expects an ELF target"); assert(getTargetMachine().getCodeModel() == CodeModel::Small && "ELF TLS only supported in small memory model"); + // Different choices can be made for the maximum size of the TLS area for a + // module. For the small address model, the default TLS size is 16MiB and the + // maximum TLS size is 4GiB. + // FIXME: add -mtls-size command line option and make it control the 16MiB + // vs. 4GiB code sequence generation. const GlobalAddressSDNode *GA = cast(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + if (!EnableAArch64ELFLocalDynamicTLSGeneration) { + if (Model == TLSModel::LocalDynamic) + Model = TLSModel::GeneralDynamic; + } SDValue TPOff; EVT PtrVT = getPointerTy(); @@ -3102,17 +3095,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, if (Model == TLSModel::LocalExec) { SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1); + GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12); SDValue LoVar = DAG.getTargetGlobalAddress( GV, DL, PtrVT, 0, - AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); + AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); + SDValue TPWithOff_lo = + SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase, + HiVar, DAG.getTargetConstant(0, MVT::i32)), + 0); + SDValue TPWithOff = + SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo, + LoVar, DAG.getTargetConstant(0, MVT::i32)), + 0); + return TPWithOff; } else if (Model == TLSModel::InitialExec) { TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff); @@ -3127,19 +3123,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, DAG.getMachineFunction().getInfo(); MFI->incNumLocalDynamicTLSAccesses(); - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, - AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. @@ -3148,40 +3131,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, // Now we can calculate the offset from TPIDR_EL0 to this module's // thread-local area. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); + TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG); // Now use :dtprel_whatever: operations to calculate this variable's offset // in its thread-storage area. SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1); + GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12); SDValue LoVar = DAG.getTargetGlobalAddress( GV, DL, MVT::i64, 0, - AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); - - SDValue DTPOff = - SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - DTPOff = - SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - - TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - + TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); + } else if (Model == TLSModel::GeneralDynamic) { // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. @@ -3189,7 +3155,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); // Finally we can make a call to calculate the offset from tpidr_el0. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); + TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG); } else llvm_unreachable("Unsupported ELF TLS access model"); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index e973364711c0..4ad859184402 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -30,9 +30,9 @@ enum { WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. CALL, // Function call. - // Almost the same as a normal call node, except that a TLSDesc relocation is - // needed so the linker can relax it correctly if possible. - TLSDESC_CALL, + // Produces the full sequence of instructions for getting the thread pointer + // offset of a variable into X0, using the TLSDesc model. + TLSDESC_CALLSEQ, ADRP, // Page address of a TargetGlobalAddress operand. ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. LOADgot, // Load from automatically generated descriptor (e.g. Global @@ -399,8 +399,8 @@ private: SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const; + SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL, + SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6e4c0b006f28..48cb8f933785 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -96,6 +96,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; + +// Generates the general dynamic sequences, i.e. +// adrp x0, :tlsdesc:var +// ldr x1, [x0, #:tlsdesc_lo12:var] +// add x0, x0, #:tlsdesc_lo12:var +// .tlsdesccall var +// blr x1 + +// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) +// number of operands (the variable) +def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, + [SDTCisPtrTy<0>]>; + def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, @@ -229,10 +242,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; -def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL", - SDT_AArch64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; +def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", + SDT_AArch64TLSDescCallSeq, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; + def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>; @@ -1049,15 +1063,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { let AsmString = ".tlsdesccall $sym"; } -// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It -// gets expanded to two MCInsts during lowering. -let isCall = 1, Defs = [LR] in -def TLSDESC_BLR - : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; +// FIXME: maybe the scratch register used shouldn't be fixed to X1? +// FIXME: can "hasSideEffects be dropped? +let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, + isCodeGenOnly = 1 in +def TLSDESC_CALLSEQ + : Pseudo<(outs), (ins i64imm:$sym), + [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>; +def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), + (TLSDESC_CALLSEQ texternalsym:$sym)>; -def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym), - (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp index e57b0f4dbb09..b82934134d91 100644 --- a/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -22,9 +22,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; +extern cl::opt EnableAArch64ELFLocalDynamicTLSGeneration; + AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer) : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} @@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); Model = Printer.TM.getTLSModel(GV); + if (!EnableAArch64ELFLocalDynamicTLSGeneration && + Model == TLSModel::LocalDynamic) + Model = TLSModel::GeneralDynamic; + } else { assert(MO.isSymbol() && StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" && "unexpected external TLS symbol"); + // The general dynamic access sequence is used to get the + // address of _TLS_MODULE_BASE_. Model = TLSModel::GeneralDynamic; } switch (Model) { @@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_G1; else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0) RefFlags |= AArch64MCExpr::VK_G0; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12) + RefFlags |= AArch64MCExpr::VK_HI12; if (MO.getTargetFlags() & AArch64II::MO_NC) RefFlags |= AArch64MCExpr::VK_NC; diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index c60b09a5f119..6d0337ce15ed 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -1229,7 +1229,7 @@ namespace AArch64II { MO_NO_FLAG, - MO_FRAGMENT = 0x7, + MO_FRAGMENT = 0xf, /// MO_PAGE - A symbol operand with this flag represents the pc-relative /// offset of the 4K page containing the symbol. This is used with the @@ -1257,26 +1257,31 @@ namespace AArch64II { /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction MO_G0 = 6, + /// MO_HI12 - This flag indicates that a symbol operand represents the bits + /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left- + /// by-12-bits instruction. + MO_HI12 = 7, + /// MO_GOT - This flag indicates that a symbol operand represents the /// address of the GOT entry for the symbol, rather than the address of /// the symbol itself. - MO_GOT = 8, + MO_GOT = 0x10, /// MO_NC - Indicates whether the linker is expected to check the symbol /// reference for overflow. For example in an ADRP/ADD pair of relocations /// the ADRP usually does check, but not the ADD. - MO_NC = 0x10, + MO_NC = 0x20, /// MO_TLS - Indicates that the operand being accessed is some kind of /// thread-local symbol. On Darwin, only one type of thread-local access /// exists (pre linker-relaxation), but on ELF the TLSModel used for the /// referee will affect interpretation. - MO_TLS = 0x20, + MO_TLS = 0x40, /// MO_CONSTPOOL - This flag indicates that a symbol operand represents /// the address of a constant pool entry for the symbol, rather than the /// address of the symbol itself. - MO_CONSTPOOL = 0x40 + MO_CONSTPOOL = 0x80 }; } // end namespace AArch64II diff --git a/llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll index 8a9dfd25ced3..88700a153437 100644 --- a/llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll +++ b/llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll @@ -1,5 +1,7 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOLD %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-NOLD-RELOC %s @general_dynamic_var = external thread_local global i32 @@ -9,22 +11,34 @@ define i32 @test_generaldynamic() { %val = load i32, i32* @general_dynamic_var ret i32 %val - ; FIXME: the adrp instructions are redundant (if harmless). -; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NEXT: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr [[CALLEE]] +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall general_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] + + ; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0 ; CHECK: ldr w0, [x[[TP]], x0] +; CHECK-NOLD: mrs x[[TP:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: ldr w0, [x[[TP]], x0] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL + } define i32* @test_generaldynamic_addr() { @@ -32,21 +46,25 @@ define i32* @test_generaldynamic_addr() { ret i32* @general_dynamic_var - ; FIXME: the adrp instructions are redundant (if harmless). -; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NEXT: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr [[CALLEE]] ; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0 ; CHECK: add x0, [[TP]], x0 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL + } @local_dynamic_var = external thread_local(localdynamic) global i32 @@ -58,54 +76,71 @@ define i32 @test_localdynamic() { ret i32 %val ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: add x[[TPREL:[0-9]+]], x0, [[DTP_OFFSET]] - +; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var +; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var ; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK: ldr w0, [x[[TPIDR]], x[[TPOFF]]] + +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] +; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: ldr w0, [x[[TPIDR]], x0] -; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC + +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } define i32* @test_localdynamic_addr() { ; CHECK-LABEL: test_localdynamic_addr: +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: blr [[CALLEE]] +; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var +; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var +; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK: add x0, x[[TPIDR]], x[[TPOFF]] + +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] +; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: add x0, x[[TPIDR]], x0 ret i32* @local_dynamic_var -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: add [[TPREL:x[0-9]+]], x0, [[DTP_OFFSET]] - -; CHECK: mrs [[TPIDR:x[0-9]+]], TPIDR_EL0 - -; CHECK: add x0, [[TPIDR]], [[TPREL]] - ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } ; The entire point of the local-dynamic access model is to have a single call to @@ -122,11 +157,10 @@ define i32 @test_localdynamic_deduplicate() { %sum = add i32 %val, %val2 ret i32 %sum -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK: adrp x[[DTPREL_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[DTPREL_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] ; CHECK-NOT: _TLS_MODULE_BASE_ diff --git a/llvm/test/CodeGen/AArch64/arm64-tls-execs.ll b/llvm/test/CodeGen/AArch64/arm64-tls-execs.ll index 38c30b4831de..deced6988815 100644 --- a/llvm/test/CodeGen/AArch64/arm64-tls-execs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-tls-execs.ll @@ -38,14 +38,13 @@ define i32 @test_local_exec() { ; CHECK-LABEL: test_local_exec: %val = load i32, i32* @local_exec_var -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92] -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0 -; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC +; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0 +; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var +; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var +; CHECK: ldr w0, [x[[R3]]] +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC ret i32 %val } @@ -53,11 +52,11 @@ define i32* @test_local_exec_addr() { ; CHECK-LABEL: test_local_exec_addr: ret i32* @local_exec_var -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0 -; CHECK: add x0, [[TP]], [[TP_OFFSET]] +; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0 +; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var +; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var +; CHECK: ret -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC }