forked from OSchip/llvm-project
Implement the local-dynamic TLS model for x86 (PR3985)
This implements codegen support for accesses to thread-local variables using the local-dynamic model, and adds a clean-up pass so that the base address for the TLS block can be re-used between local-dynamic access on an execution path. llvm-svn: 157818
This commit is contained in:
parent
dc191a184f
commit
789acfb63d
|
@ -100,6 +100,26 @@ namespace X86II {
|
|||
/// SYMBOL_LABEL @TLSGD
|
||||
MO_TLSGD,
|
||||
|
||||
/// MO_TLSLD - On a symbol operand this indicates that the immediate is
|
||||
/// the offset of the GOT entry with the TLS index for the module that
|
||||
/// contains the symbol. When this index is passed to a call to
|
||||
/// __tls_get_addr, the function will return the base address of the TLS
|
||||
/// block for the symbol.
|
||||
///
|
||||
/// See 'ELF Handling for Thread-Local Storage' for more details.
|
||||
/// SYMBOL_LABEL @TLSLD
|
||||
MO_TLSLD,
|
||||
|
||||
/// MO_TLSLDM - On a symbol operand this indicates that the immediate is
|
||||
/// the offset of the GOT entry with the TLS index for the module that
|
||||
/// contains the symbol. When this index is passed to a call to
|
||||
/// ___tls_get_addr, the function will return the base address of the TLS
|
||||
/// block for the symbol.
|
||||
///
|
||||
/// See 'ELF Handling for Thread-Local Storage' for more details.
|
||||
/// SYMBOL_LABEL @TLSLDM
|
||||
MO_TLSLDM,
|
||||
|
||||
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
|
||||
/// some TLS offset.
|
||||
///
|
||||
|
@ -121,6 +141,13 @@ namespace X86II {
|
|||
/// SYMBOL_LABEL @TPOFF
|
||||
MO_TPOFF,
|
||||
|
||||
/// MO_DTPOFF - On a symbol operand this indicates that the immediate is
|
||||
/// the offset of the symbol from the start of its module's TLS block.
|
||||
///
|
||||
/// See 'ELF Handling for Thread-Local Storage' for more details.
|
||||
/// SYMBOL_LABEL @DTPOFF
|
||||
MO_DTPOFF,
|
||||
|
||||
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
|
||||
/// some TLS offset.
|
||||
///
|
||||
|
|
|
@ -36,6 +36,11 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
|
|||
/// register for PIC on x86-32.
|
||||
FunctionPass* createGlobalBaseRegPass();
|
||||
|
||||
/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
|
||||
/// to local-dynamic TLS variables so that the TLS base address for the module
|
||||
/// is only fetched once per execution path through the function.
|
||||
FunctionPass *createCleanupLocalDynamicTLSPass();
|
||||
|
||||
/// createX86FloatingPointStackifierPass - This function returns a pass which
|
||||
/// converts floating point register references and pseudo instructions into
|
||||
/// floating point stack references and physical instructions.
|
||||
|
|
|
@ -186,9 +186,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
|
|||
O << '-' << *MF->getPICBaseSymbol();
|
||||
break;
|
||||
case X86II::MO_TLSGD: O << "@TLSGD"; break;
|
||||
case X86II::MO_TLSLD: O << "@TLSLD"; break;
|
||||
case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
|
||||
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
|
||||
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
|
||||
case X86II::MO_TPOFF: O << "@TPOFF"; break;
|
||||
case X86II::MO_DTPOFF: O << "@DTPOFF"; break;
|
||||
case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
|
||||
case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break;
|
||||
case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
|
||||
|
|
|
@ -7263,7 +7263,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
|
|||
static SDValue
|
||||
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
|
||||
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
|
||||
unsigned char OperandFlags) {
|
||||
unsigned char OperandFlags, bool LocalDynamic = false) {
|
||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
DebugLoc dl = GA->getDebugLoc();
|
||||
|
@ -7271,12 +7271,16 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
|
|||
GA->getValueType(0),
|
||||
GA->getOffset(),
|
||||
OperandFlags);
|
||||
|
||||
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
|
||||
: X86ISD::TLSADDR;
|
||||
|
||||
if (InFlag) {
|
||||
SDValue Ops[] = { Chain, TGA, *InFlag };
|
||||
Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
|
||||
Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
|
||||
} else {
|
||||
SDValue Ops[] = { Chain, TGA };
|
||||
Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
|
||||
Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
|
||||
}
|
||||
|
||||
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
|
||||
|
@ -7308,6 +7312,45 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
|
|||
X86::RAX, X86II::MO_TLSGD);
|
||||
}
|
||||
|
||||
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
|
||||
SelectionDAG &DAG,
|
||||
const EVT PtrVT,
|
||||
bool is64Bit) {
|
||||
DebugLoc dl = GA->getDebugLoc();
|
||||
|
||||
// Get the start address of the TLS block for this module.
|
||||
X86MachineFunctionInfo* MFI = DAG.getMachineFunction()
|
||||
.getInfo<X86MachineFunctionInfo>();
|
||||
MFI->incNumLocalDynamicTLSAccesses();
|
||||
|
||||
SDValue Base;
|
||||
if (is64Bit) {
|
||||
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX,
|
||||
X86II::MO_TLSLD, /*LocalDynamic=*/true);
|
||||
} else {
|
||||
SDValue InFlag;
|
||||
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
|
||||
DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
|
||||
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
|
||||
}
|
||||
|
||||
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
|
||||
// of Base.
|
||||
|
||||
// Build x@dtpoff.
|
||||
unsigned char OperandFlags = X86II::MO_DTPOFF;
|
||||
unsigned WrapperKind = X86ISD::Wrapper;
|
||||
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
|
||||
GA->getValueType(0),
|
||||
GA->getOffset(), OperandFlags);
|
||||
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
|
||||
|
||||
// Add x@dtpoff with the base.
|
||||
return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
|
||||
}
|
||||
|
||||
// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
|
||||
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
|
||||
const EVT PtrVT, TLSModel::Model model,
|
||||
|
@ -7372,8 +7415,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
|
|||
const GlobalValue *GV = GA->getGlobal();
|
||||
|
||||
if (Subtarget->isTargetELF()) {
|
||||
// TODO: implement the "local dynamic" model
|
||||
|
||||
// If GV is an alias then use the aliasee for determining
|
||||
// thread-localness.
|
||||
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
|
||||
|
@ -7383,11 +7424,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
switch (model) {
|
||||
case TLSModel::GeneralDynamic:
|
||||
case TLSModel::LocalDynamic: // not implemented
|
||||
if (Subtarget->is64Bit())
|
||||
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
|
||||
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
|
||||
|
||||
case TLSModel::LocalDynamic:
|
||||
return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
|
||||
Subtarget->is64Bit());
|
||||
case TLSModel::InitialExec:
|
||||
case TLSModel::LocalExec:
|
||||
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
|
||||
|
@ -11257,6 +11299,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
|
||||
case X86ISD::FRCP: return "X86ISD::FRCP";
|
||||
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
|
||||
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
|
||||
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
|
||||
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
|
||||
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
|
||||
|
|
|
@ -207,6 +207,10 @@ namespace llvm {
|
|||
// TLSADDR - Thread Local Storage.
|
||||
TLSADDR,
|
||||
|
||||
// TLSBASEADDR - Thread Local Storage. A call to get the start address
|
||||
// of the TLS block for the current module.
|
||||
TLSBASEADDR,
|
||||
|
||||
// TLSCALL - Thread Local Storage. When calling to an OS provided
|
||||
// thunk at the address from an earlier relocation.
|
||||
TLSCALL,
|
||||
|
|
|
@ -375,11 +375,16 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
|
|||
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
|
||||
Uses = [ESP] in
|
||||
Uses = [ESP] in {
|
||||
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
|
||||
"# TLS_addr32",
|
||||
[(X86tlsaddr tls32addr:$sym)]>,
|
||||
Requires<[In32BitMode]>;
|
||||
def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
|
||||
"# TLS_base_addr32",
|
||||
[(X86tlsbaseaddr tls32baseaddr:$sym)]>,
|
||||
Requires<[In32BitMode]>;
|
||||
}
|
||||
|
||||
// All calls clobber the non-callee saved registers. RSP is marked as
|
||||
// a use to prevent stack-pointer assignments that appear immediately
|
||||
|
@ -389,11 +394,16 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
|
|||
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
|
||||
Uses = [RSP] in
|
||||
Uses = [RSP] in {
|
||||
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
|
||||
"# TLS_addr64",
|
||||
[(X86tlsaddr tls64addr:$sym)]>,
|
||||
Requires<[In64BitMode]>;
|
||||
def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
|
||||
"# TLS_base_addr64",
|
||||
[(X86tlsbaseaddr tls64baseaddr:$sym)]>,
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
// Darwin TLS Support
|
||||
// For i386, the address of the thunk is passed on the stack, on return the
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "llvm/LLVMContext.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/CodeGen/MachineConstantPool.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
@ -3990,9 +3991,126 @@ namespace {
|
|||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned BaseReg;
|
||||
};
|
||||
}
|
||||
|
||||
char CGBR::ID = 0;
|
||||
FunctionPass*
|
||||
llvm::createGlobalBaseRegPass() { return new CGBR(); }
|
||||
|
||||
namespace {
|
||||
struct LDTLSCleanup : public MachineFunctionPass {
|
||||
static char ID;
|
||||
LDTLSCleanup() : MachineFunctionPass(ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF) {
|
||||
X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>();
|
||||
if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
|
||||
// No point folding accesses if there aren't at least two.
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
|
||||
return VisitNode(DT->getRootNode(), 0);
|
||||
}
|
||||
|
||||
// Visit the dominator subtree rooted at Node in pre-order.
|
||||
// If TLSBaseAddrReg is non-zero, then use that to replace any
|
||||
// TLS_base_addr instructions. Otherwise, create the register
|
||||
// when the first such instruction is seen, and then use it
|
||||
// as we encounter more instructions.
|
||||
bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
|
||||
MachineBasicBlock *BB = Node->getBlock();
|
||||
bool Changed = false;
|
||||
|
||||
// Traverse the current block.
|
||||
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
|
||||
++I) {
|
||||
switch (I->getOpcode()) {
|
||||
case X86::TLS_base_addr32:
|
||||
case X86::TLS_base_addr64:
|
||||
if (TLSBaseAddrReg)
|
||||
I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
|
||||
else
|
||||
I = SetRegister(I, &TLSBaseAddrReg);
|
||||
Changed = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Visit the children of this block in the dominator tree.
|
||||
for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
|
||||
I != E; ++I) {
|
||||
Changed |= VisitNode(*I, TLSBaseAddrReg);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// Replace the TLS_base_addr instruction I with a copy from
|
||||
// TLSBaseAddrReg, returning the new instruction.
|
||||
MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
|
||||
unsigned TLSBaseAddrReg) {
|
||||
MachineFunction *MF = I->getParent()->getParent();
|
||||
const X86TargetMachine *TM =
|
||||
static_cast<const X86TargetMachine *>(&MF->getTarget());
|
||||
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
|
||||
const X86InstrInfo *TII = TM->getInstrInfo();
|
||||
|
||||
// Insert a Copy from TLSBaseAddrReg to RAX/EAX.
|
||||
MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY),
|
||||
is64Bit ? X86::RAX : X86::EAX)
|
||||
.addReg(TLSBaseAddrReg);
|
||||
|
||||
// Erase the TLS_base_addr instruction.
|
||||
I->eraseFromParent();
|
||||
|
||||
return Copy;
|
||||
}
|
||||
|
||||
// Create a virtual register in *TLSBaseAddrReg, and populate it by
|
||||
// inserting a copy instruction after I. Returns the new instruction.
|
||||
MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
|
||||
MachineFunction *MF = I->getParent()->getParent();
|
||||
const X86TargetMachine *TM =
|
||||
static_cast<const X86TargetMachine *>(&MF->getTarget());
|
||||
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
|
||||
const X86InstrInfo *TII = TM->getInstrInfo();
|
||||
|
||||
// Create a virtual register for the TLS base address.
|
||||
MachineRegisterInfo &RegInfo = MF->getRegInfo();
|
||||
*TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
|
||||
? &X86::GR64RegClass
|
||||
: &X86::GR32RegClass);
|
||||
|
||||
// Insert a copy from RAX/EAX to TLSBaseAddrReg.
|
||||
MachineInstr *Next = I->getNextNode();
|
||||
MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY),
|
||||
*TLSBaseAddrReg)
|
||||
.addReg(is64Bit ? X86::RAX : X86::EAX);
|
||||
|
||||
return Copy;
|
||||
}
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "Local Dynamic TLS Access Clean-up";
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
char LDTLSCleanup::ID = 0;
|
||||
FunctionPass*
|
||||
llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
|
||||
|
|
|
@ -97,6 +97,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
|
|||
|
||||
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
|
||||
|
@ -203,6 +205,9 @@ def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
|
|||
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
|
@ -492,6 +497,9 @@ def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
|
|||
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
|
||||
[tglobaltlsaddr], []>;
|
||||
|
||||
def tls32baseaddr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
|
||||
[tglobaltlsaddr], []>;
|
||||
|
||||
def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
|
||||
[add, sub, mul, X86mul_imm, shl, or, frameindex,
|
||||
X86WrapperRIP], []>;
|
||||
|
@ -499,6 +507,9 @@ def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
|
|||
def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
|
||||
[tglobaltlsaddr], []>;
|
||||
|
||||
def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
|
||||
[tglobaltlsaddr], []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Instruction Predicate Definitions.
|
||||
def HasCMov : Predicate<"Subtarget->hasCMov()">;
|
||||
|
|
|
@ -156,9 +156,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
|
|||
break;
|
||||
case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break;
|
||||
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
|
||||
case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break;
|
||||
case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break;
|
||||
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
|
||||
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
|
||||
case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break;
|
||||
case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break;
|
||||
case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
|
||||
case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break;
|
||||
case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
|
||||
|
@ -551,17 +554,38 @@ ReSimplify:
|
|||
static void LowerTlsAddr(MCStreamer &OutStreamer,
|
||||
X86MCInstLower &MCInstLowering,
|
||||
const MachineInstr &MI) {
|
||||
bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
|
||||
|
||||
bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
|
||||
MI.getOpcode() == X86::TLS_base_addr64;
|
||||
|
||||
bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
|
||||
|
||||
MCContext &context = OutStreamer.getContext();
|
||||
|
||||
if (is64Bits) {
|
||||
if (needsPadding) {
|
||||
MCInst prefix;
|
||||
prefix.setOpcode(X86::DATA16_PREFIX);
|
||||
OutStreamer.EmitInstruction(prefix);
|
||||
}
|
||||
|
||||
MCSymbolRefExpr::VariantKind SRVK;
|
||||
switch (MI.getOpcode()) {
|
||||
case X86::TLS_addr32:
|
||||
case X86::TLS_addr64:
|
||||
SRVK = MCSymbolRefExpr::VK_TLSGD;
|
||||
break;
|
||||
case X86::TLS_base_addr32:
|
||||
SRVK = MCSymbolRefExpr::VK_TLSLDM;
|
||||
break;
|
||||
case X86::TLS_base_addr64:
|
||||
SRVK = MCSymbolRefExpr::VK_TLSLD;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unexpected opcode");
|
||||
}
|
||||
|
||||
MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
|
||||
const MCSymbolRefExpr *symRef =
|
||||
MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
|
||||
const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context);
|
||||
|
||||
MCInst LEA;
|
||||
if (is64Bits) {
|
||||
|
@ -583,7 +607,7 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
|
|||
}
|
||||
OutStreamer.EmitInstruction(LEA);
|
||||
|
||||
if (is64Bits) {
|
||||
if (needsPadding) {
|
||||
MCInst prefix;
|
||||
prefix.setOpcode(X86::DATA16_PREFIX);
|
||||
OutStreamer.EmitInstruction(prefix);
|
||||
|
@ -645,6 +669,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|||
|
||||
case X86::TLS_addr32:
|
||||
case X86::TLS_addr64:
|
||||
case X86::TLS_base_addr32:
|
||||
case X86::TLS_base_addr64:
|
||||
return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
|
||||
|
||||
case X86::MOVPC32r: {
|
||||
|
@ -714,4 +740,3 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|||
MCInstLowering.Lower(MI, TmpInst);
|
||||
OutStreamer.EmitInstruction(TmpInst);
|
||||
}
|
||||
|
||||
|
|
|
@ -66,6 +66,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
|||
/// ArgumentStackSize - The number of bytes on stack consumed by the arguments
|
||||
/// being passed on the stack.
|
||||
unsigned ArgumentStackSize;
|
||||
/// NumLocalDynamics - Number of local-dynamic TLS accesses.
|
||||
unsigned NumLocalDynamics;
|
||||
|
||||
public:
|
||||
X86MachineFunctionInfo() : ForceFramePointer(false),
|
||||
|
@ -79,7 +81,8 @@ public:
|
|||
RegSaveFrameIndex(0),
|
||||
VarArgsGPOffset(0),
|
||||
VarArgsFPOffset(0),
|
||||
ArgumentStackSize(0) {}
|
||||
ArgumentStackSize(0),
|
||||
NumLocalDynamics(0) {}
|
||||
|
||||
explicit X86MachineFunctionInfo(MachineFunction &MF)
|
||||
: ForceFramePointer(false),
|
||||
|
@ -93,7 +96,8 @@ public:
|
|||
RegSaveFrameIndex(0),
|
||||
VarArgsGPOffset(0),
|
||||
VarArgsFPOffset(0),
|
||||
ArgumentStackSize(0) {}
|
||||
ArgumentStackSize(0),
|
||||
NumLocalDynamics(0) {}
|
||||
|
||||
bool getForceFramePointer() const { return ForceFramePointer;}
|
||||
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
|
||||
|
@ -130,6 +134,9 @@ public:
|
|||
|
||||
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
|
||||
void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
|
||||
|
||||
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
|
||||
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
|
|
@ -147,6 +147,10 @@ bool X86PassConfig::addInstSelector() {
|
|||
// Install an instruction selector.
|
||||
PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
|
||||
|
||||
// For ELF, cleanup any local-dynamic TLS accesses.
|
||||
if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
|
||||
PM->add(createCleanupLocalDynamicTLSPass());
|
||||
|
||||
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
|
||||
if (!getX86Subtarget().is64Bit())
|
||||
PM->add(createGlobalBaseRegPass());
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck %s
|
||||
|
||||
@x = internal thread_local global i32 0, align 4
|
||||
@y = internal thread_local global i32 0, align 4
|
||||
|
||||
; get_x and get_y are here to prevent x and y from being optimized away as 0
|
||||
|
||||
define i32* @get_x() {
|
||||
entry:
|
||||
ret i32* @x
|
||||
; FIXME: This function uses a single thread-local variable,
|
||||
; so we might want to fall back to general-dynamic here.
|
||||
; CHECK: get_x:
|
||||
; CHECK: leaq x@TLSLD(%rip), %rdi
|
||||
; CHECK-NEXT: callq __tls_get_addr@PLT
|
||||
; CHECK: x@DTPOFF
|
||||
}
|
||||
|
||||
define i32* @get_y() {
|
||||
entry:
|
||||
ret i32* @y
|
||||
}
|
||||
|
||||
define i32 @f(i32 %i) {
|
||||
entry:
|
||||
%cmp = icmp eq i32 %i, 1
|
||||
br i1 %cmp, label %return, label %if.else
|
||||
; This bb does not access TLS, so should not call __tls_get_addr.
|
||||
; CHECK: f:
|
||||
; CHECK-NOT: __tls_get_addr
|
||||
; CHECK: je
|
||||
|
||||
|
||||
if.else:
|
||||
%0 = load i32* @x, align 4
|
||||
%cmp1 = icmp eq i32 %i, 2
|
||||
br i1 %cmp1, label %if.then2, label %return
|
||||
; Now we call __tls_get_addr.
|
||||
; CHECK: # %if.else
|
||||
; CHECK: leaq x@TLSLD(%rip), %rdi
|
||||
; CHECK-NEXT: callq __tls_get_addr@PLT
|
||||
; CHECK: x@DTPOFF
|
||||
|
||||
|
||||
if.then2:
|
||||
%1 = load i32* @y, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
br label %return
|
||||
; This accesses TLS, but is dominated by the previous block,
|
||||
; so should not have to call __tls_get_addr again.
|
||||
; CHECK: # %if.then2
|
||||
; CHECK-NOT: __tls_get_addr
|
||||
; CHECK: y@DTPOFF
|
||||
|
||||
|
||||
return:
|
||||
%retval.0 = phi i32 [ %add, %if.then2 ], [ 5, %entry ], [ %0, %if.else ]
|
||||
ret i32 %retval.0
|
||||
}
|
|
@ -2,6 +2,8 @@
|
|||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
|
||||
|
||||
@i = thread_local global i32 15
|
||||
@j = internal thread_local global i32 42
|
||||
@k = internal thread_local global i32 42
|
||||
|
||||
define i32 @f1() {
|
||||
entry:
|
||||
|
@ -64,4 +66,22 @@ entry:
|
|||
; X64: callq __tls_get_addr@PLT
|
||||
|
||||
|
||||
define i32 @f5() nounwind {
|
||||
entry:
|
||||
%0 = load i32* @j, align 4
|
||||
%1 = load i32* @k, align 4
|
||||
%add = add nsw i32 %0, %1
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; X32: f5:
|
||||
; X32: leal {{[jk]}}@TLSLDM
|
||||
; X32-NEXT: calll ___tls_get_addr@PLT
|
||||
; X32-NEXT: movl {{[jk]}}@DTPOFF(%eax)
|
||||
; X32-NEXT: addl {{[jk]}}@DTPOFF(%eax)
|
||||
|
||||
; X64: f5:
|
||||
; X64: leaq {{[jk]}}@TLSLD(%rip), %rdi
|
||||
; X64-NEXT: callq __tls_get_addr@PLT
|
||||
; X64-NEXT: movl {{[jk]}}@DTPOFF(%rax)
|
||||
; X64-NEXT: addl {{[jk]}}@DTPOFF(%rax)
|
||||
|
|
Loading…
Reference in New Issue