forked from OSchip/llvm-project
Revert "r227976 - [PowerPC] Yet another approach to __tls_get_addr" and related fixups
Unfortunately, even with the workaround of disabling the linker TLS optimizations in Clang restored (which has already been done), this still breaks self-hosting on my P7 machine (-O3 -DNDEBUG -mcpu=native). Bill is currently working on an alternate implementation to address the TLS issue in a way that also fully elides the linker bug (which, unfortunately, this approach did not fully do), so I'm reverting this now. llvm-svn: 228460
This commit is contained in:
parent
ed218a0da0
commit
0d2a1515d5
|
@ -32,7 +32,6 @@ add_llvm_target(PowerPCCodeGen
|
|||
PPCTargetObjectFile.cpp
|
||||
PPCTargetTransformInfo.cpp
|
||||
PPCSelectionDAGInfo.cpp
|
||||
PPCTLSDynamicCall.cpp
|
||||
PPCVSXCopy.cpp
|
||||
PPCVSXFMAMutate.cpp
|
||||
)
|
||||
|
|
|
@ -40,7 +40,6 @@ namespace llvm {
|
|||
FunctionPass *createPPCVSXFMAMutatePass();
|
||||
FunctionPass *createPPCBranchSelectionPass();
|
||||
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
|
||||
FunctionPass *createPPCTLSDynamicCallPass();
|
||||
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
|
||||
AsmPrinter &AP, bool isDarwin);
|
||||
|
||||
|
@ -91,7 +90,12 @@ namespace llvm {
|
|||
MO_TOC_LO = 7 << 4,
|
||||
|
||||
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
|
||||
MO_TLS = 8 << 4
|
||||
MO_TLS = 8 << 4,
|
||||
|
||||
// Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
|
||||
// call sequences.
|
||||
MO_TLSLD = 9 << 4,
|
||||
MO_TLSGD = 10 << 4
|
||||
};
|
||||
} // end namespace PPCII
|
||||
|
||||
|
|
|
@ -101,7 +101,6 @@ namespace {
|
|||
const MachineInstr &MI);
|
||||
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
|
||||
const MachineInstr &MI);
|
||||
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
|
||||
};
|
||||
|
||||
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
|
||||
|
@ -405,39 +404,6 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
|
|||
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
|
||||
}
|
||||
|
||||
/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
|
||||
/// call to __tls_get_addr to the current output stream.
|
||||
void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
|
||||
MCSymbolRefExpr::VariantKind VK) {
|
||||
StringRef Name = "__tls_get_addr";
|
||||
MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
|
||||
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
|
||||
|
||||
assert(MI->getOperand(0).isReg() &&
|
||||
((Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
|
||||
(!Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) &&
|
||||
"GETtls[ld]ADDR[32] must define GPR3");
|
||||
assert(MI->getOperand(1).isReg() &&
|
||||
((Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::X3) ||
|
||||
(!Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
|
||||
"GETtls[ld]ADDR[32] must read GPR3");
|
||||
|
||||
if (!Subtarget.isPPC64() && !Subtarget.isDarwin() &&
|
||||
TM.getRelocationModel() == Reloc::PIC_)
|
||||
Kind = MCSymbolRefExpr::VK_PLT;
|
||||
const MCSymbolRefExpr *TlsRef =
|
||||
MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
|
||||
const MachineOperand &MO = MI->getOperand(2);
|
||||
const GlobalValue *GValue = MO.getGlobal();
|
||||
MCSymbol *MOSymbol = getSymbol(GValue);
|
||||
const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext);
|
||||
EmitToStreamer(OutStreamer,
|
||||
MCInstBuilder(Subtarget.isPPC64() ?
|
||||
PPC::BL8_NOP_TLS : PPC::BL_TLS)
|
||||
.addExpr(TlsRef)
|
||||
.addExpr(SymVar));
|
||||
}
|
||||
|
||||
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
|
||||
/// the current output stream.
|
||||
///
|
||||
|
@ -841,15 +807,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|||
.addExpr(SymGotTlsGD));
|
||||
return;
|
||||
}
|
||||
case PPC::GETtlsADDR:
|
||||
// Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
|
||||
// Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
|
||||
case PPC::GETtlsADDR32: {
|
||||
// Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym>
|
||||
// Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT
|
||||
EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
|
||||
return;
|
||||
}
|
||||
case PPC::ADDIStlsldHA: {
|
||||
// Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
|
||||
// Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
|
||||
|
@ -887,15 +844,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|||
.addExpr(SymGotTlsLD));
|
||||
return;
|
||||
}
|
||||
case PPC::GETtlsldADDR:
|
||||
// Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
|
||||
// Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld)
|
||||
case PPC::GETtlsldADDR32: {
|
||||
// Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym>
|
||||
// Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT
|
||||
EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
|
||||
return;
|
||||
}
|
||||
case PPC::ADDISdtprelHA:
|
||||
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
|
||||
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
|
||||
|
|
|
@ -355,20 +355,6 @@ static bool hasNonRISpills(const MachineFunction &MF) {
|
|||
return FuncInfo->hasNonRISpills();
|
||||
}
|
||||
|
||||
/// MustSaveLR - Return true if this function requires that we save the LR
|
||||
/// register onto the stack in the prolog and restore it in the epilog of the
|
||||
/// function.
|
||||
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
|
||||
const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
|
||||
|
||||
// We need a save/restore of LR if there is any def of LR (which is
|
||||
// defined by calls, including the PIC setup sequence), or if there is
|
||||
// some use of the LR stack slot (e.g. for builtin_return_address).
|
||||
// (LR comes in 32 and 64 bit versions.)
|
||||
MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
|
||||
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
|
||||
}
|
||||
|
||||
/// determineFrameLayout - Determine the size of the frame and maximum call
|
||||
/// frame size.
|
||||
unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
|
||||
|
@ -395,7 +381,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
|
|||
// stackless code if all local vars are reg-allocated.
|
||||
bool DisableRedZone = MF.getFunction()->getAttributes().
|
||||
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
|
||||
unsigned LR = RegInfo->getRARegister();
|
||||
if (!DisableRedZone &&
|
||||
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
|
||||
!Subtarget.isSVR4ABI() || // allocated locals.
|
||||
|
@ -403,7 +388,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
|
|||
FrameSize <= 224 && // Fits in red zone.
|
||||
!MFI->hasVarSizedObjects() && // No dynamic alloca.
|
||||
!MFI->adjustsStack() && // No calls.
|
||||
!MustSaveLR(MF, LR) &&
|
||||
!RegInfo->hasBasePointer(MF)) { // No special alignment.
|
||||
// No need for frame
|
||||
if (UpdateMF)
|
||||
|
@ -1124,6 +1108,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
}
|
||||
}
|
||||
|
||||
/// MustSaveLR - Return true if this function requires that we save the LR
|
||||
/// register onto the stack in the prolog and restore it in the epilog of the
|
||||
/// function.
|
||||
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
|
||||
const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
|
||||
|
||||
// We need a save/restore of LR if there is any def of LR (which is
|
||||
// defined by calls, including the PIC setup sequence), or if there is
|
||||
// some use of the LR stack slot (e.g. for builtin_return_address).
|
||||
// (LR comes in 32 and 64 bit versions.)
|
||||
MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
|
||||
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
|
||||
}
|
||||
|
||||
void
|
||||
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
||||
RegScavenger *) const {
|
||||
|
|
|
@ -806,6 +806,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::SHL: return "PPCISD::SHL";
|
||||
case PPCISD::CALL: return "PPCISD::CALL";
|
||||
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
|
||||
case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
|
||||
case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
|
||||
case PPCISD::MTCTR: return "PPCISD::MTCTR";
|
||||
case PPCISD::BCTRL: return "PPCISD::BCTRL";
|
||||
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
|
||||
|
@ -839,10 +841,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
|
||||
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
|
||||
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
|
||||
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
|
||||
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
|
||||
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
|
||||
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
|
||||
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
|
||||
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
|
||||
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
|
||||
|
@ -1701,6 +1701,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
|
|||
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
|
||||
}
|
||||
|
||||
// Generate a call to __tls_get_addr for the given GOT entry Op.
|
||||
std::pair<SDValue,SDValue>
|
||||
PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
|
||||
SelectionDAG &DAG) const {
|
||||
|
||||
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
|
||||
TargetLowering::ArgListTy Args;
|
||||
TargetLowering::ArgListEntry Entry;
|
||||
Entry.Node = Op;
|
||||
Entry.Ty = IntPtrTy;
|
||||
Args.push_back(Entry);
|
||||
|
||||
TargetLowering::CallLoweringInfo CLI(DAG);
|
||||
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
|
||||
.setCallee(CallingConv::C, IntPtrTy,
|
||||
DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
|
||||
std::move(Args), 0);
|
||||
|
||||
return LowerCallTo(CLI);
|
||||
}
|
||||
|
||||
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
|
||||
|
@ -1747,7 +1768,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
|
|||
}
|
||||
|
||||
if (Model == TLSModel::GeneralDynamic) {
|
||||
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
|
||||
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
|
||||
PPCII::MO_TLSGD);
|
||||
SDValue GOTPtr;
|
||||
if (is64bit) {
|
||||
setUsesTOCBasePtr(DAG);
|
||||
|
@ -1760,13 +1782,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
|
|||
else
|
||||
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
|
||||
}
|
||||
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl,
|
||||
PtrVT, GOTPtr, TGA);
|
||||
return DAG.getNode(PPCISD::GET_TLS_ADDR, dl, PtrVT, GOTEntry, TGA);
|
||||
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
|
||||
GOTPtr, TGA);
|
||||
std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
|
||||
return CallResult.first;
|
||||
}
|
||||
|
||||
if (Model == TLSModel::LocalDynamic) {
|
||||
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
|
||||
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
|
||||
PPCII::MO_TLSLD);
|
||||
SDValue GOTPtr;
|
||||
if (is64bit) {
|
||||
setUsesTOCBasePtr(DAG);
|
||||
|
@ -1781,10 +1805,11 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
|
|||
}
|
||||
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
|
||||
GOTPtr, TGA);
|
||||
SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
|
||||
PtrVT, GOTEntry, TGA);
|
||||
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
|
||||
PtrVT, TLSAddr, TGA);
|
||||
std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
|
||||
SDValue TLSAddr = CallResult.first;
|
||||
SDValue Chain = CallResult.second;
|
||||
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
|
||||
Chain, TLSAddr, TGA);
|
||||
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
|
||||
}
|
||||
|
||||
|
@ -3808,6 +3833,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
|
|||
if (Callee.getNode()) {
|
||||
Ops.push_back(Chain);
|
||||
Ops.push_back(Callee);
|
||||
|
||||
// If this is a call to __tls_get_addr, find the symbol whose address
|
||||
// is to be taken and add it to the list. This will be used to
|
||||
// generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
|
||||
// We find the symbol by walking the chain to the CopyFromReg, walking
|
||||
// back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
|
||||
// pulling the symbol from that node.
|
||||
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
|
||||
if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
|
||||
assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
|
||||
SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
|
||||
SDValue TGTAddr = AddI->getOperand(1);
|
||||
assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
|
||||
"Didn't find target global TLS address where we expected one");
|
||||
Ops.push_back(TGTAddr);
|
||||
CallOpc = PPCISD::CALL_TLS;
|
||||
}
|
||||
}
|
||||
// If this is a tail call add stack pointer delta.
|
||||
if (isTailCall)
|
||||
|
@ -3970,9 +4012,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
|
|||
Ops.insert(std::next(Ops.begin()), AddTOC);
|
||||
} else if ((CallOpc == PPCISD::CALL) &&
|
||||
(!isLocalCall(Callee) ||
|
||||
DAG.getTarget().getRelocationModel() == Reloc::PIC_))
|
||||
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
|
||||
// Otherwise insert NOP for non-local calls.
|
||||
CallOpc = PPCISD::CALL_NOP;
|
||||
} else if (CallOpc == PPCISD::CALL_TLS)
|
||||
// For 64-bit SVR4, TLS calls are always non-local.
|
||||
CallOpc = PPCISD::CALL_NOP_TLS;
|
||||
}
|
||||
|
||||
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
|
||||
|
|
|
@ -101,6 +101,10 @@ namespace llvm {
|
|||
/// SVR4 calls.
|
||||
CALL, CALL_NOP,
|
||||
|
||||
/// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
|
||||
/// to access TLS variables.
|
||||
CALL_TLS, CALL_NOP_TLS,
|
||||
|
||||
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
|
||||
/// MTCTR instruction.
|
||||
MTCTR,
|
||||
|
@ -224,10 +228,6 @@ namespace llvm {
|
|||
/// sym\@got\@tlsgd\@l.
|
||||
ADDI_TLSGD_L,
|
||||
|
||||
/// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
|
||||
/// model, produces a call to __tls_get_addr(sym\@tlsgd).
|
||||
GET_TLS_ADDR,
|
||||
|
||||
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
|
||||
/// model, produces an ADDIS8 instruction that adds the GOT base
|
||||
/// register to sym\@got\@tlsld\@ha.
|
||||
|
@ -238,13 +238,11 @@ namespace llvm {
|
|||
/// sym\@got\@tlsld\@l.
|
||||
ADDI_TLSLD_L,
|
||||
|
||||
/// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
|
||||
/// model, produces a call to __tls_get_addr(sym\@tlsld).
|
||||
GET_TLSLD_ADDR,
|
||||
|
||||
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
|
||||
/// model, produces an ADDIS8 instruction that adds X3 to
|
||||
/// sym\@dtprel\@ha.
|
||||
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
|
||||
/// local-dynamic TLS model, produces an ADDIS8 instruction
|
||||
/// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
|
||||
/// to tie this in place following a copy to %X3 from the result
|
||||
/// of a GET_TLSLD_ADDR.
|
||||
ADDIS_DTPREL_HA,
|
||||
|
||||
/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
|
||||
|
@ -637,6 +635,8 @@ namespace llvm {
|
|||
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -202,6 +202,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
|
|||
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
|
||||
(BL8_NOP texternalsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
|
||||
(BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
|
||||
|
||||
// Atomic operations
|
||||
let usesCustomInserter = 1 in {
|
||||
let Defs = [CR0] in {
|
||||
|
@ -901,12 +904,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
|
|||
[(set i64:$rD,
|
||||
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
|
||||
isPPC64;
|
||||
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR8] in
|
||||
def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
|
||||
"#GETtlsADDR",
|
||||
[(set i64:$rD,
|
||||
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
|
||||
isPPC64;
|
||||
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
|
||||
"#ADDIStlsldHA",
|
||||
[(set i64:$rD,
|
||||
|
@ -917,12 +914,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
|
|||
[(set i64:$rD,
|
||||
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
|
||||
isPPC64;
|
||||
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR8] in
|
||||
def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
|
||||
"#GETtlsldADDR",
|
||||
[(set i64:$rD,
|
||||
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
|
||||
isPPC64;
|
||||
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
|
||||
"#ADDISdtprelHA",
|
||||
[(set i64:$rD,
|
||||
|
|
|
@ -110,11 +110,10 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
|
|||
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
|
||||
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
|
||||
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
|
||||
def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
|
||||
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
|
||||
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
|
||||
def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
|
||||
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
|
||||
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
|
||||
[SDNPHasChain]>;
|
||||
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
|
||||
|
||||
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
|
||||
|
@ -137,9 +136,15 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
|
|||
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
|
||||
|
@ -2454,6 +2459,9 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
|
|||
def : Pat<(PPCcall (i32 texternalsym:$dst)),
|
||||
(BL texternalsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
|
||||
(BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
|
||||
|
||||
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
|
||||
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
|
||||
|
||||
|
@ -2508,21 +2516,10 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
|
|||
"#ADDItlsgdL32",
|
||||
[(set i32:$rD,
|
||||
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
|
||||
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR] in
|
||||
def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
|
||||
"GETtlsADDR32",
|
||||
[(set i32:$rD,
|
||||
(PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
|
||||
def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
|
||||
"#ADDItlsldL32",
|
||||
[(set i32:$rD,
|
||||
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
|
||||
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR] in
|
||||
def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
|
||||
"GETtlsldADDR32",
|
||||
[(set i32:$rD,
|
||||
(PPCgetTlsldAddr i32:$reg,
|
||||
tglobaltlsaddr:$sym))]>;
|
||||
def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
|
||||
"#ADDIdtprelL32",
|
||||
[(set i32:$rD,
|
||||
|
|
|
@ -137,6 +137,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
|
|||
case PPCII::MO_TLS:
|
||||
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
|
||||
break;
|
||||
case PPCII::MO_TLSGD:
|
||||
RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
|
||||
break;
|
||||
case PPCII::MO_TLSLD:
|
||||
RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
|
||||
break;
|
||||
}
|
||||
|
||||
if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
|
||||
|
|
|
@ -1,113 +0,0 @@
|
|||
//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass fixes up GETtls[ld]ADDR[32] machine instructions so that
|
||||
// they read and write GPR3. These are really call instructions, so
|
||||
// must use the calling convention registers. This is done in a late
|
||||
// pass so that TLS variable accesses can be fully commoned.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PPCInstrInfo.h"
|
||||
#include "PPC.h"
|
||||
#include "PPCInstrBuilder.h"
|
||||
#include "PPCTargetMachine.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "ppc-tls-dynamic-call"
|
||||
|
||||
namespace llvm {
|
||||
void initializePPCTLSDynamicCallPass(PassRegistry&);
|
||||
}
|
||||
|
||||
namespace {
|
||||
// PPCTLSDynamicCall pass - Add copies to and from GPR3 around
|
||||
// GETtls[ld]ADDR[32] machine instructions. These instructions
|
||||
// are actually call instructions, so the register choice is
|
||||
// constrained. We delay introducing these copies as late as
|
||||
// possible so that TLS variable accesses can be fully commoned.
|
||||
struct PPCTLSDynamicCall : public MachineFunctionPass {
|
||||
static char ID;
|
||||
PPCTLSDynamicCall() : MachineFunctionPass(ID) {
|
||||
initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
const PPCTargetMachine *TM;
|
||||
const PPCInstrInfo *TII;
|
||||
|
||||
protected:
|
||||
bool processBlock(MachineBasicBlock &MBB) {
|
||||
bool Changed = false;
|
||||
bool Is64Bit = TM->getSubtargetImpl()->isPPC64();
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
|
||||
I != IE; ++I) {
|
||||
MachineInstr *MI = I;
|
||||
|
||||
if (MI->getOpcode() != PPC::GETtlsADDR &&
|
||||
MI->getOpcode() != PPC::GETtlsldADDR &&
|
||||
MI->getOpcode() != PPC::GETtlsADDR32 &&
|
||||
MI->getOpcode() != PPC::GETtlsldADDR32)
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
|
||||
|
||||
unsigned OutReg = MI->getOperand(0).getReg();
|
||||
unsigned InReg = MI->getOperand(1).getReg();
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
|
||||
.addReg(InReg);
|
||||
MI->getOperand(0).setReg(GPR3);
|
||||
MI->getOperand(1).setReg(GPR3);
|
||||
BuildMI(MBB, ++I, DL, TII->get(TargetOpcode::COPY), OutReg)
|
||||
.addReg(GPR3);
|
||||
|
||||
Changed = true;
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
public:
|
||||
bool runOnMachineFunction(MachineFunction &MF) override {
|
||||
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
|
||||
TII = TM->getSubtargetImpl()->getInstrInfo();
|
||||
|
||||
bool Changed = false;
|
||||
|
||||
for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
|
||||
MachineBasicBlock &B = *I++;
|
||||
if (processBlock(B))
|
||||
Changed = true;
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
|
||||
"PowerPC TLS Dynamic Call Fixup", false, false)
|
||||
INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
|
||||
"PowerPC TLS Dynamic Call Fixup", false, false)
|
||||
|
||||
char PPCTLSDynamicCall::ID = 0;
|
||||
FunctionPass*
|
||||
llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
|
|
@ -266,7 +266,6 @@ void PPCPassConfig::addPreRegAlloc() {
|
|||
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
|
||||
insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
|
||||
&PPCVSXFMAMutateID);
|
||||
addPass(createPPCTLSDynamicCallPass());
|
||||
}
|
||||
|
||||
void PPCPassConfig::addPreSched2() {
|
||||
|
|
|
@ -12,7 +12,8 @@ entry:
|
|||
; CHECK-LABEL: @test1
|
||||
; CHECK: mflr 0
|
||||
; CHECK: std 0, 16(1)
|
||||
; CHECK-DAG: ld 3, 64(1)
|
||||
; FIXME: These next two lines don't both need to load the same value.
|
||||
; CHECK-DAG: ld 3, 16(1)
|
||||
; CHECK-DAG: ld 0, 16(1)
|
||||
; CHECK: mtlr 0
|
||||
; CHECK: blr
|
||||
|
|
|
@ -1,52 +0,0 @@
|
|||
; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s
|
||||
; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1
|
||||
|
||||
; This test was derived from LLVM's own
|
||||
; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an
|
||||
; opportunity for CSE of calls to __tls_get_addr().
|
||||
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-unknown-linux-gnu"
|
||||
|
||||
%"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* }
|
||||
|
||||
@_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8
|
||||
@_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8
|
||||
@.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1
|
||||
@.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1
|
||||
@__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1
|
||||
|
||||
declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr
|
||||
declare void @__cxa_pure_virtual()
|
||||
declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*)
|
||||
declare void @_ZdlPv(i8*)
|
||||
|
||||
define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 {
|
||||
entry:
|
||||
%0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0
|
||||
store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
|
||||
%1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8
|
||||
%cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this
|
||||
br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i
|
||||
|
||||
cond.false.i: ; preds = %entry
|
||||
tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0))
|
||||
unreachable
|
||||
|
||||
_ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry
|
||||
%NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1
|
||||
%2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64*
|
||||
%3 = load i64* %2, align 8
|
||||
store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8
|
||||
%4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8*
|
||||
tail call void @_ZdlPv(i8* %4)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev:
|
||||
; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha
|
||||
; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l
|
||||
; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld)
|
||||
; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha
|
||||
; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
|
||||
; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=ppc64 -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0 %s
|
||||
; RUN: llc -march=ppc64 -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1 %s
|
||||
; R;U;N: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s
|
||||
; R;U;N: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s
|
||||
; RUN: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s
|
||||
; RUN: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s
|
||||
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
; Test correct assembly code generation for thread-local storage using
|
||||
|
@ -65,5 +65,5 @@ entry:
|
|||
; OPT1: bl __tls_get_addr(a2@tlsgd)
|
||||
; OPT1-NEXT: nop
|
||||
; OPT1-32-LABEL: main2
|
||||
; OPT1-32: addi {{[0-9]+}}, {{[0-9]+}}, a2@got@tlsgd
|
||||
; OPT1-32: addi 3, {{[0-9]+}}, a2@got@tlsgd
|
||||
; OPT1-32: bl __tls_get_addr(a2@tlsgd)@PLT
|
||||
|
|
|
@ -19,11 +19,13 @@ entry:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: call_once:
|
||||
; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
|
||||
; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha
|
||||
; CHECK: addi 3, 3, __once_callable@got@tlsgd@l
|
||||
; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK: std {{[0-9]+}}, 0(3)
|
||||
; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
|
||||
; CHECK: addis 3, 2, __once_call@got@tlsgd@ha
|
||||
; CHECK: addi 3, 3, __once_call@got@tlsgd@l
|
||||
; CHECK: bl __tls_get_addr(__once_call@tlsgd)
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK: std {{[0-9]+}}, 0(3)
|
||||
|
|
Loading…
Reference in New Issue