2014-05-24 20:50:23 +08:00
|
|
|
//===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=//
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Local-dynamic access to thread-local variables proceeds in three stages.
|
|
|
|
//
|
|
|
|
// 1. The offset of this Module's thread-local area from TPIDR_EL0 is calculated
|
|
|
|
// in much the same way as a general-dynamic TLS-descriptor access against
|
|
|
|
// the special symbol _TLS_MODULE_BASE_.
|
|
|
|
// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using
|
|
|
|
// instructions with "dtprel" modifiers.
|
|
|
|
// 3. These two are added, together with TPIDR_EL0, to obtain the variable's
|
|
|
|
// true address.
|
|
|
|
//
|
|
|
|
// This is only better than general-dynamic access to the variable if two or
|
|
|
|
// more of the first stage TLS-descriptor calculations can be combined. This
|
|
|
|
// pass looks through a function and performs such combinations.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
2014-05-24 20:50:23 +08:00
|
|
|
#include "AArch64.h"
|
|
|
|
#include "AArch64InstrInfo.h"
|
|
|
|
#include "AArch64MachineFunctionInfo.h"
|
|
|
|
#include "AArch64TargetMachine.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/CodeGen/MachineDominators.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
struct LDTLSCleanup : public MachineFunctionPass {
|
|
|
|
static char ID;
|
|
|
|
LDTLSCleanup() : MachineFunctionPass(ID) {}
|
|
|
|
|
2014-04-29 15:58:25 +08:00
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override {
|
2014-05-24 20:50:23 +08:00
|
|
|
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
2014-03-29 18:18:08 +08:00
|
|
|
if (AFI->getNumLocalDynamicTLSAccesses() < 2) {
|
|
|
|
// No point folding accesses if there isn't at least two.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
|
|
|
|
return VisitNode(DT->getRootNode(), 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Visit the dominator subtree rooted at Node in pre-order.
|
|
|
|
// If TLSBaseAddrReg is non-null, then use that to replace any
|
|
|
|
// TLS_base_addr instructions. Otherwise, create the register
|
|
|
|
// when the first such instruction is seen, and then use it
|
|
|
|
// as we encounter more instructions.
|
|
|
|
bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
|
|
|
|
MachineBasicBlock *BB = Node->getBlock();
|
|
|
|
bool Changed = false;
|
|
|
|
|
|
|
|
// Traverse the current block.
|
|
|
|
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
|
|
|
|
++I) {
|
|
|
|
switch (I->getOpcode()) {
|
Fix PR22408 - LLVM producing AArch64 TLS relocations that GNU linkers cannot handle yet.
As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the GNU linkers
ld.bfd and ld.gold currently only support a subset of the whole range of AArch64
ELF TLS relocations. Furthermore, they assume that some of the code sequences to
access thread-local variables are produced in a very specific sequence.
When the sequence is not as the linker expects, it can silently mis-relaxe/mis-optimize
the instructions.
Even if that wouldn't be the case, it's good to produce the exact sequence,
as that ensures that linkers can perform optimizing relaxations.
This patch:
* implements support for 16MiB TLS area size instead of 4GiB TLS area size. Ideally clang
would grow an -mtls-size option to allow support for both, but that's not part of this patch.
* by default doesn't produce local dynamic access patterns, as even modern ld.bfd and ld.gold
linkers do not support the associated relocations. An option (-aarch64-elf-ldtls-generation)
is added to enable generation of local dynamic code sequence, but is off by default.
* makes sure that the exact expected code sequence for local dynamic and general dynamic
accesses is produced, by making use of a new pseudo instruction. The patch also removes
two (AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing AArch64-specific pseudo
SDNode instructions that are superseded by the new one (TLSDESC_CALLSEQ).
llvm-svn: 231227
2015-03-04 17:12:08 +08:00
|
|
|
case AArch64::TLSDESC_CALLSEQ:
|
2014-03-29 18:18:08 +08:00
|
|
|
// Make sure it's a local dynamic access.
|
Fix PR22408 - LLVM producing AArch64 TLS relocations that GNU linkers cannot handle yet.
As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the GNU linkers
ld.bfd and ld.gold currently only support a subset of the whole range of AArch64
ELF TLS relocations. Furthermore, they assume that some of the code sequences to
access thread-local variables are produced in a very specific sequence.
When the sequence is not as the linker expects, it can silently mis-relaxe/mis-optimize
the instructions.
Even if that wouldn't be the case, it's good to produce the exact sequence,
as that ensures that linkers can perform optimizing relaxations.
This patch:
* implements support for 16MiB TLS area size instead of 4GiB TLS area size. Ideally clang
would grow an -mtls-size option to allow support for both, but that's not part of this patch.
* by default doesn't produce local dynamic access patterns, as even modern ld.bfd and ld.gold
linkers do not support the associated relocations. An option (-aarch64-elf-ldtls-generation)
is added to enable generation of local dynamic code sequence, but is off by default.
* makes sure that the exact expected code sequence for local dynamic and general dynamic
accesses is produced, by making use of a new pseudo instruction. The patch also removes
two (AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing AArch64-specific pseudo
SDNode instructions that are superseded by the new one (TLSDESC_CALLSEQ).
llvm-svn: 231227
2015-03-04 17:12:08 +08:00
|
|
|
if (!I->getOperand(0).isSymbol() ||
|
|
|
|
strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
if (TLSBaseAddrReg)
|
|
|
|
I = replaceTLSBaseAddrCall(I, TLSBaseAddrReg);
|
|
|
|
else
|
|
|
|
I = setRegister(I, &TLSBaseAddrReg);
|
|
|
|
Changed = true;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Visit the children of this block in the dominator tree.
|
2014-04-03 04:21:22 +08:00
|
|
|
for (MachineDomTreeNode *N : *Node) {
|
2014-04-03 02:00:41 +08:00
|
|
|
Changed |= VisitNode(N, TLSBaseAddrReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Replace the TLS_base_addr instruction I with a copy from
|
|
|
|
// TLSBaseAddrReg, returning the new instruction.
|
|
|
|
MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
|
|
|
|
unsigned TLSBaseAddrReg) {
|
|
|
|
MachineFunction *MF = I->getParent()->getParent();
|
2015-01-30 09:10:24 +08:00
|
|
|
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
|
|
|
|
// code sequence assumes the address will be.
|
2014-05-24 20:50:23 +08:00
|
|
|
MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
|
|
|
|
TII->get(TargetOpcode::COPY),
|
|
|
|
AArch64::X0).addReg(TLSBaseAddrReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Erase the TLS_base_addr instruction.
|
|
|
|
I->eraseFromParent();
|
|
|
|
|
|
|
|
return Copy;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a virtal register in *TLSBaseAddrReg, and populate it by
|
|
|
|
// inserting a copy instruction after I. Returns the new instruction.
|
|
|
|
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
|
|
|
|
MachineFunction *MF = I->getParent()->getParent();
|
2015-01-30 09:10:24 +08:00
|
|
|
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Create a virtual register for the TLS base address.
|
|
|
|
MachineRegisterInfo &RegInfo = MF->getRegInfo();
|
2014-05-24 20:50:23 +08:00
|
|
|
*TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Insert a copy from X0 to TLSBaseAddrReg for later.
|
|
|
|
MachineInstr *Next = I->getNextNode();
|
|
|
|
MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
|
|
|
|
TII->get(TargetOpcode::COPY),
|
2014-05-24 20:50:23 +08:00
|
|
|
*TLSBaseAddrReg).addReg(AArch64::X0);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
return Copy;
|
|
|
|
}
|
|
|
|
|
2014-04-29 15:58:25 +08:00
|
|
|
const char *getPassName() const override {
|
2014-03-29 18:18:08 +08:00
|
|
|
return "Local Dynamic TLS Access Clean-up";
|
|
|
|
}
|
|
|
|
|
2014-04-29 15:58:25 +08:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
2014-03-29 18:18:08 +08:00
|
|
|
AU.setPreservesCFG();
|
|
|
|
AU.addRequired<MachineDominatorTree>();
|
|
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Out-of-class definition of the pass's static ID member, which the
// LDTLSCleanup() constructor passes to the MachineFunctionPass constructor.
// NOTE(review): by LLVM convention a pass is identified by this variable's
// address rather than its value, so the initializer 0 carries no meaning --
// confirm against the LLVM pass documentation.
char LDTLSCleanup::ID = 0;
|
2014-05-24 20:50:23 +08:00
|
|
|
/// Factory for the local-dynamic TLS clean-up pass.
///
/// Returns a freshly heap-allocated LDTLSCleanup through its FunctionPass
/// base pointer; this function does not retain or free the object.
FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
  FunctionPass *CleanupPass = new LDTLSCleanup();
  return CleanupPass;
}
|