forked from OSchip/llvm-project
Re-land "[Thumb] Save/restore high registers in Thumb1 pro/epilogues"
Reverts r283938 to reinstate r283867 with a fix. The original change had an ArrayRef referring to a destroyed temporary initializer list. Use plain C arrays instead. llvm-svn: 283942
This commit is contained in:
parent
e778d10c0f
commit
bdfc05ff93
|
@ -30,6 +30,8 @@
|
|||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
|
||||
#define DEBUG_TYPE "arm-frame-lowering"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool>
|
||||
|
@ -1485,6 +1487,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
||||
(void)TRI; // Silence unused warning in non-assert builds.
|
||||
unsigned FramePtr = RegInfo->getFrameRegister(MF);
|
||||
|
||||
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
|
||||
|
@ -1640,6 +1644,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
SavedRegs.set(ARM::LR);
|
||||
LRSpilled = true;
|
||||
NumGPRSpills++;
|
||||
auto LRPos = find(UnspilledCS1GPRs, ARM::LR);
|
||||
if (LRPos != UnspilledCS1GPRs.end())
|
||||
UnspilledCS1GPRs.erase(LRPos);
|
||||
}
|
||||
auto FPPos = find(UnspilledCS1GPRs, FramePtr);
|
||||
if (FPPos != UnspilledCS1GPRs.end())
|
||||
|
@ -1649,6 +1656,116 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
CS1Spilled = true;
|
||||
}
|
||||
|
||||
if (AFI->isThumb1OnlyFunction()) {
|
||||
// For Thumb1-only targets, we need some low registers when we save and
|
||||
// restore the high registers (which aren't allocatable, but could be
|
||||
// used by inline assembly) because the push/pop instructions can not
|
||||
// access high registers. If necessary, we might need to push more low
|
||||
// registers to ensure that there is at least one free that can be used
|
||||
// for the saving & restoring, and preferably we should ensure that as
|
||||
// many as are needed are available so that fewer push/pop instructions
|
||||
// are required.
|
||||
|
||||
// Low registers which are not currently pushed, but could be (r4-r7).
|
||||
SmallVector<unsigned, 4> AvailableRegs;
|
||||
|
||||
// Unused argument registers (r0-r3) can be clobbered in the prologue for
|
||||
// free.
|
||||
int EntryRegDeficit = 0;
|
||||
for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
|
||||
if (!MF.getRegInfo().isLiveIn(Reg)) {
|
||||
--EntryRegDeficit;
|
||||
DEBUG(dbgs() << PrintReg(Reg, TRI)
|
||||
<< " is unused argument register, EntryRegDeficit = "
|
||||
<< EntryRegDeficit << "\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Unused return registers can be clobbered in the epilogue for free.
|
||||
int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
|
||||
DEBUG(dbgs() << AFI->getReturnRegsCount()
|
||||
<< " return regs used, ExitRegDeficit = " << ExitRegDeficit
|
||||
<< "\n");
|
||||
|
||||
int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
|
||||
DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
|
||||
|
||||
// r4-r6 can be used in the prologue if they are pushed by the first push
|
||||
// instruction.
|
||||
for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
|
||||
if (SavedRegs.test(Reg)) {
|
||||
--RegDeficit;
|
||||
DEBUG(dbgs() << PrintReg(Reg, TRI)
|
||||
<< " is saved low register, RegDeficit = " << RegDeficit
|
||||
<< "\n");
|
||||
} else {
|
||||
AvailableRegs.push_back(Reg);
|
||||
DEBUG(dbgs()
|
||||
<< PrintReg(Reg, TRI)
|
||||
<< " is non-saved low register, adding to AvailableRegs\n");
|
||||
}
|
||||
}
|
||||
|
||||
// r7 can be used if it is not being used as the frame pointer.
|
||||
if (!hasFP(MF)) {
|
||||
if (SavedRegs.test(ARM::R7)) {
|
||||
--RegDeficit;
|
||||
DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
|
||||
<< RegDeficit << "\n");
|
||||
} else {
|
||||
AvailableRegs.push_back(ARM::R7);
|
||||
DEBUG(dbgs()
|
||||
<< "%R7 is non-saved low register, adding to AvailableRegs\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Each of r8-r11 needs to be copied to a low register, then pushed.
|
||||
for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
|
||||
if (SavedRegs.test(Reg)) {
|
||||
++RegDeficit;
|
||||
DEBUG(dbgs() << PrintReg(Reg, TRI)
|
||||
<< " is saved high register, RegDeficit = " << RegDeficit
|
||||
<< "\n");
|
||||
}
|
||||
}
|
||||
|
||||
// LR can only be used by PUSH, not POP, and can't be used at all if the
|
||||
// llvm.returnaddress intrinsic is used. This is only worth doing if we
|
||||
// are more limited at function entry than exit.
|
||||
if ((EntryRegDeficit > ExitRegDeficit) &&
|
||||
!(MF.getRegInfo().isLiveIn(ARM::LR) &&
|
||||
MF.getFrameInfo().isReturnAddressTaken())) {
|
||||
if (SavedRegs.test(ARM::LR)) {
|
||||
--RegDeficit;
|
||||
DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit
|
||||
<< "\n");
|
||||
} else {
|
||||
AvailableRegs.push_back(ARM::LR);
|
||||
DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n");
|
||||
}
|
||||
}
|
||||
|
||||
// If there are more high registers that need pushing than low registers
|
||||
// available, push some more low registers so that we can use fewer push
|
||||
// instructions. This might not reduce RegDeficit all the way to zero,
|
||||
// because we can only guarantee that r4-r6 are available, but r8-r11 may
|
||||
// need saving.
|
||||
DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
|
||||
for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
|
||||
unsigned Reg = AvailableRegs.pop_back_val();
|
||||
DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
|
||||
<< " to make up reg deficit\n");
|
||||
SavedRegs.set(Reg);
|
||||
NumGPRSpills++;
|
||||
CS1Spilled = true;
|
||||
ExtraCSSpill = true;
|
||||
UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg));
|
||||
if (Reg == ARM::LR)
|
||||
LRSpilled = true;
|
||||
}
|
||||
DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit << "\n");
|
||||
}
|
||||
|
||||
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
|
||||
// Spill LR as well so we can fold BX_RET to the registers restore (LDM).
|
||||
if (!LRSpilled && CS1Spilled) {
|
||||
|
@ -1666,6 +1783,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
// If stack and double are 8-byte aligned and we are spilling an odd number
|
||||
// of GPRs, spill one extra callee save GPR so we won't have to pad between
|
||||
// the integer and double callee save areas.
|
||||
DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
|
||||
unsigned TargetAlign = getStackAlignment();
|
||||
if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
|
||||
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
|
||||
|
@ -1677,6 +1795,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
(STI.isTargetWindows() && Reg == ARM::R11) ||
|
||||
isARMLowRegister(Reg) || Reg == ARM::LR) {
|
||||
SavedRegs.set(Reg);
|
||||
DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
|
||||
<< " to make up alignment\n");
|
||||
if (!MRI.isReserved(Reg))
|
||||
ExtraCSSpill = true;
|
||||
break;
|
||||
|
@ -1685,6 +1805,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
|
||||
unsigned Reg = UnspilledCS2GPRs.front();
|
||||
SavedRegs.set(Reg);
|
||||
DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
|
||||
<< " to make up alignment\n");
|
||||
if (!MRI.isReserved(Reg))
|
||||
ExtraCSSpill = true;
|
||||
}
|
||||
|
|
|
@ -568,10 +568,12 @@ public:
|
|||
}
|
||||
/// Returns true if the frame setup is split into two separate pushes (first
|
||||
/// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
|
||||
/// to lr.
|
||||
/// to lr. This is always required on Thumb1-only targets, as the push and
|
||||
/// pop instructions can't access the high registers.
|
||||
bool splitFramePushPop(const MachineFunction &MF) const {
|
||||
return useR7AsFramePointer() &&
|
||||
MF.getTarget().Options.DisableFramePointerElim(MF);
|
||||
return (useR7AsFramePointer() &&
|
||||
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
|
||||
isThumb1Only();
|
||||
}
|
||||
|
||||
bool useStride4VFPs(const MachineFunction &MF) const;
|
||||
|
|
|
@ -188,7 +188,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
|
|||
|
||||
int FramePtrOffsetInBlock = 0;
|
||||
unsigned adjustedGPRCS1Size = GPRCS1Size;
|
||||
if (tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
|
||||
if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
|
||||
tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
|
||||
FramePtrOffsetInBlock = NumBytes;
|
||||
adjustedGPRCS1Size += NumBytes;
|
||||
NumBytes = 0;
|
||||
|
@ -261,6 +262,48 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
|
|||
AFI->setShouldRestoreSPFromFP(true);
|
||||
}
|
||||
|
||||
// Skip past the spilling of r8-r11, which could consist of multiple tPUSH
|
||||
// and tMOVr instructions. We don't need to add any call frame information
|
||||
// in-between these instructions, because they do not modify the high
|
||||
// registers.
|
||||
while (true) {
|
||||
MachineBasicBlock::iterator OldMBBI = MBBI;
|
||||
// Skip a run of tMOVr instructions
|
||||
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
|
||||
MBBI++;
|
||||
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
|
||||
MBBI++;
|
||||
} else {
|
||||
// We have reached an instruction which is not a push, so the previous
|
||||
// run of tMOVr instructions (which may have been empty) was not part of
|
||||
// the prologue. Reset MBBI back to the last PUSH of the prologue.
|
||||
MBBI = OldMBBI;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Emit call frame information for the callee-saved high registers.
|
||||
for (auto &I : CSI) {
|
||||
unsigned Reg = I.getReg();
|
||||
int FI = I.getFrameIdx();
|
||||
switch (Reg) {
|
||||
case ARM::R8:
|
||||
case ARM::R9:
|
||||
case ARM::R10:
|
||||
case ARM::R11:
|
||||
case ARM::R12: {
|
||||
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
|
||||
nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
|
||||
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex)
|
||||
.setMIFlags(MachineInstr::FrameSetup);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (NumBytes) {
|
||||
// Insert it after all the callee-save spills.
|
||||
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
|
||||
|
@ -308,12 +351,12 @@ static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
|
|||
isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
|
||||
return true;
|
||||
else if (MI.getOpcode() == ARM::tPOP) {
|
||||
// The first two operands are predicates. The last two are
|
||||
// imp-def and imp-use of SP. Check everything in between.
|
||||
for (int i = 2, e = MI.getNumOperands() - 2; i != e; ++i)
|
||||
if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
|
||||
return false;
|
||||
return true;
|
||||
} else if (MI.getOpcode() == ARM::tMOVr) {
|
||||
unsigned Dst = MI.getOperand(0).getReg();
|
||||
unsigned Src = MI.getOperand(1).getReg();
|
||||
return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
|
||||
ARM::hGPRRegClass.contains(Dst));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -568,6 +611,19 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Return the first iterator after CurrentReg which is present in EnabledRegs,
|
||||
// or OrderEnd if no further registers are in that set. This does not advance
|
||||
// the iterator first, so returns CurrentReg if it is in EnabledRegs.
|
||||
template <unsigned SetSize>
|
||||
static const unsigned *
|
||||
findNextOrderedReg(const unsigned *CurrentReg,
|
||||
SmallSet<unsigned, SetSize> &EnabledRegs,
|
||||
const unsigned *OrderEnd) {
|
||||
while (CurrentReg != OrderEnd && !EnabledRegs.count(*CurrentReg))
|
||||
++CurrentReg;
|
||||
return CurrentReg;
|
||||
}
|
||||
|
||||
bool Thumb1FrameLowering::
|
||||
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
|
@ -578,29 +634,114 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
|||
|
||||
DebugLoc DL;
|
||||
const TargetInstrInfo &TII = *STI.getInstrInfo();
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
|
||||
MF.getSubtarget().getRegisterInfo());
|
||||
|
||||
SmallSet<unsigned, 9> LoRegsToSave; // r0-r7, lr
|
||||
SmallSet<unsigned, 4> HiRegsToSave; // r8-r11
|
||||
SmallSet<unsigned, 9> CopyRegs; // Registers which can be used after pushing
|
||||
// LoRegs for saving HiRegs.
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
|
||||
AddDefaultPred(MIB);
|
||||
for (unsigned i = CSI.size(); i != 0; --i) {
|
||||
unsigned Reg = CSI[i-1].getReg();
|
||||
bool isKill = true;
|
||||
|
||||
// Add the callee-saved register as live-in unless it's LR and
|
||||
// @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
|
||||
// then it's already added to the function and entry block live-in sets.
|
||||
if (Reg == ARM::LR) {
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
if (MF.getFrameInfo().isReturnAddressTaken() &&
|
||||
MF.getRegInfo().isLiveIn(Reg))
|
||||
isKill = false;
|
||||
if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
|
||||
LoRegsToSave.insert(Reg);
|
||||
} else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
|
||||
HiRegsToSave.insert(Reg);
|
||||
} else {
|
||||
llvm_unreachable("callee-saved register of unexpected class");
|
||||
}
|
||||
|
||||
if (isKill)
|
||||
MBB.addLiveIn(Reg);
|
||||
|
||||
MIB.addReg(Reg, getKillRegState(isKill));
|
||||
if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
|
||||
!MF.getRegInfo().isLiveIn(Reg) &&
|
||||
!(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
|
||||
CopyRegs.insert(Reg);
|
||||
}
|
||||
MIB.setMIFlags(MachineInstr::FrameSetup);
|
||||
|
||||
// Unused argument registers can be used for the high register saving.
|
||||
for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
|
||||
if (!MF.getRegInfo().isLiveIn(ArgReg))
|
||||
CopyRegs.insert(ArgReg);
|
||||
|
||||
// Push the low registers and lr
|
||||
if (!LoRegsToSave.empty()) {
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
|
||||
AddDefaultPred(MIB);
|
||||
for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
|
||||
if (LoRegsToSave.count(Reg)) {
|
||||
bool isKill = !MF.getRegInfo().isLiveIn(Reg);
|
||||
if (isKill)
|
||||
MBB.addLiveIn(Reg);
|
||||
|
||||
MIB.addReg(Reg, getKillRegState(isKill));
|
||||
}
|
||||
}
|
||||
MIB.setMIFlags(MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
// Push the high registers. There are no store instructions that can access
|
||||
// these registers directly, so we have to move them to low registers, and
|
||||
// push them. This might take multiple pushes, as it is possible for there to
|
||||
// be fewer low registers available than high registers which need saving.
|
||||
|
||||
// These are in reverse order so that in the case where we need to use
|
||||
// multiple PUSH instructions, the order of the registers on the stack still
|
||||
// matches the unwind info. They need to be switched back to ascending order
|
||||
// before adding to the PUSH instruction.
|
||||
static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
|
||||
ARM::R5, ARM::R4, ARM::R3,
|
||||
ARM::R2, ARM::R1, ARM::R0};
|
||||
static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
|
||||
|
||||
const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
|
||||
const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
|
||||
|
||||
// Find the first register to save.
|
||||
const unsigned *HiRegToSave = findNextOrderedReg(
|
||||
std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
|
||||
|
||||
while (HiRegToSave != AllHighRegsEnd) {
|
||||
// Find the first low register to use.
|
||||
const unsigned *CopyReg =
|
||||
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
|
||||
|
||||
// Create the PUSH, but don't insert it yet (the MOVs need to come first).
|
||||
MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH));
|
||||
AddDefaultPred(PushMIB);
|
||||
|
||||
SmallVector<unsigned, 4> RegsToPush;
|
||||
while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
|
||||
if (HiRegsToSave.count(*HiRegToSave)) {
|
||||
bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave);
|
||||
if (isKill)
|
||||
MBB.addLiveIn(*HiRegToSave);
|
||||
|
||||
// Emit a MOV from the high reg to the low reg.
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
|
||||
MIB.addReg(*CopyReg, RegState::Define);
|
||||
MIB.addReg(*HiRegToSave, getKillRegState(isKill));
|
||||
AddDefaultPred(MIB);
|
||||
|
||||
// Record the register that must be added to the PUSH.
|
||||
RegsToPush.push_back(*CopyReg);
|
||||
|
||||
CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
|
||||
HiRegToSave =
|
||||
findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
|
||||
}
|
||||
}
|
||||
|
||||
// Add the low registers to the PUSH, in ascending order.
|
||||
for (unsigned Reg : reverse(RegsToPush))
|
||||
PushMIB.addReg(Reg, RegState::Kill);
|
||||
|
||||
// Insert the PUSH instruction after the MOVs.
|
||||
MBB.insert(MI, PushMIB);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -615,15 +756,101 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
|||
MachineFunction &MF = *MBB.getParent();
|
||||
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
|
||||
const TargetInstrInfo &TII = *STI.getInstrInfo();
|
||||
const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
|
||||
MF.getSubtarget().getRegisterInfo());
|
||||
|
||||
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
|
||||
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
|
||||
|
||||
SmallSet<unsigned, 9> LoRegsToRestore;
|
||||
SmallSet<unsigned, 4> HiRegsToRestore;
|
||||
// Low registers (r0-r7) which can be used to restore the high registers.
|
||||
SmallSet<unsigned, 9> CopyRegs;
|
||||
|
||||
for (CalleeSavedInfo I : CSI) {
|
||||
unsigned Reg = I.getReg();
|
||||
|
||||
if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
|
||||
LoRegsToRestore.insert(Reg);
|
||||
} else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
|
||||
HiRegsToRestore.insert(Reg);
|
||||
} else {
|
||||
llvm_unreachable("callee-saved register of unexpected class");
|
||||
}
|
||||
|
||||
// If this is a low register not used as the frame pointer, we may want to
|
||||
// use it for restoring the high registers.
|
||||
if ((ARM::tGPRRegClass.contains(Reg)) &&
|
||||
!(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
|
||||
CopyRegs.insert(Reg);
|
||||
}
|
||||
|
||||
// If this is a return block, we may be able to use some unused return value
|
||||
// registers for restoring the high regs.
|
||||
auto Terminator = MBB.getFirstTerminator();
|
||||
if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
|
||||
CopyRegs.insert(ARM::R0);
|
||||
CopyRegs.insert(ARM::R1);
|
||||
CopyRegs.insert(ARM::R2);
|
||||
CopyRegs.insert(ARM::R3);
|
||||
for (auto Op : Terminator->implicit_operands()) {
|
||||
if (Op.isReg())
|
||||
CopyRegs.erase(Op.getReg());
|
||||
}
|
||||
}
|
||||
|
||||
static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
|
||||
ARM::R4, ARM::R5, ARM::R6, ARM::R7};
|
||||
static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
|
||||
|
||||
const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
|
||||
const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
|
||||
|
||||
// Find the first register to restore.
|
||||
auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
|
||||
HiRegsToRestore, AllHighRegsEnd);
|
||||
|
||||
while (HiRegToRestore != AllHighRegsEnd) {
|
||||
assert(!CopyRegs.empty());
|
||||
// Find the first low register to use.
|
||||
auto CopyReg =
|
||||
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
|
||||
|
||||
// Create the POP instruction.
|
||||
MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP));
|
||||
AddDefaultPred(PopMIB);
|
||||
|
||||
while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
|
||||
// Add the low register to the POP.
|
||||
PopMIB.addReg(*CopyReg, RegState::Define);
|
||||
|
||||
// Create the MOV from low to high register.
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
|
||||
MIB.addReg(*HiRegToRestore, RegState::Define);
|
||||
MIB.addReg(*CopyReg, RegState::Kill);
|
||||
AddDefaultPred(MIB);
|
||||
|
||||
CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
|
||||
HiRegToRestore =
|
||||
findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
|
||||
AddDefaultPred(MIB);
|
||||
|
||||
bool NeedsPop = false;
|
||||
for (unsigned i = CSI.size(); i != 0; --i) {
|
||||
unsigned Reg = CSI[i-1].getReg();
|
||||
|
||||
// High registers (excluding lr) have already been dealt with
|
||||
if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
|
||||
continue;
|
||||
|
||||
if (Reg == ARM::LR) {
|
||||
if (MBB.succ_empty()) {
|
||||
// Special epilogue for vararg functions. See emitEpilogue
|
||||
|
|
|
@ -0,0 +1,236 @@
|
|||
; RUN: llc -mtriple=thumbv6m-none-eabi < %s | FileCheck %s
|
||||
|
||||
declare i8* @llvm.returnaddress(i32)
|
||||
|
||||
; We don't allocate high registers, so any function not using inline asm will
|
||||
; only need to save the low registers.
|
||||
define void @low_regs_only() {
|
||||
; CHECK-LABEL: low_regs_only:
|
||||
entry:
|
||||
; CHECK: push {r4, r5, r6, r7, lr}
|
||||
tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()
|
||||
; CHECK: pop {r4, r5, r6, r7, pc}
|
||||
ret void
|
||||
}
|
||||
|
||||
; One high reg clobbered, but no low regs, args or returns. We can use an
|
||||
; argument/return register to help save/restore it.
|
||||
define void @one_high() {
|
||||
; CHECK-LABEL: one_high:
|
||||
entry:
|
||||
; CHECK: mov [[SAVEREG:r[0-3]]], r8
|
||||
; CHECK: push {[[SAVEREG]]}
|
||||
tail call void asm sideeffect "", "~{r8}"()
|
||||
; CHECK: pop {[[RESTOREREG:r[0-3]]]}
|
||||
; CHECK: mov r8, [[RESTOREREG]]
|
||||
ret void
|
||||
}
|
||||
|
||||
; 4 high regs clobbered, but still no low regs, args or returns. We can use all
|
||||
; 4 arg/return regs for the save/restore.
|
||||
define void @four_high() {
|
||||
; CHECK-LABEL: four_high:
|
||||
entry:
|
||||
; CHECK: mov r3, r11
|
||||
; CHECK: mov r2, r10
|
||||
; CHECK: mov r1, r9
|
||||
; CHECK: mov r0, r8
|
||||
; CHECK: push {r0, r1, r2, r3}
|
||||
tail call void asm sideeffect "", "~{r8},~{r9},~{r10},~{r11}"()
|
||||
; CHECK: pop {r0, r1, r2, r3}
|
||||
; CHECK: mov r8, r0
|
||||
; CHECK: mov r9, r1
|
||||
; CHECK: mov r10, r2
|
||||
; CHECK: mov r11, r3
|
||||
ret void
|
||||
}
|
||||
|
||||
; One high and one low register clobbered. lr also gets pushed to simplify the
|
||||
; return, and r7 to keep the stack aligned. Here, we could use r0-r3, r4, r7 or
|
||||
; lr to save/restore r8.
|
||||
define void @one_high_one_low() {
|
||||
; CHECK-LABEL: one_high_one_low:
|
||||
entry:
|
||||
; CHECK: push {r4, r7, lr}
|
||||
; CHECK: mov [[SAVEREG:r0|r1|r2|r3|r4|r7|lr]], r8
|
||||
; CHECK: push {[[SAVEREG]]}
|
||||
tail call void asm sideeffect "", "~{r4},~{r8}"()
|
||||
; CHECK: pop {[[RESTOREREG:r0|r1|r2|r3|r4|r7]]}
|
||||
; CHECK: mov r8, [[RESTOREREG]]
|
||||
; CHECK: pop {r4, r7, pc}
|
||||
ret void
|
||||
}
|
||||
|
||||
; All callee-saved registers clobbered, r4-r7 and lr are not live after the
|
||||
; first push so can be used for pushing the high registers.
|
||||
define void @four_high_four_low() {
|
||||
; CHECK-LABEL: four_high_four_low:
|
||||
entry:
|
||||
; CHECK: push {r4, r5, r6, r7, lr}
|
||||
; CHECK: mov lr, r11
|
||||
; CHECK: mov r7, r10
|
||||
; CHECK: mov r6, r9
|
||||
; CHECK: mov r5, r8
|
||||
; CHECK: push {r5, r6, r7, lr}
|
||||
tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"()
|
||||
; CHECK: pop {r0, r1, r2, r3}
|
||||
; CHECK: mov r8, r0
|
||||
; CHECK: mov r9, r1
|
||||
; CHECK: mov r10, r2
|
||||
; CHECK: mov r11, r3
|
||||
; CHECK: pop {r4, r5, r6, r7, pc}
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; All callee-saved registers clobbered, and frame pointer is requested. r7 now
|
||||
; cannot be used while saving/restoring the high regs.
|
||||
define void @four_high_four_low_frame_ptr() "no-frame-pointer-elim"="true" {
|
||||
; CHECK-LABEL: four_high_four_low_frame_ptr:
|
||||
entry:
|
||||
; CHECK: push {r4, r5, r6, r7, lr}
|
||||
; CHECK: add r7, sp, #12
|
||||
; CHECK: mov lr, r11
|
||||
; CHECK: mov r6, r10
|
||||
; CHECK: mov r5, r9
|
||||
; CHECK: mov r4, r8
|
||||
; CHECK: push {r4, r5, r6, lr}
|
||||
tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"()
|
||||
; CHECK: pop {r0, r1, r2, r3}
|
||||
; CHECK: mov r8, r0
|
||||
; CHECK: mov r9, r1
|
||||
; CHECK: mov r10, r2
|
||||
; CHECK: mov r11, r3
|
||||
; CHECK: pop {r4, r5, r6, r7, pc}
|
||||
ret void
|
||||
}
|
||||
|
||||
; All callee-saved registers clobbered, frame pointer is requested and
|
||||
; llvm.returnaddress used. r7 and lr now cannot be used while saving/restoring
|
||||
; the high regs.
|
||||
define void @four_high_four_low_frame_ptr_ret_addr() "no-frame-pointer-elim"="true" {
|
||||
; CHECK-LABEL: four_high_four_low_frame_ptr_ret_addr:
|
||||
entry:
|
||||
; CHECK: push {r4, r5, r6, r7, lr}
|
||||
; CHECK: mov r6, r11
|
||||
; CHECK: mov r5, r10
|
||||
; CHECK: mov r4, r9
|
||||
; CHECK: mov r3, r8
|
||||
; CHECK: push {r3, r4, r5, r6}
|
||||
%a = tail call i8* @llvm.returnaddress(i32 0)
|
||||
tail call void asm sideeffect "", "r,~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"(i8* %a)
|
||||
; CHECK: pop {r0, r1, r2, r3}
|
||||
; CHECK: mov r8, r0
|
||||
; CHECK: mov r9, r1
|
||||
; CHECK: mov r10, r2
|
||||
; CHECK: mov r11, r3
|
||||
; CHECK: pop {r4, r5, r6, r7, pc}
|
||||
ret void
|
||||
}
|
||||
|
||||
; 4 high regs clobbered, all 4 argument registers used. We push an extra 4 low
|
||||
; registers, so that we can use them for saving the high regs.
|
||||
define void @four_high_four_arg(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: four_high_four_arg:
|
||||
entry:
|
||||
; CHECK: push {r5, r6, r7, lr}
|
||||
; CHECK: mov lr, r11
|
||||
; CHECK: mov r7, r10
|
||||
; CHECK: mov r6, r9
|
||||
; CHECK: mov r5, r8
|
||||
; CHECK: push {r5, r6, r7, lr}
|
||||
tail call void asm sideeffect "", "r,r,r,r,~{r8},~{r9},~{r10},~{r11}"(i32 %a, i32 %b, i32 %c, i32 %d)
|
||||
; CHECK: pop {r0, r1, r2, r3}
|
||||
; CHECK: mov r8, r0
|
||||
; CHECK: mov r9, r1
|
||||
; CHECK: mov r10, r2
|
||||
; CHECK: mov r11, r3
|
||||
; CHECK: pop {r5, r6, r7, pc}
|
||||
ret void
|
||||
}
|
||||
|
||||
; 4 high regs clobbered, all 4 return registers used. We push an extra 4 low
|
||||
; registers, so that we can use them for restoring the high regs.
|
||||
define <4 x i32> @four_high_four_return() {
|
||||
; CHECK-LABEL: four_high_four_return:
|
||||
entry:
|
||||
; CHECK: push {r4, r5, r6, r7, lr}
|
||||
; CHECK: mov lr, r11
|
||||
; CHECK: mov r7, r10
|
||||
; CHECK: mov r6, r9
|
||||
; CHECK: mov r5, r8
|
||||
; CHECK: push {r5, r6, r7, lr}
|
||||
tail call void asm sideeffect "", "~{r8},~{r9},~{r10},~{r11}"()
|
||||
%vecinit = insertelement <4 x i32> undef, i32 1, i32 0
|
||||
%vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
|
||||
%vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2
|
||||
%vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3
|
||||
; CHECK: pop {r4, r5, r6, r7}
|
||||
; CHECK: mov r8, r4
|
||||
; CHECK: mov r9, r5
|
||||
; CHECK: mov r10, r6
|
||||
; CHECK: mov r11, r7
|
||||
; CHECK: pop {r4, r5, r6, r7, pc}
|
||||
ret <4 x i32> %vecinit13
|
||||
}
|
||||
|
||||
; 4 high regs clobbered, all args & returns used, frame pointer requested and
|
||||
; llvm.returnaddress called. This leaves us with 3 low registers available (r4,
|
||||
; r5, r6), with which to save 4 high registers, so we have to use two pushes
|
||||
; and pops.
|
||||
define <4 x i32> @all_of_the_above(i32 %a, i32 %b, i32 %c, i32 %d) "no-frame-pointer-elim"="true" {
|
||||
; CHECK-LABEL: all_of_the_above
|
||||
entry:
|
||||
; CHECK: push {r4, r5, r6, r7, lr}
|
||||
; CHECK: add r7, sp, #12
|
||||
; CHECK: mov r6, r11
|
||||
; CHECK: mov r5, r10
|
||||
; CHECK: mov r4, r9
|
||||
; CHECK: push {r4, r5, r6}
|
||||
; CHECK: mov r6, r8
|
||||
; CHECK: push {r6}
|
||||
tail call void asm sideeffect "", "r,r,r,r,~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"(i32 %a, i32 %b, i32 %c, i32 %d)
|
||||
%e = tail call i8* @llvm.returnaddress(i32 0)
|
||||
%f = ptrtoint i8* %e to i32
|
||||
%vecinit = insertelement <4 x i32> undef, i32 %f, i32 0
|
||||
%vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
|
||||
%vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2
|
||||
%vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3
|
||||
; CHECK: pop {r4, r5, r6}
|
||||
; CHECK: mov r8, r4
|
||||
; CHECK: mov r9, r5
|
||||
; CHECK: mov r10, r6
|
||||
; CHECK: pop {r4}
|
||||
; CHECK: mov r11, r4
|
||||
; CHECK: pop {r4, r5, r6, r7, pc}
|
||||
ret <4 x i32> %vecinit13
|
||||
}
|
||||
|
||||
; When a base pointer is being used, we can safely use it for saving/restoring
|
||||
; the high regs because it is set after the last push, and not used at all in the
|
||||
; epilogue. We can also use r4 for restoring the registers despite it also being
|
||||
; used when restoring sp from fp, as that happens before the first pop.
|
||||
define <4 x i32> @base_pointer(i32 %a) {
|
||||
; CHECK-LABEL: base_pointer:
|
||||
entry:
|
||||
; CHECK: push {r4, r6, r7, lr}
|
||||
; CHECK: add r7, sp, #8
|
||||
; CHECK: mov lr, r9
|
||||
; CHECK: mov r6, r8
|
||||
; CHECK: push {r6, lr}
|
||||
; CHECK: mov r6, sp
|
||||
%b = alloca i32, i32 %a
|
||||
call void asm sideeffect "", "r,~{r8},~{r9}"(i32* %b)
|
||||
%vecinit = insertelement <4 x i32> undef, i32 1, i32 0
|
||||
%vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
|
||||
%vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2
|
||||
%vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3
|
||||
; CHECK: subs r4, r7, #7
|
||||
; CHECK: subs r4, #9
|
||||
; CHECK: mov sp, r4
|
||||
; CHECK: pop {r4, r6}
|
||||
; CHECK: mov r8, r4
|
||||
; CHECK: mov r9, r6
|
||||
; CHECK: pop {r4, r6, r7, pc}
|
||||
ret <4 x i32> %vecinit13
|
||||
}
|
|
@ -1,9 +1,9 @@
|
|||
; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=ALIGN4
|
||||
; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=ALIGN8
|
||||
; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-ios
|
||||
; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s
|
||||
; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=ALIGN4
|
||||
; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-none-eabi
|
||||
; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s
|
||||
; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=ALIGN8
|
||||
|
||||
; Largest stack for which a single tADDspi/tSUBspi is enough
|
||||
define void @test1() {
|
||||
|
@ -33,7 +33,9 @@ define void @test100_nofpelim() "no-frame-pointer-elim"="true" {
|
|||
; CHECK: sub sp, #508
|
||||
; CHECK: sub sp, #508
|
||||
; CHECK: sub sp, #508
|
||||
; CHECK: subs r4, r7, #4
|
||||
; ALIGN4: subs r4, r7, #4
|
||||
; ALIGN8: subs r4, r7, #7
|
||||
; ALIGN8: subs r4, #1
|
||||
; CHECK: mov sp, r4
|
||||
%tmp = alloca [ 1524 x i8 ] , align 4
|
||||
ret void
|
||||
|
@ -55,7 +57,9 @@ define void @test2_nofpelim() "no-frame-pointer-elim"="true" {
|
|||
; CHECK-LABEL: test2_nofpelim:
|
||||
; CHECK: ldr [[TEMP:r[0-7]]],
|
||||
; CHECK: add sp, [[TEMP]]
|
||||
; CHECK: subs r4, r7, #4
|
||||
; ALIGN4: subs r4, r7, #4
|
||||
; ALIGN8: subs r4, r7, #7
|
||||
; ALIGN8: subs r4, #1
|
||||
; CHECK: mov sp, r4
|
||||
%tmp = alloca [ 1528 x i8 ] , align 4
|
||||
ret void
|
||||
|
|
|
@ -27,9 +27,9 @@ define void @leaf_nofpelim() "no-frame-pointer-elim"="true" {
|
|||
; need to use a frame pointer.
|
||||
define void @leaf_lowreg_nofpelim() "no-frame-pointer-elim"="true" {
|
||||
; CHECK-LABEL: leaf_lowreg_nofpelim:
|
||||
; CHECK: push {r4, r7, lr}
|
||||
; CHECK: add r7, sp, #4
|
||||
; CHECK: pop {r4, r7, pc}
|
||||
; CHECK: push {r4, r6, r7, lr}
|
||||
; CHECK: add r7, sp, #8
|
||||
; CHECK: pop {r4, r6, r7, pc}
|
||||
call void asm sideeffect "", "~{r4}" ()
|
||||
ret void
|
||||
}
|
||||
|
@ -40,11 +40,11 @@ define void @leaf_lowreg_nofpelim() "no-frame-pointer-elim"="true" {
|
|||
; the stack.
|
||||
define void @leaf_highreg_nofpelim() "no-frame-pointer-elim"="true" {
|
||||
; CHECK-LABEL: leaf_highreg_nofpelim:
|
||||
; CHECK: push {r7, lr}
|
||||
; CHECK: mov r7, sp
|
||||
; CHECK: push {r6, r7, lr}
|
||||
; CHECK: add r7, sp, #4
|
||||
; CHECK: str r8, [sp, #-4]!
|
||||
; CHECK: ldr r8, [sp], #4
|
||||
; CHECK: pop {r7, pc}
|
||||
; CHECK: pop {r6, r7, pc}
|
||||
call void asm sideeffect "", "~{r8}" ()
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue