[AArch64] Reduce number of callee-save save/restores.

Summary:
Before this change, the set of callee-save registers to spill was rounded
up to an even number of GPRs and an even number of FPRs so that they could
always be saved and restored as pairs.  This change eliminates these extra
padding load/stores, though it does keep the stack allocation the same size
unless both the GPR and FPR sets have an odd size, in which case one
full pair stack slot (16 bytes) is saved.

This optimization cannot currently be done for MachO targets since they
rely on the compact unwind format, a fast-path .debug_frame equivalent
that can only encode callee-save registers as pairs.

Reviewers: t.p.northover, rengolin, mcrosier, jmolloy

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D17000

llvm-svn: 260689
This commit is contained in:
Geoff Berry 2016-02-12 16:31:41 +00:00
parent bdb04d9032
commit c25d3bd238
5 changed files with 384 additions and 143 deletions

View File

@ -675,21 +675,41 @@ struct RegPairInfo {
int FrameIdx;
int Offset;
bool IsGPR;
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};
static void
computeCalleeSaveRegisterPairs(const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI,
SmallVectorImpl<RegPairInfo> &RegPairs) {
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
if (CSI.empty())
return;
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Count = CSI.size();
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
// MachO's compact unwind format relies on all registers being stored in
// pairs.
assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
unsigned Offset = AFI->getCalleeSavedStackSize();
for (unsigned i = 0; i < Count; i += 2) {
unsigned idx = Count - i - 2;
for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
RPI.Reg1 = CSI[idx].getReg();
RPI.Reg2 = CSI[idx + 1].getReg();
RPI.Reg1 = CSI[i].getReg();
assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
AArch64::FPR64RegClass.contains(RPI.Reg1));
RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
// Add the next reg to the pair if it is in the same register class.
if (i + 1 < Count) {
unsigned NextReg = CSI[i + 1].getReg();
if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
(!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
RPI.Reg2 = NextReg;
}
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
@ -697,26 +717,45 @@ computeCalleeSaveRegisterPairs(const std::vector<CalleeSavedInfo> &CSI,
//
// The order of the registers in the list is controlled by
// getCalleeSavedRegs(), so they will always be in-order, as well.
assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
assert((!RPI.isPaired() ||
(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
"Out of order callee saved regs!");
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
RPI.FrameIdx = CSI[idx + 1].getFrameIdx();
if (AArch64::GPR64RegClass.contains(RPI.Reg1))
RPI.IsGPR = true;
else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
RPI.IsGPR = false;
else
llvm_unreachable("Unexpected callee saved register!");
// Compute offset: i = 0 => offset = Count;
// i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
RPI.Offset = (i == 0) ? Count : i;
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
(RPI.isPaired() &&
((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
RPI.Reg1 + 1 == RPI.Reg2))) &&
"Callee-save registers not saved as adjacent register pair!");
RPI.FrameIdx = CSI[i].getFrameIdx();
if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
Offset -= 16;
assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
MFI->setObjectSize(RPI.FrameIdx, 16);
} else
Offset -= RPI.isPaired() ? 16 : 8;
assert(Offset % 8 == 0);
RPI.Offset = Offset / 8;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
"Offset out of bounds for LDP/STP immediate");
RegPairs.push_back(RPI);
if (RPI.isPaired())
++i;
}
// Align first offset to even 16-byte boundary to avoid additional SP
// adjustment instructions.
// Last pair offset is size of whole callee-save region for SP
// pre-dec/post-inc.
RegPairInfo &LastPair = RegPairs.back();
assert(AFI->getCalleeSavedStackSize() % 8 == 0);
LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
}
bool AArch64FrameLowering::spillCalleeSavedRegisters(
@ -728,9 +767,9 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs);
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
unsigned Reg1 = RPI.Reg1;
@ -746,36 +785,48 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
bool BumpSP = RPII == RegPairs.begin();
bool BumpSP = RPII == RegPairs.rbegin();
if (RPI.IsGPR) {
// For first spill use pre-increment store.
if (BumpSP)
StrOpc = AArch64::STPXpre;
StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre;
else
StrOpc = AArch64::STPXi;
StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
} else {
// For first spill use pre-increment store.
if (BumpSP)
StrOpc = AArch64::STPDpre;
StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre;
else
StrOpc = AArch64::STPDi;
StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
}
DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
<< TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx
<< ", " << RPI.FrameIdx+1 << ")\n");
DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
if (RPI.isPaired())
dbgs() << ", " << TRI->getName(Reg2);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired())
dbgs() << ", " << RPI.FrameIdx+1;
dbgs() << ")\n");
const int Offset = BumpSP ? -RPI.Offset : RPI.Offset;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (BumpSP)
MIB.addReg(AArch64::SP, RegState::Define);
MBB.addLiveIn(Reg1);
MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
if (RPI.isPaired()) {
MBB.addLiveIn(Reg1);
MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
.addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
.setMIFlag(MachineInstr::FrameSetup);
} else {
MBB.addLiveIn(Reg1);
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
.addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled
.setMIFlag(MachineInstr::FrameSetup);
}
}
return true;
}
@ -792,9 +843,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (MI != MBB.end())
DL = MI->getDebugLoc();
computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs);
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
unsigned Reg1 = RPI.Reg1;
@ -808,33 +859,43 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// ldp x22, x21, [sp], #48 // addImm(+6)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
bool BumpSP = RPII == std::prev(RegPairs.rend());
bool BumpSP = RPII == std::prev(RegPairs.end());
if (RPI.IsGPR) {
if (BumpSP)
LdrOpc = AArch64::LDPXpost;
LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost;
else
LdrOpc = AArch64::LDPXi;
LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
} else {
if (BumpSP)
LdrOpc = AArch64::LDPDpost;
LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost;
else
LdrOpc = AArch64::LDPDi;
LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
}
DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
<< TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx
<< ", " << RPI.FrameIdx+1 << ")\n");
DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
if (RPI.isPaired())
dbgs() << ", " << TRI->getName(Reg2);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired())
dbgs() << ", " << RPI.FrameIdx+1;
dbgs() << ")\n");
const int Offset = RPI.Offset;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
if (BumpSP)
MIB.addReg(AArch64::SP, RegState::Define);
MIB.addReg(Reg2, getDefRegState(true))
if (RPI.isPaired())
MIB.addReg(Reg2, getDefRegState(true))
.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
.addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8]
// where the factor * 8 is implicit
.setMIFlag(MachineInstr::FrameDestroy);
else
MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
.addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled
.setMIFlag(MachineInstr::FrameDestroy);
}
return true;
}
@ -851,8 +912,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
SmallVector<unsigned, 4> UnspilledCSGPRs;
SmallVector<unsigned, 4> UnspilledCSFPRs;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
// The frame record needs to be created by saving the appropriate registers
if (hasFP(MF)) {
@ -860,80 +922,54 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(AArch64::LR);
}
// Spill the BasePtr if it's used. Do this first thing so that the
// getCalleeSavedRegs() below will get the right answer.
unsigned BasePointerReg = AArch64::NoRegister;
if (RegInfo->hasBasePointer(MF))
SavedRegs.set(RegInfo->getBaseRegister());
BasePointerReg = RegInfo->getBaseRegister();
unsigned StackAlignReg = AArch64::NoRegister;
if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
SavedRegs.set(AArch64::X9);
StackAlignReg = AArch64::X9;
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
bool ExtraCSSpill = false;
bool CanEliminateFrame = true;
DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
// Check pairs of consecutive callee-saved registers.
for (unsigned i = 0; CSRegs[i]; i += 2) {
assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
// Add the stack re-align scratch register and base pointer register to
// SavedRegs set only if they are callee-save.
if (Reg == BasePointerReg || Reg == StackAlignReg)
SavedRegs.set(Reg);
const unsigned OddReg = CSRegs[i];
const unsigned EvenReg = CSRegs[i + 1];
assert((AArch64::GPR64RegClass.contains(OddReg) &&
AArch64::GPR64RegClass.contains(EvenReg)) ^
(AArch64::FPR64RegClass.contains(OddReg) &&
AArch64::FPR64RegClass.contains(EvenReg)) &&
"Register class mismatch!");
const bool OddRegUsed = SavedRegs.test(OddReg);
const bool EvenRegUsed = SavedRegs.test(EvenReg);
// Early exit if none of the registers in the register pair is actually
// used.
if (!OddRegUsed && !EvenRegUsed) {
if (AArch64::GPR64RegClass.contains(OddReg)) {
UnspilledCSGPRs.push_back(OddReg);
UnspilledCSGPRs.push_back(EvenReg);
} else {
UnspilledCSFPRs.push_back(OddReg);
UnspilledCSFPRs.push_back(EvenReg);
bool RegUsed = SavedRegs.test(Reg);
unsigned PairedReg = CSRegs[i ^ 1];
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
!RegInfo->isReservedReg(MF, Reg)) {
UnspilledCSGPR = Reg;
UnspilledCSGPRPaired = PairedReg;
}
continue;
}
unsigned Reg = AArch64::NoRegister;
// If only one of the registers of the register pair is used, make sure to
// mark the other one as used as well.
if (OddRegUsed ^ EvenRegUsed) {
// Find out which register is the additional spill.
Reg = OddRegUsed ? EvenReg : OddReg;
SavedRegs.set(Reg);
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
if (Subtarget.isTargetMachO() && !SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
ExtraCSSpill = true;
}
DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
(RegInfo->getEncodingValue(OddReg) + 1 ==
RegInfo->getEncodingValue(EvenReg))) &&
"Register pair of non-adjacent registers!");
if (AArch64::GPR64RegClass.contains(OddReg)) {
NumGPRSpilled += 2;
// If it's not a reserved register, we can use it in lieu of an
// emergency spill slot for the register scavenger.
// FIXME: It would be better to instead keep looking and choose another
// unspilled register that isn't reserved, if there is one.
if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
ExtraCSSpill = true;
} else
NumFPRSpilled += 2;
CanEliminateFrame = false;
}
DEBUG(dbgs() << "\n");
DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
for (int Reg = SavedRegs.find_first(); Reg != -1;
Reg = SavedRegs.find_next(Reg))
dbgs() << ' ' << PrintReg(Reg, RegInfo);
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumRegsSpilled = SavedRegs.count();
bool CanEliminateFrame = NumRegsSpilled == 0;
// FIXME: Set BigStack if any stack slot references may be out of range.
// For now, just conservatively guestimate based on unscaled indexing
@ -942,8 +978,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned CFSize =
MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
@ -956,20 +991,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// above to keep the number of spills even, we don't need to do anything else
// here.
if (BigStack && !ExtraCSSpill) {
// If we're adding a register to spill here, we have to add two of them
// to keep the number of regs to spill even.
assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
unsigned Count = 0;
while (!UnspilledCSGPRs.empty() && Count < 2) {
unsigned Reg = UnspilledCSGPRs.back();
UnspilledCSGPRs.pop_back();
DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
<< " to get a scratch register.\n");
SavedRegs.set(Reg);
if (UnspilledCSGPR != AArch64::NoRegister) {
DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
<< " to get a scratch register.\n");
SavedRegs.set(UnspilledCSGPR);
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
// store the pair.
if (Subtarget.isTargetMachO())
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = true;
++Count;
++NumGPRSpilled;
NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
@ -983,5 +1015,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
AFI->setCalleeSavedStackSize(8 * (NumGPRSpilled + NumFPRSpilled));
// Round up to register pair alignment to avoid additional SP adjustment
// instructions.
AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
}

View File

@ -1,4 +1,5 @@
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO
; This test aims to check basic correctness of frame layout &
; frame access code. There are 8 functions in this test file,
@ -97,7 +98,7 @@ entry:
; CHECK-LABEL: novla_nodynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@ -105,8 +106,7 @@ entry:
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
@ -114,10 +114,34 @@ entry:
; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
; Check epilogue:
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
; CHECK-MACHO-LABEL: _novla_nodynamicrealign_call:
; CHECK-MACHO: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK-MACHO: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK-MACHO: stp x29, x30, [sp, #16]
; CHECK-MACHO: add x29, sp, #16
; Check correctness of cfi pseudo-instructions
; CHECK-MACHO: .cfi_def_cfa w29, 16
; CHECK-MACHO: .cfi_offset w30, -8
; CHECK-MACHO: .cfi_offset w29, -16
; CHECK-MACHO: .cfi_offset w19, -24
; CHECK-MACHO: .cfi_offset w20, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
; Check correct access to local variable on the stack, through stack pointer
; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp, #12]
; Check epilogue:
; CHECK-MACHO: ldp x29, x30, [sp, #16]
; CHECK-MACHO: ldp x20, x19, [sp], #32
; CHECK-MACHO: ret
; CHECK-MACHO: .cfi_endproc
declare i32 @g() #0
@ -159,7 +183,7 @@ entry:
; CHECK-LABEL: novla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@ -170,8 +194,7 @@ entry:
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
@ -181,10 +204,39 @@ entry:
; Check that the stack pointer gets restored from the frame pointer.
; CHECK: sub sp, x29, #16 // =16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
; CHECK-MACHO-LABEL: _novla_dynamicrealign_call:
; CHECK-MACHO: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK-MACHO: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK-MACHO: stp x29, x30, [sp, #16]
; CHECK-MACHO: add x29, sp, #16
; Check the dynamic realignment of the stack pointer to a 128-byte boundary
; CHECK-MACHO: sub x9, sp, #96
; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
; Check correctness of cfi pseudo-instructions
; CHECK-MACHO: .cfi_def_cfa w29, 16
; CHECK-MACHO: .cfi_offset w30, -8
; CHECK-MACHO: .cfi_offset w29, -16
; CHECK-MACHO: .cfi_offset w19, -24
; CHECK-MACHO: .cfi_offset w20, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
; Check correct access to local variable on the stack, through re-aligned stack pointer
; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp]
; Check epilogue:
; Check that the stack pointer gets restored from the frame pointer.
; CHECK-MACHO: sub sp, x29, #16
; CHECK-MACHO: ldp x29, x30, [sp, #16]
; CHECK-MACHO: ldp x20, x19, [sp], #32
; CHECK-MACHO: ret
; CHECK-MACHO: .cfi_endproc
; Function Attrs: nounwind
define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@ -336,7 +388,7 @@ entry:
; CHECK-LABEL: vla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK: stp x22, x21, [sp, #-48]!
; CHECK: str x21, [sp, #-48]!
; CHECK: stp x20, x19, [sp, #16]
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #32]
@ -354,8 +406,7 @@ entry:
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
; CHECK: .cfi_offset w21, -40
; CHECK: .cfi_offset w22, -48
; CHECK: .cfi_offset w21, -48
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
@ -376,10 +427,57 @@ entry:
; CHECK: sub sp, x29, #32
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x20, x19, [sp, #16]
; CHECK: ldp x22, x21, [sp], #48
; CHECK: ldr x21, [sp], #48
; CHECK: ret
; CHECK: .cfi_endproc
; CHECK-MACHO-LABEL: _vla_dynamicrealign_call:
; CHECK-MACHO: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK-MACHO: stp x22, x21, [sp, #-48]!
; CHECK-MACHO: stp x20, x19, [sp, #16]
; Check that the frame pointer is created:
; CHECK-MACHO: stp x29, x30, [sp, #32]
; CHECK-MACHO: add x29, sp, #32
; Check that the stack pointer gets re-aligned to 128
; bytes & the base pointer (x19) gets initialized to
; this 128-byte aligned area for local variables &
; spill slots
; CHECK-MACHO: sub x9, sp, #80
; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
; CHECK-MACHO: mov x19, sp
; Check correctness of cfi pseudo-instructions
; CHECK-MACHO: .cfi_def_cfa w29, 16
; CHECK-MACHO: .cfi_offset w30, -8
; CHECK-MACHO: .cfi_offset w29, -16
; CHECK-MACHO: .cfi_offset w19, -24
; CHECK-MACHO: .cfi_offset w20, -32
; CHECK-MACHO: .cfi_offset w21, -40
; CHECK-MACHO: .cfi_offset w22, -48
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
; CHECK-MACHO: mov w9, w0
; CHECK-MACHO: mov x10, sp
; CHECK-MACHO: lsl x9, x9, #2
; CHECK-MACHO: add x9, x9, #15
; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK-MACHO: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
; Check epilogue:
; Check that the stack pointer gets restored from the frame pointer.
; CHECK-MACHO: sub sp, x29, #32
; CHECK-MACHO: ldp x29, x30, [sp, #32]
; CHECK-MACHO: ldp x20, x19, [sp, #16]
; CHECK-MACHO: ldp x22, x21, [sp], #48
; CHECK-MACHO: ret
; CHECK-MACHO: .cfi_endproc
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@ -398,7 +496,7 @@ entry:
; CHECK-LABEL: vla_dynamicrealign_nocall
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@ -428,9 +526,44 @@ entry:
; Check that the stack pointer gets restored from the frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall:
; Check that used callee-saved registers are saved
; CHECK-MACHO: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK-MACHO: stp x29, x30, [sp, #16]
; CHECK-MACHO: add x29, sp, #16
; Check that the stack pointer gets re-aligned to 128
; bytes & the base pointer (x19) gets initialized to
; this 128-byte aligned area for local variables &
; spill slots
; CHECK-MACHO: sub x9, sp, #96
; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
; CHECK-MACHO: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
; CHECK-MACHO: mov w9, w0
; CHECK-MACHO: mov x10, sp
; CHECK-MACHO: lsl x9, x9, #2
; CHECK-MACHO: add x9, x9, #15
; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK-MACHO: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
; Check epilogue:
; Check that the stack pointer gets restored from the frame pointer.
; CHECK-MACHO: sub sp, x29, #16
; CHECK-MACHO: ldp x29, x30, [sp, #16]
; CHECK-MACHO: ldp x20, x19, [sp], #32
; CHECK-MACHO: ret
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@ -449,7 +582,7 @@ entry:
; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; CHECK: stp x28, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@ -479,9 +612,44 @@ entry:
; Check that the stack pointer gets restored from the frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ldp x28, x19, [sp], #32
; CHECK: ret
; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align:
; Check that used callee-saved registers are saved
; CHECK-MACHO: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK-MACHO: stp x29, x30, [sp, #16]
; CHECK-MACHO: add x29, sp, #16
; Check that the stack pointer gets re-aligned to 32768
; bytes & the base pointer (x19) gets initialized to
; this 32768-byte aligned area for local variables &
; spill slots
; CHECK-MACHO: sub x9, sp, #7, lsl #12
; CHECK-MACHO: and sp, x9, #0xffffffffffff8000
; CHECK-MACHO: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
; CHECK-MACHO: mov w9, w0
; CHECK-MACHO: mov x10, sp
; CHECK-MACHO: lsl x9, x9, #2
; CHECK-MACHO: add x9, x9, #15
; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK-MACHO: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
; Check epilogue:
; Check that the stack pointer gets restored from the frame pointer.
; CHECK-MACHO: sub sp, x29, #16
; CHECK-MACHO: ldp x29, x30, [sp, #16]
; CHECK-MACHO: ldp x20, x19, [sp], #32
; CHECK-MACHO: ret
define void @realign_conditional(i1 %b) {
entry:

View File

@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=arm64-apple-ios -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK-MACHO
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
declare void @use_addr(i8*)
@ -113,14 +114,21 @@ define void @test_variadic_alloca(i64 %n, ...) {
define void @test_alloca_large_frame(i64 %n) {
; CHECK-LABEL: test_alloca_large_frame:
; CHECK-MACHO-LABEL: test_alloca_large_frame:
; CHECK: stp x20, x19, [sp, #-32]!
; CHECK: stp x28, x19, [sp, #-32]!
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; CHECK: sub sp, sp, #1953, lsl #12
; CHECK: sub sp, sp, #512
; CHECK-MACHO: stp x20, x19, [sp, #-32]!
; CHECK-MACHO: stp x29, x30, [sp, #16]
; CHECK-MACHO: add x29, sp, #16
; CHECK-MACHO: sub sp, sp, #1953, lsl #12
; CHECK-MACHO: sub sp, sp, #512
%addr1 = alloca i8, i64 %n
%addr2 = alloca i64, i64 1000000
@ -130,7 +138,11 @@ define void @test_alloca_large_frame(i64 %n) {
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ldp x28, x19, [sp], #32
; CHECK-MACHO: sub sp, x29, #16
; CHECK-MACHO: ldp x29, x30, [sp, #16]
; CHECK-MACHO: ldp x20, x19, [sp], #32
}
declare i8* @llvm.stacksave()

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
; rdar://9167275

View File

@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck -check-prefix CHECK-NOTMACHO %s
;
; rdar://14075006
@ -23,6 +24,19 @@ define void @odd() nounwind {
; CHECK: ldp d11, d10, [sp, #32]
; CHECK: ldp d13, d12, [sp, #16]
; CHECK: ldp d15, d14, [sp], #144
; CHECK-NOTMACHO-LABEL: odd:
; CHECK-NOTMACHO: stp d14, d12, [sp, #-80]!
; CHECK-NOTMACHO: stp d10, d8, [sp, #16]
; CHECK-NOTMACHO: str x27, [sp, #32]
; CHECK-NOTMACHO: stp x25, x23, [sp, #48]
; CHECK-NOTMACHO: stp x21, x19, [sp, #64]
; CHECK-NOTMACHO: movz x0, #0x2a
; CHECK-NOTMACHO: ldp x21, x19, [sp, #64]
; CHECK-NOTMACHO: ldp x25, x23, [sp, #48]
; CHECK-NOTMACHO: ldr x27, [sp, #32]
; CHECK-NOTMACHO: ldp d10, d8, [sp, #16]
; CHECK-NOTMACHO: ldp d14, d12, [sp], #80
call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind
ret void
}
@ -48,6 +62,19 @@ define void @even() nounwind {
; CHECK: ldp d11, d10, [sp, #32]
; CHECK: ldp d13, d12, [sp, #16]
; CHECK: ldp d15, d14, [sp], #144
; CHECK-NOTMACHO-LABEL: even:
; CHECK-NOTMACHO: stp d15, d13, [sp, #-80]!
; CHECK-NOTMACHO: stp d11, d9, [sp, #16]
; CHECK-NOTMACHO: str x28, [sp, #32]
; CHECK-NOTMACHO: stp x26, x24, [sp, #48]
; CHECK-NOTMACHO: stp x22, x20, [sp, #64]
; CHECK-NOTMACHO: movz x0, #0x2a
; CHECK-NOTMACHO: ldp x22, x20, [sp, #64]
; CHECK-NOTMACHO: ldp x26, x24, [sp, #48]
; CHECK-NOTMACHO: ldr x28, [sp, #32]
; CHECK-NOTMACHO: ldp d11, d9, [sp, #16]
; CHECK-NOTMACHO: ldp d15, d13, [sp], #80
call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind
ret void
}