forked from OSchip/llvm-project
[AArch64][ShrinkWrap] Fix bug in prolog clobbering live reg when shrink wrapping.
Summary: See bug https://llvm.org/bugs/show_bug.cgi?id=26642 Reviewers: qcolombet, t.p.northover Subscribers: aemerson, rengolin, mcrosier, llvm-commits Differential Revision: http://reviews.llvm.org/D17350 llvm-svn: 261349
This commit is contained in:
parent
f6fee29ceb
commit
7e4ba3dc02
|
@ -250,6 +250,63 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
|
|||
}
|
||||
}
|
||||
|
||||
// Find a scratch register that we can use at the start of the prologue to
|
||||
// re-align the stack pointer. We avoid using callee-save registers since they
|
||||
// may appear to be free when this is called from canUseAsPrologue (during
|
||||
// shrink wrapping), but then no longer be free when this is called from
|
||||
// emitPrologue.
|
||||
//
|
||||
// FIXME: This is a bit conservative, since in the above case we could use one
|
||||
// of the callee-save registers as a scratch temp to re-align the stack pointer,
|
||||
// but we would then have to make sure that we were in fact saving at least one
|
||||
// callee-save register in the prologue, which is additional complexity that
|
||||
// doesn't seem worth the benefit.
|
||||
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
|
||||
MachineFunction *MF = MBB->getParent();
|
||||
|
||||
// If MBB is an entry block, use X9 as the scratch register
|
||||
if (&MF->front() == MBB)
|
||||
return AArch64::X9;
|
||||
|
||||
RegScavenger RS;
|
||||
RS.enterBasicBlock(MBB);
|
||||
|
||||
// Prefer X9 since it was historically used for the prologue scratch reg.
|
||||
if (!RS.isRegUsed(AArch64::X9))
|
||||
return AArch64::X9;
|
||||
|
||||
// Find a free non callee-save reg.
|
||||
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
|
||||
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
|
||||
BitVector CalleeSaveRegs(RegInfo->getNumRegs());
|
||||
for (unsigned i = 0; CSRegs[i]; ++i)
|
||||
CalleeSaveRegs.set(CSRegs[i]);
|
||||
|
||||
BitVector Available = RS.getRegsAvailable(&AArch64::GPR64RegClass);
|
||||
for (int AvailReg = Available.find_first(); AvailReg != -1;
|
||||
AvailReg = Available.find_next(AvailReg))
|
||||
if (!CalleeSaveRegs.test(AvailReg))
|
||||
return AvailReg;
|
||||
|
||||
return AArch64::NoRegister;
|
||||
}
|
||||
|
||||
bool AArch64FrameLowering::canUseAsPrologue(
|
||||
const MachineBasicBlock &MBB) const {
|
||||
const MachineFunction *MF = MBB.getParent();
|
||||
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
|
||||
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
|
||||
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
|
||||
// Don't need a scratch register if we're not going to re-align the stack.
|
||||
if (!RegInfo->needsStackRealignment(*MF))
|
||||
return true;
|
||||
// Otherwise, we can use any block as long as it has a scratch register
|
||||
// available.
|
||||
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
|
||||
}
|
||||
|
||||
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
|
@ -331,8 +388,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
|||
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
|
||||
unsigned scratchSPReg = AArch64::SP;
|
||||
if (NumBytes && NeedsRealignment) {
|
||||
// Use a free non-callee-saved register as the scratch register for re-aligning the stack pointer.
|
||||
scratchSPReg = AArch64::X9;
|
||||
scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
|
||||
assert(scratchSPReg != AArch64::NoRegister);
|
||||
}
|
||||
|
||||
// If we're a leaf function, try using the red zone.
|
||||
|
@ -926,19 +983,14 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
if (RegInfo->hasBasePointer(MF))
|
||||
BasePointerReg = RegInfo->getBaseRegister();
|
||||
|
||||
unsigned StackAlignReg = AArch64::NoRegister;
|
||||
if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
|
||||
StackAlignReg = AArch64::X9;
|
||||
|
||||
bool ExtraCSSpill = false;
|
||||
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
|
||||
// Figure out which callee-saved registers to save/restore.
|
||||
for (unsigned i = 0; CSRegs[i]; ++i) {
|
||||
const unsigned Reg = CSRegs[i];
|
||||
|
||||
// Add the stack re-align scratch register and base pointer register to
|
||||
// SavedRegs set only if they are callee-save.
|
||||
if (Reg == BasePointerReg || Reg == StackAlignReg)
|
||||
// Add the base pointer register to SavedRegs if it is callee-save.
|
||||
if (Reg == BasePointerReg)
|
||||
SavedRegs.set(Reg);
|
||||
|
||||
bool RegUsed = SavedRegs.test(Reg);
|
||||
|
|
|
@ -37,6 +37,8 @@ public:
|
|||
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
|
||||
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
|
||||
|
||||
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
|
||||
|
||||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const override;
|
||||
int resolveFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
|
|
|
@ -630,3 +630,92 @@ loop2b: ; preds = %loop1
|
|||
end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Re-aligned stack pointer. See bug 26642. Avoid clobbering live
|
||||
; values in the prologue when re-aligning the stack pointer.
|
||||
; CHECK-LABEL: stack_realign:
|
||||
; ENABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
|
||||
; ENABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
|
||||
; DISABLE-NOT: lsl w[[LSL1:[0-9]+]], w0, w1
|
||||
; DISABLE-NOT: lsl w[[LSL2:[0-9]+]], w1, w0
|
||||
; CHECK: stp x29, x30, [sp, #-16]!
|
||||
; CHECK: mov x29, sp
|
||||
; ENABLE-NOT: sub x[[LSL1]], sp, #16
|
||||
; ENABLE-NOT: sub x[[LSL2]], sp, #16
|
||||
; DISABLE: sub x{{[0-9]+}}, sp, #16
|
||||
; DISABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
|
||||
; DISABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
|
||||
; CHECK-DAG: str w[[LSL1]],
|
||||
; CHECK-DAG: str w[[LSL2]],
|
||||
|
||||
; Test function for stack-pointer re-alignment under shrink wrapping.
; Both shifts are computed in the entry block and only stored in %false,
; so their results stay live across the conditional branch.
define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) {
|
||||
; Over-aligned (32-byte) stack slot; presumably this is what forces the
; stack pointer to be re-aligned in the prologue.
%tmp = alloca i32, align 32
|
||||
%shl1 = shl i32 %a, %b
|
||||
%shl2 = shl i32 %b, %a
|
||||
%tmp2 = icmp slt i32 %a, %b
|
||||
br i1 %tmp2, label %true, label %false
|
||||
|
||||
; Only this block touches the stack slot.
true:
|
||||
store i32 %a, i32* %tmp, align 4
|
||||
%tmp4 = load i32, i32* %tmp
|
||||
br label %false
|
||||
|
||||
; Join block: stores both shift results and returns either the reloaded
; value (via %true) or %a (directly from the entry block).
false:
|
||||
%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
|
||||
store i32 %shl1, i32* %ptr1
|
||||
store i32 %shl2, i32* %ptr2
|
||||
ret i32 %tmp.0
|
||||
}
|
||||
|
||||
; Re-aligned stack pointer with all caller-save regs live. See bug
|
||||
; 26642. In this case we currently avoid shrink wrapping because
|
||||
; ensuring we have a scratch register to re-align the stack pointer is
|
||||
; too complicated. Output should be the same for both enabled and
|
||||
; disabled shrink wrapping.
|
||||
; CHECK-LABEL: stack_realign2:
|
||||
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]!
|
||||
; CHECK: add x29, sp, #{{[0-9]+}}
|
||||
; CHECK: lsl {{w[0-9]+}}, w0, w1
|
||||
|
||||
; Test function for stack-pointer re-alignment when many values are live.
; Ten values are computed in the entry block and, together with %a, %b and
; the six pointer arguments, are only consumed in %false, so they all stay
; live across the conditional branch.
define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) {
|
||||
; Over-aligned (32-byte) stack slot; presumably this is what forces the
; stack pointer to be re-aligned in the prologue.
%tmp = alloca i32, align 32
|
||||
%tmp1 = shl i32 %a, %b
|
||||
%tmp2 = shl i32 %b, %a
|
||||
%tmp3 = lshr i32 %a, %b
|
||||
%tmp4 = lshr i32 %b, %a
|
||||
%tmp5 = add i32 %b, %a
|
||||
%tmp6 = sub i32 %b, %a
|
||||
%tmp7 = add i32 %tmp1, %tmp2
|
||||
%tmp8 = sub i32 %tmp2, %tmp3
|
||||
%tmp9 = add i32 %tmp3, %tmp4
|
||||
%tmp10 = add i32 %tmp4, %tmp5
|
||||
%cmp = icmp slt i32 %a, %b
|
||||
br i1 %cmp, label %true, label %false
|
||||
|
||||
|
||||
; Only this block touches the stack slot. The inline asm lists x19-x28 as
; clobbered.
true:
|
||||
store i32 %a, i32* %tmp, align 4
|
||||
call void asm sideeffect "nop", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
|
||||
br label %false
|
||||
|
||||
|
||||
; Join block: consumes every value computed in the entry block.
false:
|
||||
store i32 %tmp1, i32* %ptr1, align 4
|
||||
store i32 %tmp2, i32* %ptr2, align 4
|
||||
store i32 %tmp3, i32* %ptr3, align 4
|
||||
store i32 %tmp4, i32* %ptr4, align 4
|
||||
store i32 %tmp5, i32* %ptr5, align 4
|
||||
store i32 %tmp6, i32* %ptr6, align 4
|
||||
; The remaining values go to consecutive i32 slots 1..6 after %ptr1.
%idx1 = getelementptr inbounds i32, i32* %ptr1, i64 1
|
||||
store i32 %a, i32* %idx1, align 4
|
||||
%idx2 = getelementptr inbounds i32, i32* %ptr1, i64 2
|
||||
store i32 %b, i32* %idx2, align 4
|
||||
%idx3 = getelementptr inbounds i32, i32* %ptr1, i64 3
|
||||
store i32 %tmp7, i32* %idx3, align 4
|
||||
%idx4 = getelementptr inbounds i32, i32* %ptr1, i64 4
|
||||
store i32 %tmp8, i32* %idx4, align 4
|
||||
%idx5 = getelementptr inbounds i32, i32* %ptr1, i64 5
|
||||
store i32 %tmp9, i32* %idx5, align 4
|
||||
%idx6 = getelementptr inbounds i32, i32* %ptr1, i64 6
|
||||
store i32 %tmp10, i32* %idx6, align 4
|
||||
|
||||
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue