forked from OSchip/llvm-project
[X86] Use BuildStackAdjustment in stack probes
This has the advantage of dealing with live EFLAGS, using LEA instead of SUB if needed to avoid clobbering. That also respects feature "lea-sp". We could allow unrolled stack probing from blocks with live-EFLAGS, if canUseAsPrologue learns when emitStackProbeInlineGenericBlock will be used. Differential Revision: https://reviews.llvm.org/D134495
This commit is contained in:
parent
26c37b461a
commit
cb46ffdbf4
|
@ -660,7 +660,6 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
|
|||
const bool HasFP = hasFP(MF);
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
const X86TargetLowering &TLI = *STI.getTargetLowering();
|
||||
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
|
||||
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
|
||||
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
|
||||
|
||||
|
@ -671,16 +670,14 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
|
|||
// If the offset is so small it fits within a page, there's nothing to do.
|
||||
if (StackProbeSize < Offset + AlignOffset) {
|
||||
|
||||
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(StackProbeSize - AlignOffset)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
uint64_t StackAdjustment = StackProbeSize - AlignOffset;
|
||||
BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
if (!HasFP && NeedsDwarfCFI) {
|
||||
BuildCFI(MBB, MBBI, DL,
|
||||
MCCFIInstruction::createAdjustCfaOffset(
|
||||
nullptr, StackProbeSize - AlignOffset));
|
||||
BuildCFI(
|
||||
MBB, MBBI, DL,
|
||||
MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
|
||||
}
|
||||
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
|
||||
|
||||
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
|
||||
.setMIFlag(MachineInstr::FrameSetup),
|
||||
|
@ -695,11 +692,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
|
|||
// natural probes but it implies much more logic and there was very few
|
||||
// interesting natural probes to interleave.
|
||||
while (CurrentOffset + StackProbeSize < Offset) {
|
||||
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(StackProbeSize)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
|
||||
BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
|
||||
if (!HasFP && NeedsDwarfCFI) {
|
||||
BuildCFI(
|
||||
|
@ -717,13 +711,20 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
|
|||
|
||||
// No need to probe the tail, it is smaller than a Page.
|
||||
uint64_t ChunkSize = Offset - CurrentOffset;
|
||||
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(ChunkSize)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
if (ChunkSize == SlotSize) {
|
||||
// Use push for slot sized adjustments as a size optimization,
|
||||
// like emitSPUpdate does when not probing.
|
||||
unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
|
||||
unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
|
||||
BuildMI(MBB, MBBI, DL, TII.get(Opc))
|
||||
.addReg(Reg, RegState::Undef)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
} else {
|
||||
BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
// No need to adjust Dwarf CFA offset here, the last position of the stack has
|
||||
// been defined
|
||||
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
|
||||
}
|
||||
|
||||
void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
||||
|
@ -732,6 +733,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
|||
uint64_t AlignOffset) const {
|
||||
assert(Offset && "null offset");
|
||||
|
||||
assert(!MBB.isLiveIn(X86::EFLAGS) &&
|
||||
"Inline stack probe loop will clobber live EFLAGS.");
|
||||
|
||||
const bool NeedsDwarfCFI = needsDwarfCFI(MF);
|
||||
const bool HasFP = hasFP(MF);
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
|
@ -742,12 +746,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
|||
if (AlignOffset) {
|
||||
if (AlignOffset < StackProbeSize) {
|
||||
// Perform a first smaller allocation followed by a probe.
|
||||
const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
|
||||
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(AlignOffset)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
|
||||
BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
|
||||
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
|
||||
.setMIFlag(MachineInstr::FrameSetup),
|
||||
|
@ -806,13 +806,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
|||
}
|
||||
|
||||
// allocate a page
|
||||
{
|
||||
const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
|
||||
BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(StackProbeSize)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
|
||||
/*InEpilogue=*/false)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
|
||||
// touch the page
|
||||
addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
|
||||
|
@ -841,13 +837,11 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
|||
MBB.addSuccessor(testMBB);
|
||||
|
||||
// handle tail
|
||||
const unsigned TailOffset = Offset % StackProbeSize;
|
||||
const uint64_t TailOffset = Offset % StackProbeSize;
|
||||
MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
|
||||
if (TailOffset) {
|
||||
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
|
||||
BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(TailOffset)
|
||||
BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
|
||||
/*InEpilogue=*/false)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
|
@ -879,6 +873,9 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
|
|||
const TargetInstrInfo &TII = *STI.getInstrInfo();
|
||||
const BasicBlock *LLVM_BB = MBB.getBasicBlock();
|
||||
|
||||
assert(!MBB.isLiveIn(X86::EFLAGS) &&
|
||||
"Inline stack probe loop will clobber live EFLAGS.");
|
||||
|
||||
// RAX contains the number of bytes of desired stack adjustment.
|
||||
// The handling here assumes this value has already been updated so as to
|
||||
// maintain stack alignment.
|
||||
|
@ -1115,6 +1112,9 @@ void X86FrameLowering::emitStackProbeCall(
|
|||
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
|
||||
"code model and indirect thunks not yet implemented.");
|
||||
|
||||
assert(!MBB.isLiveIn(X86::EFLAGS) &&
|
||||
"Stack probe calls will clobber live EFLAGS.");
|
||||
|
||||
unsigned CallOp;
|
||||
if (Is64Bit)
|
||||
CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
|
||||
|
@ -3510,6 +3510,7 @@ bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
|
|||
return true;
|
||||
|
||||
// If stack probes have to loop inline or call, that will clobber EFLAGS.
|
||||
// FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
const X86TargetLowering &TLI = *STI.getTargetLowering();
|
||||
if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
|
||||
|
|
|
@ -13,7 +13,7 @@ define i32 @f(i32 %a, i32 %b) #0 {
|
|||
; CHECK: # %bb.0: # %bb13.i
|
||||
; CHECK-NEXT: pushq %rbp
|
||||
; CHECK-NEXT: pushq %rbx
|
||||
; CHECK-NEXT: subq $8, %rsp
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: je .LBB0_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb16.i
|
||||
|
|
Loading…
Reference in New Issue