[X86] Use BuildStackAdjustment in stack probes

This has the advantage of handling live EFLAGS, using LEA instead of
SUB when needed to avoid clobbering the flags. It also respects the
"lea-sp" tuning feature.
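
Roughly, the trade-off looks like this (a minimal sketch, not the actual
BuildStackAdjustment implementation; the helper name and parameters are
invented for illustration and assume the usual X86 backend opcode enums):

    // Hypothetical sketch of the opcode choice BuildStackAdjustment makes.
    // SUB has a compact immediate encoding but writes EFLAGS; LEA computes
    // rsp - N without touching any flags, so it is the safe choice while
    // EFLAGS is live (and the preferred one under the "lea-sp" feature).
    static unsigned chooseAdjustOpcode(bool EflagsLive, bool HasLeaSp,
                                       bool Is64Bit) {
      if (EflagsLive || HasLeaSp)
        return Is64Bit ? X86::LEA64r : X86::LEA32r;   // lea -N(%rsp), %rsp
      return Is64Bit ? X86::SUB64ri32 : X86::SUB32ri; // subq $N, %rsp
    }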

We could allow unrolled stack probing from blocks with live EFLAGS, if
canUseAsPrologue learns when emitStackProbeInlineGenericBlock will be used.

Differential Revision: https://reviews.llvm.org/D134495
Josh Stone 2022-09-22 12:56:20 -07:00
parent 26c37b461a
commit cb46ffdbf4
2 changed files with 39 additions and 38 deletions

llvm/lib/Target/X86/X86FrameLowering.cpp

@@ -660,7 +660,6 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
   const bool HasFP = hasFP(MF);
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   const X86TargetLowering &TLI = *STI.getTargetLowering();
-  const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
   const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
   const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
@@ -671,16 +670,14 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
   // If the offset is so small it fits within a page, there's nothing to do.
   if (StackProbeSize < Offset + AlignOffset) {
-    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-                           .addReg(StackPtr)
-                           .addImm(StackProbeSize - AlignOffset)
-                           .setMIFlag(MachineInstr::FrameSetup);
+    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
+    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
+        .setMIFlag(MachineInstr::FrameSetup);
     if (!HasFP && NeedsDwarfCFI) {
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createAdjustCfaOffset(
-                   nullptr, StackProbeSize - AlignOffset));
+      BuildCFI(
+          MBB, MBBI, DL,
+          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
     }
-    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.

     addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                      .setMIFlag(MachineInstr::FrameSetup),
@@ -695,11 +692,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
   // natural probes but it implies much more logic and there was very few
   // interesting natural probes to interleave.
   while (CurrentOffset + StackProbeSize < Offset) {
-    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-                           .addReg(StackPtr)
-                           .addImm(StackProbeSize)
-                           .setMIFlag(MachineInstr::FrameSetup);
-    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
+        .setMIFlag(MachineInstr::FrameSetup);

     if (!HasFP && NeedsDwarfCFI) {
       BuildCFI(
@@ -717,13 +711,20 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
   // No need to probe the tail, it is smaller than a Page.
   uint64_t ChunkSize = Offset - CurrentOffset;
-  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-                         .addReg(StackPtr)
-                         .addImm(ChunkSize)
-                         .setMIFlag(MachineInstr::FrameSetup);
+  if (ChunkSize == SlotSize) {
+    // Use push for slot sized adjustments as a size optimization,
+    // like emitSPUpdate does when not probing.
+    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
+    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
+    BuildMI(MBB, MBBI, DL, TII.get(Opc))
+        .addReg(Reg, RegState::Undef)
+        .setMIFlag(MachineInstr::FrameSetup);
+  } else {
+    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
   // No need to adjust Dwarf CFA offset here, the last position of the stack has
   // been defined
-  MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
 }

 void X86FrameLowering::emitStackProbeInlineGenericLoop(
@@ -732,6 +733,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
     uint64_t AlignOffset) const {
   assert(Offset && "null offset");

+  assert(!MBB.isLiveIn(X86::EFLAGS) &&
+         "Inline stack probe loop will clobber live EFLAGS.");
+
   const bool NeedsDwarfCFI = needsDwarfCFI(MF);
   const bool HasFP = hasFP(MF);
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
@@ -742,12 +746,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
   if (AlignOffset) {
     if (AlignOffset < StackProbeSize) {
       // Perform a first smaller allocation followed by a probe.
-      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
-      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
-                             .addReg(StackPtr)
-                             .addImm(AlignOffset)
-                             .setMIFlag(MachineInstr::FrameSetup);
-      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
+          .setMIFlag(MachineInstr::FrameSetup);

       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                        .setMIFlag(MachineInstr::FrameSetup),
@@ -806,13 +806,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
   }

   // allocate a page
-  {
-    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-    BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
-        .addReg(StackPtr)
-        .addImm(StackProbeSize)
-        .setMIFlag(MachineInstr::FrameSetup);
-  }
+  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
+                       /*InEpilogue=*/false)
+      .setMIFlag(MachineInstr::FrameSetup);

   // touch the page
   addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
@@ -841,13 +837,11 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
   MBB.addSuccessor(testMBB);

   // handle tail
-  const unsigned TailOffset = Offset % StackProbeSize;
+  const uint64_t TailOffset = Offset % StackProbeSize;
   MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
   if (TailOffset) {
-    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
-    BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr)
-        .addReg(StackPtr)
-        .addImm(TailOffset)
+    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
+                         /*InEpilogue=*/false)
         .setMIFlag(MachineInstr::FrameSetup);
   }
@@ -879,6 +873,9 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
   const TargetInstrInfo &TII = *STI.getInstrInfo();
   const BasicBlock *LLVM_BB = MBB.getBasicBlock();

+  assert(!MBB.isLiveIn(X86::EFLAGS) &&
+         "Inline stack probe loop will clobber live EFLAGS.");
+
   // RAX contains the number of bytes of desired stack adjustment.
   // The handling here assumes this value has already been updated so as to
   // maintain stack alignment.
@@ -1115,6 +1112,9 @@ void X86FrameLowering::emitStackProbeCall(
     report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                        "code model and indirect thunks not yet implemented.");

+  assert(!MBB.isLiveIn(X86::EFLAGS) &&
+         "Stack probe calls will clobber live EFLAGS.");
+
   unsigned CallOp;
   if (Is64Bit)
     CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
@@ -3510,6 +3510,7 @@ bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
     return true;

   // If stack probes have to loop inline or call, that will clobber EFLAGS.
+  // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   const X86TargetLowering &TLI = *STI.getTargetLowering();
   if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))


@@ -13,7 +13,7 @@ define i32 @f(i32 %a, i32 %b) #0 {
 ; CHECK: # %bb.0: # %bb13.i
 ; CHECK-NEXT: pushq %rbp
 ; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: subq $8, %rsp
+; CHECK-NEXT: pushq %rax
 ; CHECK-NEXT: testl %edi, %edi
 ; CHECK-NEXT: je .LBB0_1
 ; CHECK-NEXT: # %bb.2: # %bb16.i
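
The CHECK change above follows from the new SlotSize special case: an
8-byte tail adjustment is now emitted as a push of a dead register, which
is smaller than the SUB it replaces and, like LEA, leaves EFLAGS alone
(a sketch for illustration; byte counts are the standard x86-64 encodings):

    pushq %rax      # 1 byte  (50);          pushed value is undef
    subq  $8, %rsp  # 4 bytes (48 83 EC 08); also clobbers EFLAGS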