forked from OSchip/llvm-project
[X86] add dwarf information for loop stack probe
This patch is based on https://reviews.llvm.org/D99585.

While inside the stack probing loop, temporarily change the CFA to be based on r11/eax, which are already used to hold the loop bound. The stack pointer cannot be used for CFI here because it changes during the loop, so it does not have a constant offset to the CFA.

Co-authored-by: YangKeao <keao.yang@yahoo.com>

Reviewed By: nagisa

Differential Revision: https://reviews.llvm.org/D116628
This commit is contained in:
parent
f3a344d212
commit
a8ac117d98
llvm
|
@ -647,6 +647,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
|||
uint64_t AlignOffset) const {
|
||||
assert(Offset && "null offset");
|
||||
|
||||
const bool NeedsDwarfCFI = needsDwarfCFI(MF);
|
||||
const bool HasFP = hasFP(MF);
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
const X86TargetLowering &TLI = *STI.getTargetLowering();
|
||||
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
|
||||
|
@ -686,17 +688,36 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
|||
Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
|
||||
: Is64Bit ? X86::R11D
|
||||
: X86::EAX;
|
||||
|
||||
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
|
||||
.addReg(StackPtr)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
|
||||
// save loop bound
|
||||
{
|
||||
const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
|
||||
const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
|
||||
const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset);
|
||||
BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
|
||||
.addReg(FinalStackProbed)
|
||||
.addImm(Offset / StackProbeSize * StackProbeSize)
|
||||
.addImm(BoundOffset)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
|
||||
// while in the loop, use loop-invariant reg for CFI,
|
||||
// instead of the stack pointer, which changes during the loop
|
||||
if (!HasFP && NeedsDwarfCFI) {
|
||||
// x32 uses the same DWARF register numbers as x86-64,
|
||||
// so there isn't a register number for r11d, we must use r11 instead
|
||||
const Register DwarfFinalStackProbed =
|
||||
STI.isTarget64BitILP32()
|
||||
? Register(getX86SubSuperRegister(FinalStackProbed, 64))
|
||||
: FinalStackProbed;
|
||||
|
||||
BuildCFI(MBB, MBBI, DL,
|
||||
MCCFIInstruction::createDefCfaRegister(
|
||||
nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
|
||||
BuildCFI(MBB, MBBI, DL,
|
||||
MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
|
||||
}
|
||||
}
|
||||
|
||||
// allocate a page
|
||||
|
@ -735,15 +756,30 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
|||
MBB.addSuccessor(testMBB);
|
||||
|
||||
// handle tail
|
||||
unsigned TailOffset = Offset % StackProbeSize;
|
||||
const unsigned TailOffset = Offset % StackProbeSize;
|
||||
MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
|
||||
if (TailOffset) {
|
||||
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
|
||||
BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
|
||||
BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(TailOffset)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
// after the loop, switch back to stack pointer for CFI
|
||||
if (!HasFP && NeedsDwarfCFI) {
|
||||
// x32 uses the same DWARF register numbers as x86-64,
|
||||
// so there isn't a register number for esp, we must use rsp instead
|
||||
const Register DwarfStackPtr =
|
||||
STI.isTarget64BitILP32()
|
||||
? Register(getX86SubSuperRegister(StackPtr, 64))
|
||||
: Register(StackPtr);
|
||||
|
||||
BuildCFI(*tailMBB, TailMBBIter, DL,
|
||||
MCCFIInstruction::createDefCfaRegister(
|
||||
nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
|
||||
}
|
||||
|
||||
// Update Live In information
|
||||
recomputeLiveIns(*testMBB);
|
||||
recomputeLiveIns(*tailMBB);
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
|
||||
; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
|
||||
; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
|
||||
; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
|
||||
|
@ -7,6 +8,8 @@ define i32 @foo() local_unnamed_addr #0 {
|
|||
; CHECK-X64: # %bb.0:
|
||||
; CHECK-X64-NEXT: movq %rsp, %r11
|
||||
; CHECK-X64-NEXT: subq $69632, %r11 # imm = 0x11000
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_register %r11
|
||||
; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 69632
|
||||
; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
; CHECK-X64-NEXT: movq $0, (%rsp)
|
||||
|
@ -14,6 +17,7 @@ define i32 @foo() local_unnamed_addr #0 {
|
|||
; CHECK-X64-NEXT: jne .LBB0_1
|
||||
; CHECK-X64-NEXT: # %bb.2:
|
||||
; CHECK-X64-NEXT: subq $2248, %rsp # imm = 0x8C8
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888
|
||||
; CHECK-X64-NEXT: movl $1, 264(%rsp)
|
||||
; CHECK-X64-NEXT: movl $1, 28664(%rsp)
|
||||
|
@ -26,6 +30,8 @@ define i32 @foo() local_unnamed_addr #0 {
|
|||
; CHECK-X86: # %bb.0:
|
||||
; CHECK-X86-NEXT: movl %esp, %eax
|
||||
; CHECK-X86-NEXT: subl $69632, %eax # imm = 0x11000
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_register %eax
|
||||
; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 69632
|
||||
; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000
|
||||
; CHECK-X86-NEXT: movl $0, (%esp)
|
||||
|
@ -33,6 +39,7 @@ define i32 @foo() local_unnamed_addr #0 {
|
|||
; CHECK-X86-NEXT: jne .LBB0_1
|
||||
; CHECK-X86-NEXT: # %bb.2:
|
||||
; CHECK-X86-NEXT: subl $2380, %esp # imm = 0x94C
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_register %esp
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 72016
|
||||
; CHECK-X86-NEXT: movl $1, 392(%esp)
|
||||
; CHECK-X86-NEXT: movl $1, 28792(%esp)
|
||||
|
@ -45,6 +52,8 @@ define i32 @foo() local_unnamed_addr #0 {
|
|||
; CHECK-X32: # %bb.0:
|
||||
; CHECK-X32-NEXT: movl %esp, %r11d
|
||||
; CHECK-X32-NEXT: subl $69632, %r11d # imm = 0x11000
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_register %r11
|
||||
; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 69632
|
||||
; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000
|
||||
; CHECK-X32-NEXT: movq $0, (%esp)
|
||||
|
@ -52,6 +61,7 @@ define i32 @foo() local_unnamed_addr #0 {
|
|||
; CHECK-X32-NEXT: jne .LBB0_1
|
||||
; CHECK-X32-NEXT: # %bb.2:
|
||||
; CHECK-X32-NEXT: subl $2248, %esp # imm = 0x8C8
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888
|
||||
; CHECK-X32-NEXT: movl $1, 264(%esp)
|
||||
; CHECK-X32-NEXT: movl $1, 28664(%esp)
|
||||
|
@ -68,4 +78,139 @@ define i32 @foo() local_unnamed_addr #0 {
|
|||
ret i32 %c
|
||||
}
|
||||
|
||||
define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) "probe-stack"="inline-asm" "no_caller_saved_registers" {
|
||||
; CHECK-X64-LABEL: push_before_probe:
|
||||
; CHECK-X64: # %bb.0:
|
||||
; CHECK-X64-NEXT: pushq %rax
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-X64-NEXT: movq %rsp, %r11
|
||||
; CHECK-X64-NEXT: subq $69632, %r11 # imm = 0x11000
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_register %r11
|
||||
; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 69632
|
||||
; CHECK-X64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
; CHECK-X64-NEXT: movq $0, (%rsp)
|
||||
; CHECK-X64-NEXT: cmpq %r11, %rsp
|
||||
; CHECK-X64-NEXT: jne .LBB1_1
|
||||
; CHECK-X64-NEXT: # %bb.2:
|
||||
; CHECK-X64-NEXT: subq $2240, %rsp # imm = 0x8C0
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888
|
||||
; CHECK-X64-NEXT: .cfi_offset %rax, -16
|
||||
; CHECK-X64-NEXT: movl 71888(%rsp), %eax
|
||||
; CHECK-X64-NEXT: addl %esi, %edi
|
||||
; CHECK-X64-NEXT: addl %ecx, %edx
|
||||
; CHECK-X64-NEXT: addl %edi, %edx
|
||||
; CHECK-X64-NEXT: addl %r9d, %r8d
|
||||
; CHECK-X64-NEXT: addl 71896(%rsp), %eax
|
||||
; CHECK-X64-NEXT: addl %r8d, %eax
|
||||
; CHECK-X64-NEXT: addl %edx, %eax
|
||||
; CHECK-X64-NEXT: movl %eax, 264(%rsp)
|
||||
; CHECK-X64-NEXT: movl %eax, 28664(%rsp)
|
||||
; CHECK-X64-NEXT: addq $71872, %rsp # imm = 0x118C0
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-X64-NEXT: popq %rax
|
||||
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-X64-NEXT: retq
|
||||
;
|
||||
; CHECK-X86-LABEL: push_before_probe:
|
||||
; CHECK-X86: # %bb.0:
|
||||
; CHECK-X86-NEXT: pushl %esi
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-X86-NEXT: pushl %edx
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; CHECK-X86-NEXT: pushl %ecx
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-X86-NEXT: pushl %eax
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 20
|
||||
; CHECK-X86-NEXT: movl %esp, %eax
|
||||
; CHECK-X86-NEXT: subl $69632, %eax # imm = 0x11000
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_register %eax
|
||||
; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 69632
|
||||
; CHECK-X86-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000
|
||||
; CHECK-X86-NEXT: movl $0, (%esp)
|
||||
; CHECK-X86-NEXT: cmpl %eax, %esp
|
||||
; CHECK-X86-NEXT: jne .LBB1_1
|
||||
; CHECK-X86-NEXT: # %bb.2:
|
||||
; CHECK-X86-NEXT: subl $2380, %esp # imm = 0x94C
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_register %esp
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 72032
|
||||
; CHECK-X86-NEXT: .cfi_offset %eax, -20
|
||||
; CHECK-X86-NEXT: .cfi_offset %ecx, -16
|
||||
; CHECK-X86-NEXT: .cfi_offset %edx, -12
|
||||
; CHECK-X86-NEXT: .cfi_offset %esi, -8
|
||||
; CHECK-X86-NEXT: movl 72056(%esp), %eax
|
||||
; CHECK-X86-NEXT: movl 72048(%esp), %edx
|
||||
; CHECK-X86-NEXT: movl 72040(%esp), %ecx
|
||||
; CHECK-X86-NEXT: movl 72032(%esp), %esi
|
||||
; CHECK-X86-NEXT: addl 72036(%esp), %esi
|
||||
; CHECK-X86-NEXT: addl 72044(%esp), %ecx
|
||||
; CHECK-X86-NEXT: addl %esi, %ecx
|
||||
; CHECK-X86-NEXT: addl 72052(%esp), %edx
|
||||
; CHECK-X86-NEXT: addl 72060(%esp), %eax
|
||||
; CHECK-X86-NEXT: addl %edx, %eax
|
||||
; CHECK-X86-NEXT: addl %ecx, %eax
|
||||
; CHECK-X86-NEXT: movl %eax, 392(%esp)
|
||||
; CHECK-X86-NEXT: movl %eax, 28792(%esp)
|
||||
; CHECK-X86-NEXT: addl $72012, %esp # imm = 0x1194C
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 20
|
||||
; CHECK-X86-NEXT: popl %eax
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-X86-NEXT: popl %ecx
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; CHECK-X86-NEXT: popl %edx
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-X86-NEXT: popl %esi
|
||||
; CHECK-X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; CHECK-X86-NEXT: retl
|
||||
;
|
||||
; CHECK-X32-LABEL: push_before_probe:
|
||||
; CHECK-X32: # %bb.0:
|
||||
; CHECK-X32-NEXT: pushq %rax
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-X32-NEXT: movl %esp, %r11d
|
||||
; CHECK-X32-NEXT: subl $69632, %r11d # imm = 0x11000
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_register %r11
|
||||
; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 69632
|
||||
; CHECK-X32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000
|
||||
; CHECK-X32-NEXT: movq $0, (%esp)
|
||||
; CHECK-X32-NEXT: cmpl %r11d, %esp
|
||||
; CHECK-X32-NEXT: jne .LBB1_1
|
||||
; CHECK-X32-NEXT: # %bb.2:
|
||||
; CHECK-X32-NEXT: subl $2240, %esp # imm = 0x8C0
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888
|
||||
; CHECK-X32-NEXT: .cfi_offset %rax, -16
|
||||
; CHECK-X32-NEXT: movl 71888(%esp), %eax
|
||||
; CHECK-X32-NEXT: addl %esi, %edi
|
||||
; CHECK-X32-NEXT: addl %ecx, %edx
|
||||
; CHECK-X32-NEXT: addl %edi, %edx
|
||||
; CHECK-X32-NEXT: addl %r9d, %r8d
|
||||
; CHECK-X32-NEXT: addl 71896(%esp), %eax
|
||||
; CHECK-X32-NEXT: addl %r8d, %eax
|
||||
; CHECK-X32-NEXT: addl %edx, %eax
|
||||
; CHECK-X32-NEXT: movl %eax, 264(%esp)
|
||||
; CHECK-X32-NEXT: movl %eax, 28664(%esp)
|
||||
; CHECK-X32-NEXT: addl $71872, %esp # imm = 0x118C0
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-X32-NEXT: popq %rax
|
||||
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-X32-NEXT: retq
|
||||
%all = alloca i32, i64 18000, align 16
|
||||
%b0 = getelementptr inbounds i32, i32* %all, i64 98
|
||||
%b1 = getelementptr inbounds i32, i32* %all, i64 7198
|
||||
%ab = add i32 %a, %b
|
||||
%cd = add i32 %c, %d
|
||||
%ef = add i32 %e, %f
|
||||
%gh = add i32 %g, %h
|
||||
%abcd = add i32 %ab, %cd
|
||||
%efgh = add i32 %ef, %gh
|
||||
%sum = add i32 %abcd, %efgh
|
||||
store volatile i32 %sum, i32* %b0
|
||||
store volatile i32 %sum, i32* %b1
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
||||
|
|
Loading…
Reference in New Issue