X86: Fix X86CallFrameOptimization to search for the COPY StackPointer

SelectionDAG inserts a copy of ESP into a virtual register.
X86CallFrameOptimization assumed that this COPY, when present, always sits
immediately after the call-frame setup instruction (ADJCALLSTACKDOWN). That
assumption is wrong: the COPY can be located anywhere between the call-frame
setup instruction and its first use. When the COPY sat anywhere other than
where X86CallFrameOptimization expected, visiting it while processing the
call chain led to a conservative bail-out.

The fix is quite straightforward: scan ahead for the stack-pointer copy and
make note of it so it can be ignored while processing the call chain.

Fixes PR34903.

Differential Revision: https://reviews.llvm.org/D38730

llvm-svn: 316416
This commit is contained in:
parent d3abd15d8c
commit 3c0d385598
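To make the shape of the fix concrete before reading the diff, here is a minimal, self-contained sketch of the scan-ahead idea. It is not the pass itself: the `Inst` type, `findStackPtrCopy`, and the register numbering are hypothetical stand-ins for LLVM's `MachineBasicBlock`/`MachineInstr` machinery (`isCopy()`, `isCall()`, `getOperand().getReg()`).

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for MachineInstr; only what the sketch needs.
struct Inst {
  enum Kind { Copy, Call, Other } kind;
  int dstReg = -1, srcReg = -1; // only meaningful for Copy
};

// Scan forward from the instruction following the call-frame setup, up to
// (but not including) the call, looking for a COPY whose source is the stack
// pointer. Returns its index, or -1 if there is none. This mirrors the
// commit's idea: the COPY may sit anywhere before its first use, and the
// call is used as a cheap scan boundary.
int findStackPtrCopy(const std::vector<Inst> &Insts, std::size_t Start,
                     int StackPtr) {
  for (std::size_t J = Start; J < Insts.size() && Insts[J].kind != Inst::Call;
       ++J)
    if (Insts[J].kind == Inst::Copy && Insts[J].srcReg == StackPtr)
      return static_cast<int>(J);
  return -1; // no SP copy in this call sequence
}

int main() {
  const int ESP = 0;
  // Model of PR34903: the COPY of ESP is *not* immediately after the
  // call-frame setup (index 0), but two instructions later.
  std::vector<Inst> Seq = {
      {Inst::Other},                        // some store
      {Inst::Other},                        // another store
      {Inst::Copy, /*dstReg=*/7, /*srcReg=*/ESP},
      {Inst::Call},
  };
  std::printf("SP copy found at index %d\n", findStackPtrCopy(Seq, 0, ESP));
  return 0;
}
```

Bounding the scan by the call keeps the check cheap: testing for a call is cheaper than asking whether each instruction uses the stack pointer, which is exactly the trade-off the new comment in the pass describes.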
@@ -363,14 +363,23 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
     ++I;
 
   unsigned StackPtr = RegInfo.getStackRegister();
+  auto StackPtrCopyInst = MBB.end();
   // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
-  // register here. If it's there, use that virtual register as stack pointer
-  // instead.
-  if (I->isCopy() && I->getOperand(0).isReg() && I->getOperand(1).isReg() &&
-      I->getOperand(1).getReg() == StackPtr) {
-    Context.SPCopy = &*I++;
-    StackPtr = Context.SPCopy->getOperand(0).getReg();
-  }
+  // register. If it's there, use that virtual register as stack pointer
+  // instead. Also, we need to locate this instruction so that we can later
+  // safely ignore it while doing the conservative processing of the call chain.
+  // The COPY can be located anywhere between the call-frame setup
+  // instruction and its first use. We use the call instruction as a boundary
+  // because it is usually cheaper to check if an instruction is a call than
+  // checking if an instruction uses a register.
+  for (auto J = I; !J->isCall(); ++J)
+    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
+        J->getOperand(1).getReg() == StackPtr) {
+      StackPtrCopyInst = J;
+      Context.SPCopy = &*J++;
+      StackPtr = Context.SPCopy->getOperand(0).getReg();
+      break;
+    }
 
   // Scan the call setup sequence for the pattern we're looking for.
   // We only handle a simple case - a sequence of store instructions that
@@ -379,16 +388,15 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
   if (MaxAdjust > 4)
     Context.MovVector.resize(MaxAdjust, nullptr);
 
-  InstClassification Classification;
   DenseSet<unsigned int> UsedRegs;
-
-  while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
-         Exit) {
-    if (Classification == Skip) {
-      ++I;
-      continue;
-    }
+  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
+    // If this is the COPY of the stack pointer, it's ok to ignore.
+    if (I == StackPtrCopyInst)
+      continue;
+    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
+    if (Classification != Convert)
+      continue;
 
     // We know the instruction has a supported store opcode.
     // We only want movs of the form:
     // mov imm/reg, k(%StackPtr)
@@ -431,10 +439,10 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
       if (RegInfo.isPhysicalRegister(Reg))
         UsedRegs.insert(Reg);
     }
-
-    ++I;
   }
 
+  --I;
+
   // We now expect the end of the sequence. If we stopped early,
   // or reached the end of the block without finding a call, bail.
   if (I == MBB.end() || !I->isCall())
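The reshaped processing loop above can be pictured the same way — a simplified sketch reusing the mock `Inst` type from the earlier example, with a trivial `classify` placeholder standing in for the pass's `classifyInstruction`; only the control flow mirrors the real code.

```cpp
#include <cstddef>
#include <vector>

enum InstClassification { Convert, Skip, Exit };

// Hypothetical classifier standing in for classifyInstruction(); here,
// anything that is not the call is merely skipped.
InstClassification classify(const Inst &I) {
  return I.kind == Inst::Call ? Exit : Skip;
}

// Walk the call sequence; the remembered SP-copy position is skipped up
// front, so visiting the COPY no longer terminates the scan.
void processCallChain(const std::vector<Inst> &Insts, std::size_t Start,
                      int SPCopyIdx) {
  for (std::size_t I = Start; I < Insts.size(); ++I) {
    if (static_cast<int>(I) == SPCopyIdx)
      continue; // the COPY of the stack pointer is ok to ignore
    InstClassification C = classify(Insts[I]);
    if (C == Exit)
      break;
    if (C != Convert)
      continue;
    // ... a Convert instruction would be recorded for mov -> push rewriting ...
  }
}
```

The point of the restructure is that the COPY found by the scan-ahead is skipped inside the loop rather than classified, so a COPY sitting between the stores no longer forces the conservative bail-out described in the commit message.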
@@ -31,18 +31,44 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
; i386-NEXT: sahf
; i386-NEXT: jne

; In the following case we get a long chain of EFLAGS save/restore due to
; a sequence of:
; cmpxchg8b (implicit-def eflags)
; eax = copy eflags
; adjcallstackdown32
; ...
; use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
; clobbers eflags, effectively interfering in the liveness interval.
; Is this a case we care about? Maybe not, considering this issue only
; happens when the fast pre-regalloc scheduler is enforced. A more
; performant scheduler would move the adjcallstackdown32 out of the
; eflags liveness interval.

; i386f-LABEL: test_intervening_call:
; i386f: cmpxchg8b
; i386f-NEXT: movl %eax, (%esp)
; i386f-NEXT: movl %edx, 4(%esp)
; i386f-NEXT: seto %al
; i386f-NEXT: pushl %eax
; i386f-NEXT: seto %al
; i386f-NEXT: lahf
; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
; i386f-NEXT: calll bar
; i386f-NEXT: movl [[FLAGS]], %eax
; i386f-NEXT: addb $127, %al
; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
; i386f-NEXT: popl %eax
; i386f-NEXT: subl $8, %esp
; i386f-NEXT: pushl %eax
; i386f-NEXT: movl %ecx, %eax
; i386f-NEXT: addb $127, %al
; i386f-NEXT: sahf
; i386f-NEXT: jne
; i386f-NEXT: popl %eax
; i386f-NEXT: pushl %eax
; i386f-NEXT: seto %al
; i386f-NEXT: lahf
; i386f-NEXT: movl %eax, %esi
; i386f-NEXT: popl %eax
; i386f-NEXT: pushl %edx
; i386f-NEXT: pushl %eax
; i386f-NEXT: calll bar
; i386f-NEXT: addl $16, %esp
; i386f-NEXT: movl %esi, %eax
; i386f-NEXT: addb $127, %al

; x8664-LABEL: test_intervening_call:
; x8664: cmpxchgq
@@ -228,16 +228,16 @@ entry:
 ; NORMAL-NEXT: pushl $2
 ; NORMAL-NEXT: pushl $1
 ; NORMAL-NEXT: call
-; NORMAL-NEXT: subl $4, %esp
-; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
-; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
-; NORMAL-NEXT: movl [[E2]], 4(%esp)
-; NORMAL-NEXT: movl [[E1]], (%esp)
-; NORMAL-NEXT: leal 32(%esp), [[E3:%e..]]
-; NORMAL-NEXT: movl [[E3]], 16(%esp)
-; NORMAL-NEXT: leal 28(%esp), [[E4:%e..]]
-; NORMAL-NEXT: movl [[E4]], 12(%esp)
-; NORMAL-NEXT: movl $6, 8(%esp)
-; NORMAL-NEXT: call
-; NORMAL-NEXT: addl $20, %esp
+; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: movl (%esp), [[E1:%e..]]
+; NORMAL-NEXT: movl 4(%esp), [[E2:%e..]]
+; NORMAL-NEXT: leal 16(%esp), [[E3:%e..]]
+; NORMAL-NEXT: leal 12(%esp), [[E4:%e..]]
+; NORMAL-NEXT: pushl [[E3]]
+; NORMAL-NEXT: pushl [[E4]]
+; NORMAL-NEXT: pushl $6
+; NORMAL-NEXT: pushl [[E2]]
+; NORMAL-NEXT: pushl [[E1]]
+; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $20, %esp
 define void @test9() optsize {
@@ -297,10 +297,10 @@ define void @test11() optsize {
 ; Converting one mov into a push isn't worth it when
 ; doing so forces too much overhead for other calls.
 ; NORMAL-LABEL: test12:
-; NORMAL: movl $8, 12(%esp)
-; NORMAL-NEXT: movl $7, 8(%esp)
-; NORMAL-NEXT: movl $6, 4(%esp)
-; NORMAL-NEXT: movl $5, (%esp)
+; NORMAL: pushl $8
+; NORMAL-NEXT: pushl $7
+; NORMAL-NEXT: pushl $6
+; NORMAL-NEXT: pushl $5
 ; NORMAL-NEXT: calll _good
 define void @test12() optsize {
 entry:
@@ -318,18 +318,22 @@ entry:
 ; NORMAL-NEXT: pushl $2
 ; NORMAL-NEXT: pushl $1
 ; NORMAL-NEXT: calll _good
-; NORMAL-NEXT: subl $4, %esp
-; NORMAL: movl $8, 16(%esp)
-; NORMAL-NEXT: movl $7, 12(%esp)
-; NORMAL-NEXT: movl $6, 8(%esp)
-; NORMAL-NEXT: calll _struct
-; NORMAL-NEXT: addl $20, %esp
-; NORMAL-NEXT: pushl $12
-; NORMAL-NEXT: pushl $11
-; NORMAL-NEXT: pushl $10
-; NORMAL-NEXT: pushl $9
-; NORMAL-NEXT: calll _good
-; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: movl (%esp), %eax
+; NORMAL-NEXT: movl 4(%esp), %ecx
+; NORMAL-NEXT: pushl $8
+; NORMAL-NEXT: pushl $7
+; NORMAL-NEXT: pushl $6
+; NORMAL-NEXT: pushl %ecx
+; NORMAL-NEXT: pushl %eax
+; NORMAL-NEXT: calll _struct
+; NORMAL-NEXT: addl $20, %esp
+; NORMAL-NEXT: pushl $12
+; NORMAL-NEXT: pushl $11
+; NORMAL-NEXT: pushl $10
+; NORMAL-NEXT: pushl $9
+; NORMAL-NEXT: calll _good
+; NORMAL-NEXT: addl $16, %esp
 define void @test12b() optsize {
 entry:
   %s = alloca %struct.s, align 4
@@ -1,5 +1,6 @@
 # RUN: llc -mtriple=i686-windows --run-pass="x86-cf-opt" %s -o - | FileCheck %s
 
+# PR34903
 --- |
   target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
   target triple = "i686--windows-msvc"
@@ -39,17 +40,16 @@
 # CHECK-NEXT: PUSH32i8 1, implicit-def %esp, implicit %esp
 # CHECK-NEXT: CALLpcrel32 @good, csr_32, implicit %esp, implicit-def %esp
 # CHECK-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp
-# CHECK-NEXT: ADJCALLSTACKDOWN32 20, 0, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp
+# CHECK-NEXT: ADJCALLSTACKDOWN32 20, 0, 20, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp
 # CHECK-NEXT: %1 = MOV32rm %stack.2.s, 1, _, 0, _ :: (load 4 from %stack.2.s, align 8)
 # CHECK-NEXT: %2 = MOV32rm %stack.2.s, 1, _, 4, _ :: (load 4 from %stack.2.s + 4)
-# CHECK-NEXT: %3 = COPY %esp
-# CHECK-NEXT: MOV32mr %3, 1, _, 4, _, killed %2 :: (store 4)
-# CHECK-NEXT: MOV32mr %3, 1, _, 0, _, killed %1 :: (store 4)
 # CHECK-NEXT: %4 = LEA32r %stack.0.p, 1, _, 0, _
-# CHECK-NEXT: MOV32mr %3, 1, _, 16, _, killed %4 :: (store 4 into stack + 16)
 # CHECK-NEXT: %5 = LEA32r %stack.1.q, 1, _, 0, _
-# CHECK-NEXT: MOV32mr %3, 1, _, 12, _, killed %5 :: (store 4 into stack + 12)
-# CHECK-NEXT: MOV32mi %3, 1, _, 8, _, 6 :: (store 4 into stack + 8)
+# CHECK-NEXT: PUSH32r %4, implicit-def %esp, implicit %esp
+# CHECK-NEXT: PUSH32r %5, implicit-def %esp, implicit %esp
+# CHECK-NEXT: PUSH32i8 6, implicit-def %esp, implicit %esp
+# CHECK-NEXT: PUSH32r %2, implicit-def %esp, implicit %esp
+# CHECK-NEXT: PUSH32r %1, implicit-def %esp, implicit %esp
 # CHECK-NEXT: CALLpcrel32 @struct, csr_32, implicit %esp, implicit-def %esp
 # CHECK-NEXT: ADJCALLSTACKUP32 20, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp
 # CHECK-NEXT: RET 0