forked from OSchip/llvm-project
Target/X86: Tweak allocation of the shadow area (aka home) on Win64. It must be enough for the caller to allocate one.
llvm-svn: 124949
This commit is contained in:
parent
b21c3db920
commit
1850c80afb
|
@ -397,11 +397,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||||
if (HasFP) MinSize += SlotSize;
|
if (HasFP) MinSize += SlotSize;
|
||||||
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
|
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
|
||||||
MFI->setStackSize(StackSize);
|
MFI->setStackSize(StackSize);
|
||||||
} else if (IsWin64) {
|
|
||||||
// We need to always allocate 32 bytes as register spill area.
|
|
||||||
// FIXME: We might reuse these 32 bytes for leaf functions.
|
|
||||||
StackSize += 32;
|
|
||||||
MFI->setStackSize(StackSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert stack pointer adjustment for later moving of return addr. Only
|
// Insert stack pointer adjustment for later moving of return addr. Only
|
||||||
|
|
|
@ -28,8 +28,7 @@ public:
|
||||||
explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
|
explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
|
||||||
: TargetFrameLowering(StackGrowsDown,
|
: TargetFrameLowering(StackGrowsDown,
|
||||||
sti.getStackAlignment(),
|
sti.getStackAlignment(),
|
||||||
(sti.isTargetWin64() ? -40 :
|
(sti.is64Bit() ? -8 : -4)),
|
||||||
(sti.is64Bit() ? -8 : -4))),
|
|
||||||
TM(tm), STI(sti) {
|
TM(tm), STI(sti) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1544,6 +1544,12 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
||||||
SmallVector<CCValAssign, 16> ArgLocs;
|
SmallVector<CCValAssign, 16> ArgLocs;
|
||||||
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
|
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
|
||||||
ArgLocs, *DAG.getContext());
|
ArgLocs, *DAG.getContext());
|
||||||
|
|
||||||
|
// Allocate shadow area for Win64
|
||||||
|
if (IsWin64) {
|
||||||
|
CCInfo.AllocateStack(32, 8);
|
||||||
|
}
|
||||||
|
|
||||||
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
|
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
|
||||||
|
|
||||||
unsigned LastVal = ~0U;
|
unsigned LastVal = ~0U;
|
||||||
|
@ -1778,8 +1784,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
|
||||||
DebugLoc dl, SelectionDAG &DAG,
|
DebugLoc dl, SelectionDAG &DAG,
|
||||||
const CCValAssign &VA,
|
const CCValAssign &VA,
|
||||||
ISD::ArgFlagsTy Flags) const {
|
ISD::ArgFlagsTy Flags) const {
|
||||||
const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
|
unsigned LocMemOffset = VA.getLocMemOffset();
|
||||||
unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
|
|
||||||
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
|
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
|
||||||
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
|
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
|
||||||
if (Flags.isByVal())
|
if (Flags.isByVal())
|
||||||
|
@ -1864,6 +1869,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||||
SmallVector<CCValAssign, 16> ArgLocs;
|
SmallVector<CCValAssign, 16> ArgLocs;
|
||||||
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
|
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
|
||||||
ArgLocs, *DAG.getContext());
|
ArgLocs, *DAG.getContext());
|
||||||
|
|
||||||
|
// Allocate shadow area for Win64
|
||||||
|
if (IsWin64) {
|
||||||
|
CCInfo.AllocateStack(32, 8);
|
||||||
|
}
|
||||||
|
|
||||||
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
|
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
|
||||||
|
|
||||||
// Get a count of how many bytes are to be pushed on the stack.
|
// Get a count of how many bytes are to be pushed on the stack.
|
||||||
|
@ -2447,6 +2458,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
|
||||||
SmallVector<CCValAssign, 16> ArgLocs;
|
SmallVector<CCValAssign, 16> ArgLocs;
|
||||||
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
|
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
|
||||||
ArgLocs, *DAG.getContext());
|
ArgLocs, *DAG.getContext());
|
||||||
|
|
||||||
|
// Allocate shadow area for Win64
|
||||||
|
if (Subtarget->isTargetWin64()) {
|
||||||
|
CCInfo.AllocateStack(32, 8);
|
||||||
|
}
|
||||||
|
|
||||||
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
|
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
|
||||||
if (CCInfo.getNextStackOffset()) {
|
if (CCInfo.getNextStackOffset()) {
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
|
; RUN: llc -mtriple=x86_64-pc-mingw64 < %s | FileCheck %s
|
||||||
target triple = "x86_64-pc-mingw64"
|
; CHECK-NOT: -{{[1-9][0-9]*}}(%rsp)
|
||||||
|
|
||||||
define x86_fp80 @a(i64 %x) nounwind readnone {
|
define x86_fp80 @a(i64 %x) nounwind readnone {
|
||||||
entry:
|
entry:
|
||||||
%conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
|
%conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
|
||||||
ret x86_fp80 %conv
|
ret x86_fp80 %conv
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,10 @@
|
||||||
; RUN: llc < %s -o %t1
|
; RUN: llc -mtriple=x86_64-pc-mingw64 < %s | FileCheck %s
|
||||||
; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
|
; CHECK: subq $40, %rsp
|
||||||
; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
|
; CHECK: movaps %xmm8, (%rsp)
|
||||||
; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
|
; CHECK: movaps %xmm7, 16(%rsp)
|
||||||
target triple = "x86_64-pc-mingw64"
|
|
||||||
|
|
||||||
define i32 @a() nounwind {
|
define i32 @a() nounwind {
|
||||||
entry:
|
entry:
|
||||||
tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
|
tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
|
||||||
ret i32 undef
|
ret i32 undef
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,8 +4,8 @@
|
||||||
; on the stack.
|
; on the stack.
|
||||||
define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
|
define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: movl 80(%rsp), %eax
|
; CHECK: movl 48(%rsp), %eax
|
||||||
; CHECK: addl 72(%rsp), %eax
|
; CHECK: addl 40(%rsp), %eax
|
||||||
%add = add nsw i32 %p6, %p5
|
%add = add nsw i32 %p6, %p5
|
||||||
ret i32 %add
|
ret i32 %add
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,11 +5,11 @@
|
||||||
; calculated.
|
; calculated.
|
||||||
define void @average_va(i32 %count, ...) nounwind {
|
define void @average_va(i32 %count, ...) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: subq $40, %rsp
|
; CHECK: pushq
|
||||||
; CHECK: movq %r9, 72(%rsp)
|
; CHECK: movq %r9, 40(%rsp)
|
||||||
; CHECK: movq %r8, 64(%rsp)
|
; CHECK: movq %r8, 32(%rsp)
|
||||||
; CHECK: movq %rdx, 56(%rsp)
|
; CHECK: movq %rdx, 24(%rsp)
|
||||||
; CHECK: leaq 56(%rsp), %rax
|
; CHECK: leaq 24(%rsp), %rax
|
||||||
|
|
||||||
%ap = alloca i8*, align 8 ; <i8**> [#uses=1]
|
%ap = alloca i8*, align 8 ; <i8**> [#uses=1]
|
||||||
%ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
|
%ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
|
||||||
|
|
Loading…
Reference in New Issue