Target/X86: Tweak the allocation of the shadow area (a.k.a. home area) on Win64. It should be enough for the caller to allocate it.

llvm-svn: 124949
This commit is contained in:
NAKAMURA Takumi 2011-02-05 15:11:32 +00:00
parent b21c3db920
commit 1850c80afb
7 changed files with 37 additions and 29 deletions

View File

@ -397,11 +397,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (HasFP) MinSize += SlotSize; if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
MFI->setStackSize(StackSize); MFI->setStackSize(StackSize);
} else if (IsWin64) {
// We need to always allocate 32 bytes as register spill area.
// FIXME: We might reuse these 32 bytes for leaf functions.
StackSize += 32;
MFI->setStackSize(StackSize);
} }
// Insert stack pointer adjustment for later moving of return addr. Only // Insert stack pointer adjustment for later moving of return addr. Only

View File

@ -28,8 +28,7 @@ public:
explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti) explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
: TargetFrameLowering(StackGrowsDown, : TargetFrameLowering(StackGrowsDown,
sti.getStackAlignment(), sti.getStackAlignment(),
(sti.isTargetWin64() ? -40 : (sti.is64Bit() ? -8 : -4)),
(sti.is64Bit() ? -8 : -4))),
TM(tm), STI(sti) { TM(tm), STI(sti) {
} }

View File

@ -1544,6 +1544,12 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs; SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext()); ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64) {
CCInfo.AllocateStack(32, 8);
}
CCInfo.AnalyzeFormalArguments(Ins, CC_X86); CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U; unsigned LastVal = ~0U;
@ -1778,8 +1784,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
DebugLoc dl, SelectionDAG &DAG, DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA, const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const { ISD::ArgFlagsTy Flags) const {
const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0); unsigned LocMemOffset = VA.getLocMemOffset();
unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal()) if (Flags.isByVal())
@ -1864,6 +1869,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs; SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext()); ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64) {
CCInfo.AllocateStack(32, 8);
}
CCInfo.AnalyzeCallOperands(Outs, CC_X86); CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack. // Get a count of how many bytes are to be pushed on the stack.
@ -2447,6 +2458,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs; SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext()); ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (Subtarget->isTargetWin64()) {
CCInfo.AllocateStack(32, 8);
}
CCInfo.AnalyzeCallOperands(Outs, CC_X86); CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) { if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction(); MachineFunction &MF = DAG.getMachineFunction();

View File

@ -1,9 +1,8 @@
; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp" ; RUN: llc -mtriple=x86_64-pc-mingw64 < %s | FileCheck %s
target triple = "x86_64-pc-mingw64" ; CHECK-NOT: -{{[1-9][0-9]*}}(%rsp)
define x86_fp80 @a(i64 %x) nounwind readnone { define x86_fp80 @a(i64 %x) nounwind readnone {
entry: entry:
%conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1] %conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
ret x86_fp80 %conv ret x86_fp80 %conv
} }

View File

@ -1,12 +1,10 @@
; RUN: llc < %s -o %t1 ; RUN: llc -mtriple=x86_64-pc-mingw64 < %s | FileCheck %s
; RUN: grep "subq.*\\\$72, \\\%rsp" %t1 ; CHECK: subq $40, %rsp
; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1 ; CHECK: movaps %xmm8, (%rsp)
; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1 ; CHECK: movaps %xmm7, 16(%rsp)
target triple = "x86_64-pc-mingw64"
define i32 @a() nounwind { define i32 @a() nounwind {
entry: entry:
tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
ret i32 undef ret i32 undef
} }

View File

@ -4,8 +4,8 @@
; on the stack. ; on the stack.
define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize { define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
entry: entry:
; CHECK: movl 80(%rsp), %eax ; CHECK: movl 48(%rsp), %eax
; CHECK: addl 72(%rsp), %eax ; CHECK: addl 40(%rsp), %eax
%add = add nsw i32 %p6, %p5 %add = add nsw i32 %p6, %p5
ret i32 %add ret i32 %add
} }

View File

@ -5,11 +5,11 @@
; calculated. ; calculated.
define void @average_va(i32 %count, ...) nounwind { define void @average_va(i32 %count, ...) nounwind {
entry: entry:
; CHECK: subq $40, %rsp ; CHECK: pushq
; CHECK: movq %r9, 72(%rsp) ; CHECK: movq %r9, 40(%rsp)
; CHECK: movq %r8, 64(%rsp) ; CHECK: movq %r8, 32(%rsp)
; CHECK: movq %rdx, 56(%rsp) ; CHECK: movq %rdx, 24(%rsp)
; CHECK: leaq 56(%rsp), %rax ; CHECK: leaq 24(%rsp), %rax
%ap = alloca i8*, align 8 ; <i8**> [#uses=1] %ap = alloca i8*, align 8 ; <i8**> [#uses=1]
%ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1] %ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]