[X86] Put no-op ADJCALLSTACK markers around all dynamic lowerings

Summary:
These ADJCALLSTACK markers don't generate code, but they keep dynamic
alloca code that calls chkstk out of the prologue.

This slightly pessimizes inalloca calls by preventing some register copy
coalescing, but I can live with that.

Reviewers: qcolombet

Subscribers: hans, llvm-commits

Differential Revision: http://reviews.llvm.org/D15200

llvm-svn: 254645
This commit is contained in:
Reid Kleckner 2015-12-03 20:46:59 +00:00
parent 69a372dc50
commit 93fc520339
4 changed files with 78 additions and 48 deletions

View File

@ -15622,54 +15622,40 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SplitStack;
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
EVT VT = Node->getValueType(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
bool Is64Bit = Subtarget->is64Bit();
MVT SPTy = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (!Lower) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDNode* Node = Op.getNode();
unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
" not tell us which reg is the stack pointer!");
EVT VT = Node->getValueType(0);
SDValue Tmp1 = SDValue(Node, 0);
SDValue Tmp2 = SDValue(Node, 1);
SDValue Tmp3 = Node->getOperand(2);
SDValue Chain = Tmp1.getOperand(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true),
SDLoc(Node));
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(),
SDLoc(Node));
SDValue Ops[2] = { Tmp1, Tmp2 };
return DAG.getMergeValues(Ops, dl);
}
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
MVT SPTy = getPointerTy(DAG.getDataLayout());
if (SplitStack) {
Result = DAG.getNode(ISD::AND, dl, VT, Result,
DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
} else if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Is64Bit) {
@ -15687,10 +15673,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
SDValue Ops1[2] = { Value, Chain };
return DAG.getMergeValues(Ops1, dl);
} else {
SDValue Flag;
const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
@ -15712,9 +15696,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
SDValue Ops1[2] = { SP, Chain };
return DAG.getMergeValues(Ops1, dl);
Result = SP;
}
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
SDValue Ops[2] = {Result, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {

View File

@ -14,8 +14,9 @@ define void @g() {
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
; CHECK: movl $13, (%esp)
; CHECK: movl $42, 4(%esp)
; CHECK: movl %esp, %eax
; CHECK: movl $13, (%eax)
; CHECK: movl $42, 4(%eax)
call x86_stdcallcc void @f(%Foo* inalloca %b)
; CHECK: calll _f@8
; CHECK-NOT: %esp

View File

@ -14,8 +14,9 @@ entry:
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
; CHECK: movl $13, (%esp)
; CHECK: movl $42, 4(%esp)
; CHECK: movl %esp, %eax
; CHECK: movl $13, (%eax)
; CHECK: movl $42, 4(%eax)
call void @f(%Foo* inalloca %b)
; CHECK: calll _f
ret void
@ -33,8 +34,9 @@ entry:
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
; CHECK: movl $13, (%esp)
; CHECK: movl $42, 4(%esp)
; CHECK: movl %esp, %eax
; CHECK: movl $13, (%eax)
; CHECK: movl $42, 4(%eax)
call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b)
; CHECK: movl $1, %eax
; CHECK: calll _inreg_with_inalloca
@ -53,8 +55,9 @@ entry:
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
; CHECK-DAG: movl $13, (%esp)
; CHECK-DAG: movl $42, 4(%esp)
; CHECK: movl %esp, %eax
; CHECK-DAG: movl $13, (%eax)
; CHECK-DAG: movl $42, 4(%eax)
call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b)
; CHECK-DAG: xorl %ecx, %ecx
; CHECK: calll _thiscall_with_inalloca

View File

@ -0,0 +1,37 @@
; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
; chkstk cannot come before the usual prologue, since it adjusts ESP.
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc18.0.0"
%struct.S = type { [12 x i8] }
define x86_thiscallcc void @call_inalloca(i1 %x) {
entry:
%argmem = alloca inalloca <{ %struct.S }>, align 4
%argidx1 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 0
%argidx2 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 1
store i8 42, i8* %argidx2, align 4
br i1 %x, label %bb1, label %bb2
bb1:
store i8 42, i8* %argidx1, align 4
br label %bb2
bb2:
call void @inalloca_params(<{ %struct.S }>* inalloca nonnull %argmem)
ret void
}
; CHECK-LABEL: _call_inalloca: # @call_inalloca
; CHECK: pushl %ebp
; CHECK: movl %esp, %ebp
; CHECK: movl $12, %eax
; CHECK: calll __chkstk
; CHECK: calll _inalloca_params
; CHECK: movl %ebp, %esp
; CHECK: popl %ebp
; CHECK: retl
declare void @inalloca_params(<{ %struct.S }>* inalloca)