forked from OSchip/llvm-project
[AArch64] Implement dynamic stack probing for Windows
This makes sure that alloca() function calls properly probe the stack as needed. Differential Revision: https://reviews.llvm.org/D42356 llvm-svn: 325433
This commit is contained in:
parent
63db669013
commit
a63a5b993e
|
@ -1023,6 +1023,10 @@ bool IRTranslator::translateAlloca(const User &U,
|
|||
return true;
|
||||
}
|
||||
|
||||
// FIXME: support stack probing for Windows.
|
||||
if (MF->getTarget().getTargetTriple().isOSWindows())
|
||||
return false;
|
||||
|
||||
// Now we're in the harder dynamic case.
|
||||
Type *Ty = AI.getAllocatedType();
|
||||
unsigned Align =
|
||||
|
|
|
@ -345,3 +345,7 @@ def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
|
|||
def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
|
||||
(sequence "X%u", 9, 15))>;
|
||||
|
||||
// Register set listed as callee-saved for the Windows stack probe call:
// X0-X15, X18-X28, FP, SP and Q0-Q31. X16 and X17 are not in the list.
def CSR_AArch64_StackProbe_Windows
|
||||
: CalleeSavedRegs<(add (sequence "X%u", 0, 15),
|
||||
(sequence "X%u", 18, 28), FP, SP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
|
|
@ -253,6 +253,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
// Variable-sized objects.
|
||||
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
|
||||
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
|
||||
|
||||
if (Subtarget->isTargetWindows())
|
||||
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
|
||||
else
|
||||
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
|
||||
|
||||
// Constant pool entries
|
||||
|
@ -2687,6 +2691,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
|
|||
return LowerATOMIC_LOAD_SUB(Op, DAG);
|
||||
case ISD::ATOMIC_LOAD_AND:
|
||||
return LowerATOMIC_LOAD_AND(Op, DAG);
|
||||
case ISD::DYNAMIC_STACKALLOC:
|
||||
return LowerDYNAMIC_STACKALLOC(Op, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7415,6 +7421,67 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
|
|||
AN->getMemOperand());
|
||||
}
|
||||
|
||||
/// Emit the call to the "__chkstk" stack probe helper for a dynamic
/// allocation.
///
/// \param Op    The node being lowered; only used for its debug location.
/// \param Chain The incoming chain the probe call is threaded onto.
/// \param Size  In/out. On entry the requested size; on exit the value
///              (Size >> 4) << 4, i.e. the size with its low four bits
///              cleared.
/// \param DAG   The SelectionDAG in which the nodes are created.
/// \returns The chain result of the emitted call node.
///
/// NOTE(review): presumably __chkstk takes the allocation size in X15 in
/// units of 16 bytes -- confirm against the Windows ARM64 ABI documentation.
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
|
||||
SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
EVT PtrVT = getPointerTy(DAG.getDataLayout());
|
||||
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
|
||||
|
||||
// The probe call uses its own preserved-register mask instead of the one
// from the normal calling convention.
const uint32_t *Mask =
|
||||
Subtarget->getRegisterInfo()->getWindowsStackProbePreservedMask();
|
||||
|
||||
// Scale the size down by 16 and hand it to the callee in X15.
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
|
||||
DAG.getConstant(4, dl, MVT::i64));
|
||||
Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
|
||||
// Chain.getValue(1) is the glue result of the copy above; passing it as the
// last operand ties the call to the X15 copy.
Chain =
|
||||
DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
|
||||
Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
|
||||
DAG.getRegisterMask(Mask), Chain.getValue(1));
|
||||
// To match the actual intent better, we should read the output from X15 here
|
||||
// again (instead of potentially spilling it to the stack), but rereading Size
|
||||
// from X15 here doesn't work at -O0, since it thinks that X15 is undefined
|
||||
// here.
|
||||
|
||||
// Scale the size back up to bytes for the caller's SP adjustment.
Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
|
||||
DAG.getConstant(4, dl, MVT::i64));
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// Custom lowering of ISD::DYNAMIC_STACKALLOC for Windows targets.
///
/// Wraps the stack probe call and the stack pointer adjustment in a
/// CALLSEQ_START / CALLSEQ_END pair. Operand 0 of \p Op is the chain,
/// operand 1 the allocation size and operand 2 a constant alignment.
///
/// \returns Merged values {new stack pointer, output chain}.
SDValue
|
||||
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
assert(Subtarget->isTargetWindows() &&
|
||||
"Only Windows alloca probing supported");
|
||||
SDLoc dl(Op);
|
||||
// Get the inputs.
|
||||
SDNode *Node = Op.getNode();
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Size = Op.getOperand(1);
|
||||
unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
|
||||
EVT VT = Node->getValueType(0);
|
||||
|
||||
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
|
||||
|
||||
// Emit the __chkstk call. Size is passed by reference and is updated by the
// helper to the value that must be subtracted from SP.
Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
|
||||
|
||||
// Read SP, subtract the allocation size and write the result back.
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
|
||||
Chain = SP.getValue(1);
|
||||
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
|
||||
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
|
||||
|
||||
// A non-zero alignment operand: mask the new SP with -Align and write SP a
// second time.
if (Align) {
|
||||
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
|
||||
DAG.getConstant(-(uint64_t)Align, dl, VT));
|
||||
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
|
||||
}
|
||||
|
||||
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
|
||||
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
|
||||
|
||||
// Result 0 is the allocated address (the new SP), result 1 the chain.
SDValue Ops[2] = {SP, Chain};
|
||||
return DAG.getMergeValues(Ops, dl);
|
||||
}
|
||||
|
||||
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
|
||||
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
|
||||
/// specified in the intrinsic calls.
|
||||
|
|
|
@ -598,6 +598,10 @@ private:
|
|||
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
|
||||
/// Custom lowering for ISD::DYNAMIC_STACKALLOC nodes.
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
|
||||
/// Emit the Windows stack probe call for a dynamic allocation. \p Size is
/// taken by reference and may be modified; the returned value is the chain.
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
|
||||
SDValue &Size,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
||||
std::vector<SDNode *> *Created) const override;
|
||||
|
|
|
@ -114,6 +114,10 @@ AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
|
|||
return CSR_AArch64_AAPCS_ThisReturn_RegMask;
|
||||
}
|
||||
|
||||
/// Return the register mask CSR_AArch64_StackProbe_Windows_RegMask, the
/// preserved-register mask for the Windows stack probe call.
const uint32_t *AArch64RegisterInfo::getWindowsStackProbePreservedMask() const {
|
||||
return CSR_AArch64_StackProbe_Windows_RegMask;
|
||||
}
|
||||
|
||||
BitVector
|
||||
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
const AArch64FrameLowering *TFI = getFrameLowering(MF);
|
||||
|
|
|
@ -61,6 +61,9 @@ public:
|
|||
const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
|
||||
CallingConv::ID) const;
|
||||
|
||||
/// Stack probing calls preserve different CSRs to the normal CC.
|
||||
const uint32_t *getWindowsStackProbePreservedMask() const;
|
||||
|
||||
BitVector getReservedRegs(const MachineFunction &MF) const override;
|
||||
bool isConstantPhysReg(unsigned PhysReg) const override;
|
||||
const TargetRegisterClass *
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
; RUN: llc -mtriple aarch64-windows -verify-machineinstrs -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-OPT
|
||||
; RUN: llc -mtriple aarch64-windows -verify-machineinstrs -filetype asm -o - %s -O0 | FileCheck %s
|
||||
|
||||
; Test input: dynamically allocates %a bytes of i8 with 16-byte alignment and
; passes the resulting pointer to @func2.
define void @func(i64 %a) {
|
||||
entry:
|
||||
%0 = alloca i8, i64 %a, align 16
|
||||
call void @func2(i8* nonnull %0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @func2(i8*)
|
||||
|
||||
; The -O0 version here ends up much less elegant, so just check the
|
||||
; details of the optimized form, but check that -O0 at least emits the
|
||||
; call to __chkstk.
|
||||
|
||||
; CHECK: add [[REG1:x[0-9]+]], x0, #15
|
||||
; CHECK-OPT: lsr x15, [[REG1]], #4
|
||||
; CHECK: bl __chkstk
|
||||
; CHECK: mov [[REG2:x[0-9]+]], sp
|
||||
; CHECK-OPT: sub [[REG3:x[0-9]+]], [[REG2]], x15, lsl #4
|
||||
; CHECK-OPT: mov sp, [[REG3:x[0-9]+]]
|
||||
; CHECK: bl func2
|
|
@ -159,14 +159,15 @@ attributes #6 = { "no-frame-pointer-elim"="true" }
|
|||
; CHECK: stur x8, [x29, #-40]
|
||||
; CHECK: mov w8, w0
|
||||
; CHECK: add x8, x8, #15
|
||||
; CHECK: mov x9, sp
|
||||
; CHECK: and x8, x8, #0x1fffffff0
|
||||
; CHECK: sub [[REG:x[0-9]+]], x9, x8
|
||||
; CHECK: lsr x15, x8, #4
|
||||
; CHECK: mov x19, x1
|
||||
; CHECK: mov [[REG2:x[0-9]+]], sp
|
||||
; CHECK: stp x6, x7, [x29, #48]
|
||||
; CHECK: stp x4, x5, [x29, #32]
|
||||
; CHECK: stp x2, x3, [x29, #16]
|
||||
; CHECK: bl __chkstk
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK: sub [[REG:x[0-9]+]], x8, x15, lsl #4
|
||||
; CHECK: mov sp, [[REG]]
|
||||
; CHECK: ldur [[REG3:x[0-9]+]], [x29, #-40]
|
||||
; CHECK: sxtw [[REG4:x[0-9]+]], w0
|
||||
|
|
Loading…
Reference in New Issue