[VE] Dynamic stack allocation

Summary:
This patch implements dynamic stack allocation for the VE target. Changes:
* compiler-rt: `__ve_grow_stack` to request stack allocation on the VE.
* VE: base pointer support, dynamic stack allocation.

Differential Revision: https://reviews.llvm.org/D79084
This commit is contained in:
Kazushi (Jam) Marukawa 2020-05-27 09:39:39 +02:00 committed by Simon Moll
parent fc44da746f
commit dedaf3a2ac
18 changed files with 366 additions and 89 deletions

View File

@ -166,6 +166,7 @@ macro(detect_target_arch)
check_symbol_exists(__sparcv9 "" __SPARCV9)
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
check_symbol_exists(__ve__ "" __VE)
if(__ARM)
add_default_target_arch(arm)
elseif(__AARCH64)
@ -200,6 +201,8 @@ macro(detect_target_arch)
add_default_target_arch(wasm32)
elseif(__WEBASSEMBLY64)
add_default_target_arch(wasm64)
elseif(__VE)
add_default_target_arch(ve)
endif()
endmacro()

View File

@ -237,6 +237,8 @@ macro(test_targets)
test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")
test_target_arch(wasm64 "" "--target=wasm64-unknown-unknown")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "ve")
test_target_arch(ve "__ve__" "--target=ve-unknown-none")
endif()
set(COMPILER_RT_OS_SUFFIX "")
endif()

View File

@ -37,6 +37,7 @@ set(SPARC sparc)
set(SPARCV9 sparcv9)
set(WASM32 wasm32)
set(WASM64 wasm64)
set(VE ve)
if(APPLE)
set(ARM64 arm64 arm64e)
@ -44,8 +45,11 @@ if(APPLE)
set(X86_64 x86_64 x86_64h)
endif()
set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64} ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9} ${WASM32} ${WASM64})
set(ALL_BUILTIN_SUPPORTED_ARCH
${X86} ${X86_64} ${ARM32} ${ARM64}
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64}
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
${WASM32} ${WASM64} ${VE})
include(CompilerRTUtils)
include(CompilerRTDarwinUtils)

View File

@ -573,6 +573,12 @@ set(wasm64_SOURCES
${GENERIC_SOURCES}
)
set(ve_SOURCES
ve/grow_stack.S
ve/grow_stack_align.S
${GENERIC_TF_SOURCES}
${GENERIC_SOURCES})
add_custom_target(builtins)
set_target_properties(builtins PROPERTIES FOLDER "Compiler-RT Misc")

View File

@ -0,0 +1,31 @@
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "../assembly.h"
// grow_stack routine
// This routine is VE specific
// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
// clobbers only %s62 and %s63 (besides %sp, which it adjusts)
#ifdef __ve__
.text
.p2align 4
# __ve_grow_stack(%s0 = allocation size in bytes):
# grows the stack downward by %s0 (rounded down to 16-byte alignment)
# and, if the new %sp falls below the stack limit %sl, issues the grow
# system call (0x13b) through the monitor call instruction.
DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack)
subu.l %sp, %sp, %s0 # sp -= alloca size
and %sp, -16, %sp # align sp
brge.l.t %sp, %sl, 1f # no syscall needed while sp >= stack limit
ld %s63, 0x18(,%tp) # load param area
lea %s62, 0x13b # syscall # of grow
shm.l %s62, 0x0(%s63) # stored at addr:0
shm.l %sl, 0x8(%s63) # old limit at addr:8
shm.l %sp, 0x10(%s63) # new limit at addr:16
monc # trap to perform the grow
1:
b.l (,%lr) # return; no result value
END_COMPILERRT_FUNCTION(__ve_grow_stack)
#endif // __ve__

View File

@ -0,0 +1,31 @@
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "../assembly.h"
// grow_stack routine
// This routine is VE specific
// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
// clobbers only %s62 and %s63 (besides %sp, which it adjusts)
#ifdef __ve__
.text
.p2align 4
# __ve_grow_stack_align(%s0 = allocation size in bytes,
#                       %s1 = alignment mask, i.e. ~(align - 1)):
# grows the stack downward by %s0, re-aligns %sp with the mask in %s1
# and, if the new %sp falls below the stack limit %sl, issues the grow
# system call (0x13b) through the monitor call instruction.
DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack_align)
subu.l %sp, %sp, %s0 # sp -= alloca size
and %sp, %sp, %s1 # align sp (%s1 = ~(align-1) from the caller)
brge.l.t %sp, %sl, 1f # no syscall needed while sp >= stack limit
ld %s63, 0x18(,%tp) # load param area
lea %s62, 0x13b # syscall # of grow
shm.l %s62, 0x0(%s63) # stored at addr:0
shm.l %sl, 0x8(%s63) # old limit at addr:8
shm.l %sp, 0x10(%s63) # new limit at addr:16
monc # trap to perform the grow
1:
b.l (,%lr) # return; no result value
END_COMPILERRT_FUNCTION(__ve_grow_stack_align)
#endif // __ve__

View File

@ -84,3 +84,6 @@ def RetCC_VE : CallingConv<[
// Callee-saved registers
def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
def CSR_NoRegs : CalleeSavedRegs<(add)>;
// PreserveAll (clobbers s62,s63) - used for ve_grow_stack
def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>;

View File

@ -30,12 +30,13 @@ using namespace llvm;
VEFrameLowering::VEFrameLowering(const VESubtarget &ST)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0,
Align(16)) {}
Align(16)),
STI(ST) {}
void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
int NumBytes,
uint64_t NumBytes,
bool RequireFPUpdate) const {
DebugLoc dl;
@ -47,6 +48,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
// st %lr, 8(,%sp)
// st %got, 24(,%sp)
// st %plt, 32(,%sp)
// st %s17, 40(,%sp) iff this function is using s17 as BP
// or %fp, 0, %sp
BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
@ -69,6 +71,12 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
.addImm(0)
.addImm(32)
.addReg(VE::SX16);
if (hasBP(MF))
BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
.addReg(VE::SX11)
.addImm(0)
.addImm(40)
.addReg(VE::SX17);
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
.addReg(VE::SX11)
.addImm(0);
@ -77,7 +85,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
int NumBytes,
uint64_t NumBytes,
bool RequireFPUpdate) const {
DebugLoc dl;
@ -86,6 +94,7 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
// Insert following codes here as epilogue
//
// or %sp, 0, %fp
// ld %s17, 40(,%sp) iff this function is using s17 as BP
// ld %got, 32(,%sp)
// ld %plt, 24(,%sp)
// ld %lr, 8(,%sp)
@ -94,6 +103,11 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
.addReg(VE::SX9)
.addImm(0);
if (hasBP(MF))
BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
.addReg(VE::SX11)
.addImm(0)
.addImm(40);
BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
.addReg(VE::SX11)
.addImm(0)
@ -115,7 +129,8 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
int NumBytes) const {
int64_t NumBytes,
MaybeAlign MaybeAlign) const {
DebugLoc dl;
const VEInstrInfo &TII =
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@ -143,11 +158,17 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
.addReg(VE::SX11)
.addReg(VE::SX13)
.addImm(Hi_32(NumBytes));
if (MaybeAlign) {
// and %sp, %sp, Align-1
BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
.addReg(VE::SX11)
.addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
}
}
void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
int NumBytes) const {
MachineBasicBlock::iterator MBBI) const {
DebugLoc dl;
const VEInstrInfo &TII =
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@ -186,11 +207,8 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo &MFI = MF.getFrameInfo();
const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
const VEInstrInfo &TII =
*static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo());
const VERegisterInfo &RegInfo =
*static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo());
const VEInstrInfo &TII = *STI.getInstrInfo();
const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@ -209,30 +227,15 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
"(probably because it has a dynamic alloca).");
// Get the number of bytes to allocate from the FrameInfo
int NumBytes = (int)MFI.getStackSize();
// The VE ABI requires a reserved 176-byte area in the user's stack, starting
// at %sp + 16. This is for the callee Register Save Area (RSA).
//
// We therefore need to add that offset to the total stack size
// after all the stack objects are placed by
// PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack
// needs to be aligned *after* the extra size is added, we need to disable
// calculateFrameObjectOffsets's built-in stack alignment, by having
// targetHandlesStackFrameRounding return true.
uint64_t NumBytes = MFI.getStackSize();
// Add the extra call frame stack size, if needed. (This is the same
// code as in PrologEpilogInserter, but also gets disabled by
// targetHandlesStackFrameRounding)
if (MFI.adjustsStack() && hasReservedCallFrame(MF))
NumBytes += MFI.getMaxCallFrameSize();
// Adds the VE subtarget-specific spill area to the stack
// size. Also ensures target-required alignment.
NumBytes = Subtarget.getAdjustedFrameSize(NumBytes);
// The VE ABI requires a reserved 176 bytes area at the top
// of stack as described in VESubtarget.cpp. So, we adjust it here.
NumBytes = STI.getAdjustedFrameSize(NumBytes);
// Finally, ensure that the size is sufficiently aligned for the
// data on the stack.
NumBytes = alignTo(NumBytes, MFI.getMaxAlign().value());
NumBytes = alignTo(NumBytes, MFI.getMaxAlign());
// Update stack size with corrected value.
MFI.setStackSize(NumBytes);
@ -241,16 +244,25 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
// Emit stack adjust instructions
emitSPAdjustment(MF, MBB, MBBI, -NumBytes);
MaybeAlign RuntimeAlign =
NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
if (hasBP(MF)) {
// Copy SP to BP.
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
.addReg(VE::SX11)
.addImm(0);
}
// Emit stack extend instructions
emitSPExtend(MF, MBB, MBBI, -NumBytes);
emitSPExtend(MF, MBB, MBBI);
unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true);
Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
// Emit ".cfi_def_cfa_register 30".
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@ -265,7 +277,7 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
MachineBasicBlock::iterator I) const {
if (!hasReservedCallFrame(MF)) {
MachineInstr &MI = *I;
int Size = MI.getOperand(0).getImm();
int64_t Size = MI.getOperand(0).getImm();
if (MI.getOpcode() == VE::ADJCALLSTACKDOWN)
Size = -Size;
@ -281,20 +293,17 @@ void VEFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo &MFI = MF.getFrameInfo();
int NumBytes = (int)MFI.getStackSize();
uint64_t NumBytes = MFI.getStackSize();
// Emit Epilogue instructions to restore %lr
emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
}
bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// Reserve call frame if there are no variable sized objects on the stack.
return !MF.getFrameInfo().hasVarSizedObjects();
}
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
// pointer register. This is true if the function has variable sized allocas
// or if frame pointer elimination is disabled. In the case of VE, we don't
// implement an FP eliminator yet; we return false from this function so
// that generated code does not refer to %fp.
bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@ -304,44 +313,41 @@ bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
MFI.isFrameAddressTaken();
}
bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
}
int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const {
const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const VERegisterInfo *RegInfo = STI.getRegisterInfo();
const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
bool isFixed = MFI.isFixedObjectIndex(FI);
// Addressable stack objects are accessed using neg. offsets from
// %fp, or positive offsets from %sp.
bool UseFP = true;
int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
// VE uses FP-based references in general, even when "hasFP" is
// false. That function is rather a misnomer, because %fp is
// actually always available, unless isLeafProc.
if (FuncInfo->isLeafProc()) {
// If there's a leaf proc, all offsets need to be %sp-based,
// because we haven't caused %fp to actually point to our frame.
UseFP = false;
} else if (isFixed) {
// Otherwise, argument access should always use %fp.
UseFP = true;
} else if (RegInfo->needsStackRealignment(MF)) {
FrameReg = VE::SX11; // %sp
return FrameOffset + MF.getFrameInfo().getStackSize();
}
if (RegInfo->needsStackRealignment(MF) && !isFixed) {
// If there is dynamic stack realignment, all local object
// references need to be via %sp, to take account of the
// re-alignment.
UseFP = false;
// references need to be via %sp or %s17 (bp), to take account
// of the re-alignment.
if (hasBP(MF))
FrameReg = VE::SX17; // %bp
else
FrameReg = VE::SX11; // %sp
return FrameOffset + MF.getFrameInfo().getStackSize();
}
int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return FrameOffset;
}
FrameReg = VE::SX11; // %sp
return FrameOffset + MF.getFrameInfo().getStackSize();
// Finally, default to using %fp.
FrameReg = RegInfo->getFrameRegister(MF);
return FrameOffset;
}
bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {

View File

@ -28,18 +28,23 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int NumBytes,
MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
bool RequireFPUpdate) const;
void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int NumBytes,
MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
bool RequireFPUpdate) const;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool hasBP(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const override;
// VE reserves argument space always for call sites in the function
// immediately on entry of the current function.
bool hasReservedCallFrame(const MachineFunction &MF) const override {
return true;
}
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
@ -58,10 +63,8 @@ public:
return Offsets;
}
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
/// time).
bool targetHandlesStackFrameRounding() const override { return true; }
protected:
const VESubtarget &STI;
private:
// Returns true if MF is a leaf procedure.
@ -69,11 +72,12 @@ private:
// Emits code for adjusting SP in function prologue/epilogue.
void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int NumBytes) const;
MachineBasicBlock::iterator MBBI, int64_t NumBytes,
MaybeAlign MayAlign = MaybeAlign()) const;
// Emits code for extending SP in function prologue/epilogue.
void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int NumBytes) const;
MachineBasicBlock::iterator MBBI) const;
};
} // namespace llvm

View File

@ -583,6 +583,11 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VAEND, MVT::Other, Expand);
/// } VAARG handling
/// Stack {
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
/// } Stack
/// Int Ops {
for (MVT IntVT : {MVT::i32, MVT::i64}) {
// VE has no REM or DIVREM operations.
@ -641,6 +646,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(Lo)
TARGET_NODE_CASE(Hi)
TARGET_NODE_CASE(GETFUNPLT)
TARGET_NODE_CASE(GETSTACKTOP)
TARGET_NODE_CASE(GETTLSADDR)
TARGET_NODE_CASE(CALL)
TARGET_NODE_CASE(RET_FLAG)
@ -860,12 +866,79 @@ SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
}
SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
// Lower ISD::DYNAMIC_STACKALLOC by generating the following code:
// (void)__ve_grow_stack(size); // or __ve_grow_stack_align(size, mask)
// ret = GETSTACKTOP; // pseudo instruction
SDLoc DL(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
MaybeAlign Alignment(Op.getConstantOperandVal(2));
EVT VT = Node->getValueType(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
Align StackAlign = TFI.getStackAlign();
// A runtime re-alignment is only needed when the requested alignment
// exceeds the target's default stack alignment.
bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
// Prepare arguments
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Size;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Args.push_back(Entry);
if (NeedsAlign) {
// Second argument of __ve_grow_stack_align: the mask ~(align - 1).
Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Args.push_back(Entry);
}
Type *RetTy = Type::getVoidTy(*DAG.getContext());
EVT PtrVT = Op.getValueType();
SDValue Callee;
if (NeedsAlign) {
Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
} else {
Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
}
// The helpers are called with CallingConv::PreserveAll, so only %s62
// and %s63 are clobbered (see CSR_preserve_all in VECallingConv.td).
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
.setChain(Chain)
.setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
.setDiscardResult(true);
std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
Chain = pair.second;
// Read the new stack top through the GETSTACKTOP pseudo instruction.
SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
if (NeedsAlign) {
// Round the returned stack-top address up to the requested alignment.
Result = DAG.getNode(ISD::ADD, DL, VT, Result,
DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
Result = DAG.getNode(ISD::AND, DL, VT, Result,
DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
}
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
SDValue Ops[2] = {Result, Chain};
return DAG.getMergeValues(Ops, DL);
}
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
llvm_unreachable("Should not custom lower this!");
case ISD::BlockAddress:
return LowerBlockAddress(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return lowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:

View File

@ -27,8 +27,10 @@ enum NodeType : unsigned {
Hi,
Lo, // Hi/Lo operations, typically on a global address.
GETFUNPLT, // load function address through %plt insturction
GETTLSADDR, // load address for TLS access
GETFUNPLT, // load function address through %plt insturction
GETTLSADDR, // load address for TLS access
GETSTACKTOP, // retrieve address of stack top (first address of
// locals and temporaries)
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
@ -81,6 +83,7 @@ public:
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
/// } Custom Lower
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;

View File

@ -25,7 +25,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define DEBUG_TYPE "ve"
#define DEBUG_TYPE "ve-instr-info"
using namespace llvm;
@ -457,6 +457,9 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent(); // The pseudo instruction is gone now.
return true;
}
case VE::GETSTACKTOP: {
return expandGetStackTopPseudo(MI);
}
}
return false;
}
@ -464,8 +467,8 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const VEInstrInfo &TII =
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
const VEInstrInfo &TII = *STI.getInstrInfo();
DebugLoc dl = MBB.findDebugLoc(MI);
// Create following instructions and multiple basic blocks.
@ -544,3 +547,35 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
MI.eraseFromParent(); // The pseudo instruction is gone now.
return true;
}
// Expand the GETSTACKTOP pseudo: materialize the address of the first
// local/temporary stack slot, i.e. %sp plus the target-reserved area
// plus the outgoing call parameter area, into the destination register.
bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction &MF = *MBB->getParent();
  const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
  const VEInstrInfo &TII = *STI.getInstrInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const VEFrameLowering &TFL = *STI.getFrameLowering();
  DebugLoc DL = MBB->findDebugLoc(MI);
  Register Dest = MI.getOperand(0).getReg();

  // The VE ABI requires a reserved 176 bytes area at the top of stack
  // as described in VESubtarget.cpp; skip over it.
  unsigned Offset = STI.getAdjustedFrameSize(0);
  // Also skip the outgoing call parameter area when it is reserved.
  if (MFI.adjustsStack() && TFL.hasReservedCallFrame(MF))
    Offset += MFI.getMaxCallFrameSize();

  // Dest = %sp + Offset
  BuildMI(*MBB, MI, DL, TII.get(VE::LEArii))
      .addDef(Dest)
      .addReg(VE::SX11)
      .addImm(0)
      .addImm(Offset);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return true;
}

View File

@ -81,6 +81,7 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
bool expandExtendStackPseudo(MachineInstr &MI) const;
bool expandGetStackTopPseudo(MachineInstr &MI) const;
};
} // namespace llvm

View File

@ -414,6 +414,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
// GETSTACKTOP
def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
//===----------------------------------------------------------------------===//
@ -1398,6 +1401,14 @@ def EXTEND_STACK_GUARD : Pseudo<(outs), (ins),
"# EXTEND STACK GUARD",
[]>;
// Dynamic stack allocation yields a __ve_grow_stack call for VE targets.
// These calls are needed to probe the stack when allocating more over
// %s8 (%sl - stack limit).
// GETSTACKTOP reads the resulting stack top relative to %sp (SX11);
// it is expanded in VEInstrInfo::expandGetStackTopPseudo.
let Uses = [SX11], hasSideEffects = 1 in
def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
"# GET STACK TOP",
[(set iPTR:$dst, (GetStackTop))]>;
// SETCC pattern matches
//
// CMP %tmp, lhs, rhs ; compare lhs and rhs

View File

@ -34,12 +34,22 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
const MCPhysReg *
VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SaveList;
switch (MF->getFunction().getCallingConv()) {
default:
return CSR_SaveList;
case CallingConv::PreserveAll:
return CSR_preserve_all_SaveList;
}
}
const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
return CSR_RegMask;
switch (CC) {
default:
return CSR_RegMask;
case CallingConv::PreserveAll:
return CSR_preserve_all_RegMask;
}
}
const uint32_t *VERegisterInfo::getNoPreservedMask() const {

View File

@ -42,7 +42,7 @@ public:
const TargetMachine &TM);
const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const TargetFrameLowering *getFrameLowering() const override {
const VEFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const VERegisterInfo *getRegisterInfo() const override {

View File

@ -0,0 +1,25 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
declare void @bar(i8*, i64)
; Function Attrs: nounwind
; Verify lowering of a variably-sized alloca: the size is rounded up to
; 16 bytes, the compiler-rt helper __ve_grow_stack is called, and the
; allocated address is then read back as %sp + 240 (the 176-byte
; reserved area plus the outgoing call parameter area; see
; VEInstrInfo::expandGetStackTopPseudo).
define void @test(i64 %n) {
; CHECK-LABEL: test:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s1, 0, %s0
; CHECK-NEXT: lea %s0, 15(, %s0)
; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s2, __ve_grow_stack@lo
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s2)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: lea %s2, bar@lo
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: lea.sl %s12, bar@hi(, %s2)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
%dyna = alloca i8, i64 %n, align 8
call void @bar(i8* %dyna, i64 %n)
ret void
}

View File

@ -0,0 +1,29 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
declare void @bar(i8*, i64)
; Function Attrs: nounwind
; Verify lowering of an over-aligned (align 32 > default stack align)
; variably-sized alloca: __ve_grow_stack_align is called with the mask
; -32 (= ~(32-1)) in the second argument, and the returned stack-top
; address is rounded up to the requested 32-byte alignment.
define void @test(i64 %n) {
; CHECK-LABEL: test:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s2, 0, %s0
; CHECK-NEXT: lea %s0, 15(, %s0)
; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
; CHECK-NEXT: or %s1, -32, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: lea %s0, 31(, %s0)
; CHECK-NEXT: and %s0, -32, %s0
; CHECK-NEXT: lea %s1, bar@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, bar@hi(, %s1)
; CHECK-NEXT: or %s1, 0, %s2
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
%dyna = alloca i8, i64 %n, align 32
call void @bar(i8* %dyna, i64 %n)
ret void
}