forked from OSchip/llvm-project
[VE] Dynamic stack allocation
Summary: This patch implements dynamic stack allocation for the VE target. Changes: * compiler-rt: `__ve_grow_stack` to request stack allocation on the VE. * VE: base pointer support, dynamic stack allocation. Differential Revision: https://reviews.llvm.org/D79084
This commit is contained in:
parent
fc44da746f
commit
dedaf3a2ac
|
@ -166,6 +166,7 @@ macro(detect_target_arch)
|
|||
check_symbol_exists(__sparcv9 "" __SPARCV9)
|
||||
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
|
||||
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
|
||||
check_symbol_exists(__ve__ "" __VE)
|
||||
if(__ARM)
|
||||
add_default_target_arch(arm)
|
||||
elseif(__AARCH64)
|
||||
|
@ -200,6 +201,8 @@ macro(detect_target_arch)
|
|||
add_default_target_arch(wasm32)
|
||||
elseif(__WEBASSEMBLY64)
|
||||
add_default_target_arch(wasm64)
|
||||
elseif(__VE)
|
||||
add_default_target_arch(ve)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
|
|
|
@ -237,6 +237,8 @@ macro(test_targets)
|
|||
test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
|
||||
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")
|
||||
test_target_arch(wasm64 "" "--target=wasm64-unknown-unknown")
|
||||
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "ve")
|
||||
test_target_arch(ve "__ve__" "--target=ve-unknown-none")
|
||||
endif()
|
||||
set(COMPILER_RT_OS_SUFFIX "")
|
||||
endif()
|
||||
|
|
|
@ -37,6 +37,7 @@ set(SPARC sparc)
|
|||
set(SPARCV9 sparcv9)
|
||||
set(WASM32 wasm32)
|
||||
set(WASM64 wasm64)
|
||||
set(VE ve)
|
||||
|
||||
if(APPLE)
|
||||
set(ARM64 arm64 arm64e)
|
||||
|
@ -44,8 +45,11 @@ if(APPLE)
|
|||
set(X86_64 x86_64 x86_64h)
|
||||
endif()
|
||||
|
||||
set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
|
||||
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64} ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9} ${WASM32} ${WASM64})
|
||||
set(ALL_BUILTIN_SUPPORTED_ARCH
|
||||
${X86} ${X86_64} ${ARM32} ${ARM64}
|
||||
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64}
|
||||
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
|
||||
${WASM32} ${WASM64} ${VE})
|
||||
|
||||
include(CompilerRTUtils)
|
||||
include(CompilerRTDarwinUtils)
|
||||
|
|
|
@ -573,6 +573,12 @@ set(wasm64_SOURCES
|
|||
${GENERIC_SOURCES}
|
||||
)
|
||||
|
||||
set(ve_SOURCES
|
||||
ve/grow_stack.S
|
||||
ve/grow_stack_align.S
|
||||
${GENERIC_TF_SOURCES}
|
||||
${GENERIC_SOURCES})
|
||||
|
||||
add_custom_target(builtins)
|
||||
set_target_properties(builtins PROPERTIES FOLDER "Compiler-RT Misc")
|
||||
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#include "../assembly.h"
|
||||
|
||||
// grow_stack routine
|
||||
// This routine is VE specific
|
||||
// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
|
||||
|
||||
// clobbers only %s62 and %s63 (besides updating %sp)
|
||||
|
||||
#ifdef __ve__
|
||||
|
||||
.text
|
||||
.p2align 4
|
||||
DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack)
|
||||
subu.l %sp, %sp, %s0 # sp -= alloca size
|
||||
and %sp, -16, %sp # align sp
|
||||
brge.l.t %sp, %sl, 1f
|
||||
ld %s63, 0x18(,%tp) # load param area
|
||||
lea %s62, 0x13b # syscall # of grow
|
||||
shm.l %s62, 0x0(%s63) # stored at addr:0
|
||||
shm.l %sl, 0x8(%s63) # old limit at addr:8
|
||||
shm.l %sp, 0x10(%s63) # new limit at addr:16
|
||||
monc
|
||||
1:
|
||||
b.l (,%lr)
|
||||
END_COMPILERRT_FUNCTION(__ve_grow_stack)
|
||||
|
||||
#endif // __ve__
|
|
@ -0,0 +1,31 @@
|
|||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#include "../assembly.h"
|
||||
|
||||
// grow_stack_align routine
|
||||
// This routine is VE specific
|
||||
// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
|
||||
|
||||
// clobbers only %s62 and %s63 (besides updating %sp)
|
||||
|
||||
#ifdef __ve__
|
||||
|
||||
.text
|
||||
.p2align 4
|
||||
DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack_align)
|
||||
subu.l %sp, %sp, %s0 # sp -= alloca size
|
||||
and %sp, %sp, %s1 # align sp
|
||||
brge.l.t %sp, %sl, 1f
|
||||
ld %s63, 0x18(,%tp) # load param area
|
||||
lea %s62, 0x13b # syscall # of grow
|
||||
shm.l %s62, 0x0(%s63) # stored at addr:0
|
||||
shm.l %sl, 0x8(%s63) # old limit at addr:8
|
||||
shm.l %sp, 0x10(%s63) # new limit at addr:16
|
||||
monc
|
||||
1:
|
||||
b.l (,%lr)
|
||||
END_COMPILERRT_FUNCTION(__ve_grow_stack_align)
|
||||
|
||||
#endif // __ve__
|
|
@ -84,3 +84,6 @@ def RetCC_VE : CallingConv<[
|
|||
// Callee-saved registers
|
||||
def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
|
||||
def CSR_NoRegs : CalleeSavedRegs<(add)>;
|
||||
|
||||
// PreserveAll (clobbers s62,s63) - used for __ve_grow_stack
|
||||
def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>;
|
||||
|
|
|
@ -30,12 +30,13 @@ using namespace llvm;
|
|||
|
||||
VEFrameLowering::VEFrameLowering(const VESubtarget &ST)
|
||||
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0,
|
||||
Align(16)) {}
|
||||
Align(16)),
|
||||
STI(ST) {}
|
||||
|
||||
void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
int NumBytes,
|
||||
uint64_t NumBytes,
|
||||
bool RequireFPUpdate) const {
|
||||
|
||||
DebugLoc dl;
|
||||
|
@ -47,6 +48,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
|
|||
// st %lr, 8(,%sp)
|
||||
// st %got, 24(,%sp)
|
||||
// st %plt, 32(,%sp)
|
||||
// st %s17, 40(,%sp) iff this function is using s17 as BP
|
||||
// or %fp, 0, %sp
|
||||
|
||||
BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
|
||||
|
@ -69,6 +71,12 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
|
|||
.addImm(0)
|
||||
.addImm(32)
|
||||
.addReg(VE::SX16);
|
||||
if (hasBP(MF))
|
||||
BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
|
||||
.addReg(VE::SX11)
|
||||
.addImm(0)
|
||||
.addImm(40)
|
||||
.addReg(VE::SX17);
|
||||
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
|
||||
.addReg(VE::SX11)
|
||||
.addImm(0);
|
||||
|
@ -77,7 +85,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
|
|||
void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
int NumBytes,
|
||||
uint64_t NumBytes,
|
||||
bool RequireFPUpdate) const {
|
||||
|
||||
DebugLoc dl;
|
||||
|
@ -86,6 +94,7 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
|
|||
// Insert following codes here as epilogue
|
||||
//
|
||||
// or %sp, 0, %fp
|
||||
// ld %s17, 40(,%sp) iff this function is using s17 as BP
|
||||
// ld %got, 32(,%sp)
|
||||
// ld %plt, 24(,%sp)
|
||||
// ld %lr, 8(,%sp)
|
||||
|
@ -94,6 +103,11 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
|
|||
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
|
||||
.addReg(VE::SX9)
|
||||
.addImm(0);
|
||||
if (hasBP(MF))
|
||||
BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
|
||||
.addReg(VE::SX11)
|
||||
.addImm(0)
|
||||
.addImm(40);
|
||||
BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
|
||||
.addReg(VE::SX11)
|
||||
.addImm(0)
|
||||
|
@ -115,7 +129,8 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
|
|||
void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
int NumBytes) const {
|
||||
int64_t NumBytes,
|
||||
MaybeAlign MaybeAlign) const {
|
||||
DebugLoc dl;
|
||||
const VEInstrInfo &TII =
|
||||
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
|
||||
|
@ -143,11 +158,17 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
|
|||
.addReg(VE::SX11)
|
||||
.addReg(VE::SX13)
|
||||
.addImm(Hi_32(NumBytes));
|
||||
|
||||
if (MaybeAlign) {
|
||||
// and %sp, %sp, ~(Align-1)
|
||||
BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
|
||||
.addReg(VE::SX11)
|
||||
.addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
|
||||
}
|
||||
}
|
||||
|
||||
void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
int NumBytes) const {
|
||||
MachineBasicBlock::iterator MBBI) const {
|
||||
DebugLoc dl;
|
||||
const VEInstrInfo &TII =
|
||||
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
|
||||
|
@ -186,11 +207,8 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
MachineBasicBlock &MBB) const {
|
||||
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
|
||||
const VEInstrInfo &TII =
|
||||
*static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo());
|
||||
const VERegisterInfo &RegInfo =
|
||||
*static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo());
|
||||
const VEInstrInfo &TII = *STI.getInstrInfo();
|
||||
const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
// Debug location must be unknown since the first debug location is used
|
||||
// to determine the end of the prologue.
|
||||
|
@ -209,30 +227,15 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
"(probably because it has a dynamic alloca).");
|
||||
|
||||
// Get the number of bytes to allocate from the FrameInfo
|
||||
int NumBytes = (int)MFI.getStackSize();
|
||||
// The VE ABI requires a reserved 176-byte area in the user's stack, starting
|
||||
// at %sp + 16. This is for the callee Register Save Area (RSA).
|
||||
//
|
||||
// We therefore need to add that offset to the total stack size
|
||||
// after all the stack objects are placed by
|
||||
// PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack
|
||||
// needs to be aligned *after* the extra size is added, we need to disable
|
||||
// calculateFrameObjectOffsets's built-in stack alignment, by having
|
||||
// targetHandlesStackFrameRounding return true.
|
||||
uint64_t NumBytes = MFI.getStackSize();
|
||||
|
||||
// Add the extra call frame stack size, if needed. (This is the same
|
||||
// code as in PrologEpilogInserter, but also gets disabled by
|
||||
// targetHandlesStackFrameRounding)
|
||||
if (MFI.adjustsStack() && hasReservedCallFrame(MF))
|
||||
NumBytes += MFI.getMaxCallFrameSize();
|
||||
|
||||
// Adds the VE subtarget-specific spill area to the stack
|
||||
// size. Also ensures target-required alignment.
|
||||
NumBytes = Subtarget.getAdjustedFrameSize(NumBytes);
|
||||
// The VE ABI requires a reserved 176 bytes area at the top
|
||||
// of stack as described in VESubtarget.cpp. So, we adjust it here.
|
||||
NumBytes = STI.getAdjustedFrameSize(NumBytes);
|
||||
|
||||
// Finally, ensure that the size is sufficiently aligned for the
|
||||
// data on the stack.
|
||||
NumBytes = alignTo(NumBytes, MFI.getMaxAlign().value());
|
||||
NumBytes = alignTo(NumBytes, MFI.getMaxAlign());
|
||||
|
||||
// Update stack size with corrected value.
|
||||
MFI.setStackSize(NumBytes);
|
||||
|
@ -241,16 +244,25 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
|
||||
|
||||
// Emit stack adjust instructions
|
||||
emitSPAdjustment(MF, MBB, MBBI, -NumBytes);
|
||||
MaybeAlign RuntimeAlign =
|
||||
NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
|
||||
emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
|
||||
|
||||
if (hasBP(MF)) {
|
||||
// Copy SP to BP.
|
||||
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
|
||||
.addReg(VE::SX11)
|
||||
.addImm(0);
|
||||
}
|
||||
|
||||
// Emit stack extend instructions
|
||||
emitSPExtend(MF, MBB, MBBI, -NumBytes);
|
||||
emitSPExtend(MF, MBB, MBBI);
|
||||
|
||||
unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true);
|
||||
Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
|
||||
|
||||
// Emit ".cfi_def_cfa_register 30".
|
||||
unsigned CFIIndex =
|
||||
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
|
||||
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
|
||||
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
|
||||
|
@ -265,7 +277,7 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
|
|||
MachineBasicBlock::iterator I) const {
|
||||
if (!hasReservedCallFrame(MF)) {
|
||||
MachineInstr &MI = *I;
|
||||
int Size = MI.getOperand(0).getImm();
|
||||
int64_t Size = MI.getOperand(0).getImm();
|
||||
if (MI.getOpcode() == VE::ADJCALLSTACKDOWN)
|
||||
Size = -Size;
|
||||
|
||||
|
@ -281,20 +293,17 @@ void VEFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
DebugLoc dl = MBBI->getDebugLoc();
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
|
||||
int NumBytes = (int)MFI.getStackSize();
|
||||
uint64_t NumBytes = MFI.getStackSize();
|
||||
|
||||
// Emit Epilogue instructions to restore %lr
|
||||
emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
|
||||
}
|
||||
|
||||
bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
|
||||
// Reserve call frame if there are no variable sized objects on the stack.
|
||||
return !MF.getFrameInfo().hasVarSizedObjects();
|
||||
}
|
||||
|
||||
// hasFP - Return true if the specified function should have a dedicated frame
|
||||
// pointer register. This is true if the function has variable sized allocas or
|
||||
// if frame pointer elimination is disabled.
|
||||
// pointer register. This is true if the function has variable sized allocas
|
||||
// or if frame pointer elimination is disabled. For the case of VE, we don't
|
||||
// implement FP elimination yet, but we return false from this function to
|
||||
// avoid referring to %fp in generated code.
|
||||
bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
|
||||
|
||||
|
@ -304,44 +313,41 @@ bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
|
|||
MFI.isFrameAddressTaken();
|
||||
}
|
||||
|
||||
bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
|
||||
|
||||
return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
|
||||
}
|
||||
|
||||
int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
Register &FrameReg) const {
|
||||
const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
const VERegisterInfo *RegInfo = STI.getRegisterInfo();
|
||||
const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
|
||||
bool isFixed = MFI.isFixedObjectIndex(FI);
|
||||
|
||||
// Addressable stack objects are accessed using neg. offsets from
|
||||
// %fp, or positive offsets from %sp.
|
||||
bool UseFP = true;
|
||||
int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
|
||||
|
||||
// VE uses FP-based references in general, even when "hasFP" is
|
||||
// false. That function is rather a misnomer, because %fp is
|
||||
// actually always available, unless isLeafProc.
|
||||
if (FuncInfo->isLeafProc()) {
|
||||
// If there's a leaf proc, all offsets need to be %sp-based,
|
||||
// because we haven't caused %fp to actually point to our frame.
|
||||
UseFP = false;
|
||||
} else if (isFixed) {
|
||||
// Otherwise, argument access should always use %fp.
|
||||
UseFP = true;
|
||||
} else if (RegInfo->needsStackRealignment(MF)) {
|
||||
FrameReg = VE::SX11; // %sp
|
||||
return FrameOffset + MF.getFrameInfo().getStackSize();
|
||||
}
|
||||
if (RegInfo->needsStackRealignment(MF) && !isFixed) {
|
||||
// If there is dynamic stack realignment, all local object
|
||||
// references need to be via %sp, to take account of the
|
||||
// re-alignment.
|
||||
UseFP = false;
|
||||
// references need to be via %sp or %s17 (bp), to take account
|
||||
// of the re-alignment.
|
||||
if (hasBP(MF))
|
||||
FrameReg = VE::SX17; // %bp
|
||||
else
|
||||
FrameReg = VE::SX11; // %sp
|
||||
return FrameOffset + MF.getFrameInfo().getStackSize();
|
||||
}
|
||||
|
||||
int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
|
||||
|
||||
if (UseFP) {
|
||||
FrameReg = RegInfo->getFrameRegister(MF);
|
||||
return FrameOffset;
|
||||
}
|
||||
|
||||
FrameReg = VE::SX11; // %sp
|
||||
return FrameOffset + MF.getFrameInfo().getStackSize();
|
||||
// Finally, default to using %fp.
|
||||
FrameReg = RegInfo->getFrameRegister(MF);
|
||||
return FrameOffset;
|
||||
}
|
||||
|
||||
bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {
|
||||
|
|
|
@ -28,18 +28,23 @@ public:
|
|||
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
|
||||
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
|
||||
void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, int NumBytes,
|
||||
MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
|
||||
bool RequireFPUpdate) const;
|
||||
void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, int NumBytes,
|
||||
MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
|
||||
bool RequireFPUpdate) const;
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const override;
|
||||
|
||||
bool hasReservedCallFrame(const MachineFunction &MF) const override;
|
||||
bool hasBP(const MachineFunction &MF) const;
|
||||
bool hasFP(const MachineFunction &MF) const override;
|
||||
// VE reserves argument space always for call sites in the function
|
||||
// immediately on entry of the current function.
|
||||
bool hasReservedCallFrame(const MachineFunction &MF) const override {
|
||||
return true;
|
||||
}
|
||||
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
|
||||
RegScavenger *RS = nullptr) const override;
|
||||
|
||||
|
@ -58,10 +63,8 @@ public:
|
|||
return Offsets;
|
||||
}
|
||||
|
||||
/// targetHandlesStackFrameRounding - Returns true if the target is
|
||||
/// responsible for rounding up the stack frame (probably at emitPrologue
|
||||
/// time).
|
||||
bool targetHandlesStackFrameRounding() const override { return true; }
|
||||
protected:
|
||||
const VESubtarget &STI;
|
||||
|
||||
private:
|
||||
// Returns true if MF is a leaf procedure.
|
||||
|
@ -69,11 +72,12 @@ private:
|
|||
|
||||
// Emits code for adjusting SP in function prologue/epilogue.
|
||||
void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, int NumBytes) const;
|
||||
MachineBasicBlock::iterator MBBI, int64_t NumBytes,
|
||||
MaybeAlign MayAlign = MaybeAlign()) const;
|
||||
|
||||
// Emits code for extending SP in function prologue/epilogue.
|
||||
void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, int NumBytes) const;
|
||||
MachineBasicBlock::iterator MBBI) const;
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
|
|
@ -583,6 +583,11 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::VAEND, MVT::Other, Expand);
|
||||
/// } VAARG handling
|
||||
|
||||
/// Stack {
|
||||
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
|
||||
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
|
||||
/// } Stack
|
||||
|
||||
/// Int Ops {
|
||||
for (MVT IntVT : {MVT::i32, MVT::i64}) {
|
||||
// VE has no REM or DIVREM operations.
|
||||
|
@ -641,6 +646,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
TARGET_NODE_CASE(Lo)
|
||||
TARGET_NODE_CASE(Hi)
|
||||
TARGET_NODE_CASE(GETFUNPLT)
|
||||
TARGET_NODE_CASE(GETSTACKTOP)
|
||||
TARGET_NODE_CASE(GETTLSADDR)
|
||||
TARGET_NODE_CASE(CALL)
|
||||
TARGET_NODE_CASE(RET_FLAG)
|
||||
|
@ -860,12 +866,79 @@ SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
|
|||
std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
|
||||
}
|
||||
|
||||
SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
// Generate following code.
|
||||
// (void)__ve_grow_stack(size); // or __ve_grow_stack_align(size, ~(align - 1))
|
||||
// ret = GETSTACKTOP; // pseudo instruction
|
||||
SDLoc DL(Op);
|
||||
|
||||
// Get the inputs.
|
||||
SDNode *Node = Op.getNode();
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Size = Op.getOperand(1);
|
||||
MaybeAlign Alignment(Op.getConstantOperandVal(2));
|
||||
EVT VT = Node->getValueType(0);
|
||||
|
||||
// Chain the dynamic stack allocation so that it doesn't modify the stack
|
||||
// pointer when other instructions are using the stack.
|
||||
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
|
||||
|
||||
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
|
||||
Align StackAlign = TFI.getStackAlign();
|
||||
bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
|
||||
|
||||
// Prepare arguments
|
||||
TargetLowering::ArgListTy Args;
|
||||
TargetLowering::ArgListEntry Entry;
|
||||
Entry.Node = Size;
|
||||
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
|
||||
Args.push_back(Entry);
|
||||
if (NeedsAlign) {
|
||||
Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
|
||||
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
|
||||
Args.push_back(Entry);
|
||||
}
|
||||
Type *RetTy = Type::getVoidTy(*DAG.getContext());
|
||||
|
||||
EVT PtrVT = Op.getValueType();
|
||||
SDValue Callee;
|
||||
if (NeedsAlign) {
|
||||
Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
|
||||
} else {
|
||||
Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
|
||||
}
|
||||
|
||||
TargetLowering::CallLoweringInfo CLI(DAG);
|
||||
CLI.setDebugLoc(DL)
|
||||
.setChain(Chain)
|
||||
.setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
|
||||
.setDiscardResult(true);
|
||||
std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
|
||||
Chain = pair.second;
|
||||
SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
|
||||
if (NeedsAlign) {
|
||||
Result = DAG.getNode(ISD::ADD, DL, VT, Result,
|
||||
DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
|
||||
Result = DAG.getNode(ISD::AND, DL, VT, Result,
|
||||
DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
|
||||
}
|
||||
// Chain = Result.getValue(1);
|
||||
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
|
||||
DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
|
||||
|
||||
SDValue Ops[2] = {Result, Chain};
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
switch (Op.getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Should not custom lower this!");
|
||||
case ISD::BlockAddress:
|
||||
return LowerBlockAddress(Op, DAG);
|
||||
case ISD::DYNAMIC_STACKALLOC:
|
||||
return lowerDYNAMIC_STACKALLOC(Op, DAG);
|
||||
case ISD::GlobalAddress:
|
||||
return LowerGlobalAddress(Op, DAG);
|
||||
case ISD::GlobalTLSAddress:
|
||||
|
|
|
@ -27,8 +27,10 @@ enum NodeType : unsigned {
|
|||
Hi,
|
||||
Lo, // Hi/Lo operations, typically on a global address.
|
||||
|
||||
GETFUNPLT, // load function address through %plt instruction
|
||||
GETTLSADDR, // load address for TLS access
|
||||
GETFUNPLT, // load function address through %plt instruction
|
||||
GETTLSADDR, // load address for TLS access
|
||||
GETSTACKTOP, // retrieve address of stack top (first address of
|
||||
// locals and temporaries)
|
||||
|
||||
CALL, // A call instruction.
|
||||
RET_FLAG, // Return with a flag operand.
|
||||
|
@ -81,6 +83,7 @@ public:
|
|||
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
|
||||
/// } Custom Lower
|
||||
|
||||
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
#define DEBUG_TYPE "ve"
|
||||
#define DEBUG_TYPE "ve-instr-info"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -457,6 +457,9 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
|||
MI.eraseFromParent(); // The pseudo instruction is gone now.
|
||||
return true;
|
||||
}
|
||||
case VE::GETSTACKTOP: {
|
||||
return expandGetStackTopPseudo(MI);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -464,8 +467,8 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
|||
bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
const VEInstrInfo &TII =
|
||||
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
|
||||
const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
|
||||
const VEInstrInfo &TII = *STI.getInstrInfo();
|
||||
DebugLoc dl = MBB.findDebugLoc(MI);
|
||||
|
||||
// Create following instructions and multiple basic blocks.
|
||||
|
@ -544,3 +547,35 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
|
|||
MI.eraseFromParent(); // The pseudo instruction is gone now.
|
||||
return true;
|
||||
}
|
||||
|
||||
bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
|
||||
MachineBasicBlock *MBB = MI.getParent();
|
||||
MachineFunction &MF = *MBB->getParent();
|
||||
const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
|
||||
const VEInstrInfo &TII = *STI.getInstrInfo();
|
||||
DebugLoc DL = MBB->findDebugLoc(MI);
|
||||
|
||||
// Create following instruction
|
||||
//
|
||||
// dst = %sp + target specific frame + the size of parameter area
|
||||
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const VEFrameLowering &TFL = *STI.getFrameLowering();
|
||||
|
||||
// The VE ABI requires a reserved 176 bytes area at the top
|
||||
// of stack as described in VESubtarget.cpp. So, we adjust it here.
|
||||
unsigned NumBytes = STI.getAdjustedFrameSize(0);
|
||||
|
||||
// Also adds the size of parameter area.
|
||||
if (MFI.adjustsStack() && TFL.hasReservedCallFrame(MF))
|
||||
NumBytes += MFI.getMaxCallFrameSize();
|
||||
|
||||
BuildMI(*MBB, MI, DL, TII.get(VE::LEArii))
|
||||
.addDef(MI.getOperand(0).getReg())
|
||||
.addReg(VE::SX11)
|
||||
.addImm(0)
|
||||
.addImm(NumBytes);
|
||||
|
||||
MI.eraseFromParent(); // The pseudo instruction is gone now.
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -81,6 +81,7 @@ public:
|
|||
bool expandPostRAPseudo(MachineInstr &MI) const override;
|
||||
|
||||
bool expandExtendStackPseudo(MachineInstr &MI) const;
|
||||
bool expandGetStackTopPseudo(MachineInstr &MI) const;
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
|
|
@ -414,6 +414,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
|
|||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
|
||||
// GETSTACKTOP
|
||||
def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
|
||||
[SDNPHasChain, SDNPSideEffect]>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1398,6 +1401,14 @@ def EXTEND_STACK_GUARD : Pseudo<(outs), (ins),
|
|||
"# EXTEND STACK GUARD",
|
||||
[]>;
|
||||
|
||||
// Dynamic stack allocation yields a call to __ve_grow_stack for VE targets.
|
||||
// These calls are needed to probe the stack when an allocation extends beyond
|
||||
// %s8 (%sl - stack limit).
|
||||
|
||||
let Uses = [SX11], hasSideEffects = 1 in
|
||||
def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
|
||||
"# GET STACK TOP",
|
||||
[(set iPTR:$dst, (GetStackTop))]>;
|
||||
// SETCC pattern matches
|
||||
//
|
||||
// CMP %tmp, lhs, rhs ; compare lhs and rhs
|
||||
|
|
|
@ -34,12 +34,22 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
|
|||
|
||||
const MCPhysReg *
|
||||
VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
||||
return CSR_SaveList;
|
||||
switch (MF->getFunction().getCallingConv()) {
|
||||
default:
|
||||
return CSR_SaveList;
|
||||
case CallingConv::PreserveAll:
|
||||
return CSR_preserve_all_SaveList;
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
||||
CallingConv::ID CC) const {
|
||||
return CSR_RegMask;
|
||||
switch (CC) {
|
||||
default:
|
||||
return CSR_RegMask;
|
||||
case CallingConv::PreserveAll:
|
||||
return CSR_preserve_all_RegMask;
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t *VERegisterInfo::getNoPreservedMask() const {
|
||||
|
|
|
@ -42,7 +42,7 @@ public:
|
|||
const TargetMachine &TM);
|
||||
|
||||
const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; }
|
||||
const TargetFrameLowering *getFrameLowering() const override {
|
||||
const VEFrameLowering *getFrameLowering() const override {
|
||||
return &FrameLowering;
|
||||
}
|
||||
const VERegisterInfo *getRegisterInfo() const override {
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
|
||||
|
||||
declare void @bar(i8*, i64)
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @test(i64 %n) {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: or %s1, 0, %s0
|
||||
; CHECK-NEXT: lea %s0, 15(, %s0)
|
||||
; CHECK-NEXT: and %s0, -16, %s0
|
||||
; CHECK-NEXT: lea %s2, __ve_grow_stack@lo
|
||||
; CHECK-NEXT: and %s2, %s2, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s2)
|
||||
; CHECK-NEXT: bsic %s10, (, %s12)
|
||||
; CHECK-NEXT: lea %s0, 240(, %s11)
|
||||
; CHECK-NEXT: lea %s2, bar@lo
|
||||
; CHECK-NEXT: and %s2, %s2, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, bar@hi(, %s2)
|
||||
; CHECK-NEXT: bsic %s10, (, %s12)
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%dyna = alloca i8, i64 %n, align 8
|
||||
call void @bar(i8* %dyna, i64 %n)
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
|
||||
|
||||
declare void @bar(i8*, i64)
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @test(i64 %n) {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: or %s2, 0, %s0
|
||||
; CHECK-NEXT: lea %s0, 15(, %s0)
|
||||
; CHECK-NEXT: and %s0, -16, %s0
|
||||
; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
|
||||
; CHECK-NEXT: or %s1, -32, (0)1
|
||||
; CHECK-NEXT: bsic %s10, (, %s12)
|
||||
; CHECK-NEXT: lea %s0, 240(, %s11)
|
||||
; CHECK-NEXT: lea %s0, 31(, %s0)
|
||||
; CHECK-NEXT: and %s0, -32, %s0
|
||||
; CHECK-NEXT: lea %s1, bar@lo
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, bar@hi(, %s1)
|
||||
; CHECK-NEXT: or %s1, 0, %s2
|
||||
; CHECK-NEXT: bsic %s10, (, %s12)
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%dyna = alloca i8, i64 %n, align 32
|
||||
call void @bar(i8* %dyna, i64 %n)
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue