forked from OSchip/llvm-project
[XCore] Support functions returning more than 4 words.
Summary: If a function returns a large struct by value, return the first 4 words in registers and the rest on the stack in a location reserved by the caller. This is needed to support the xC language, which supports functions returning an arbitrary number of return values. Reviewers: robertlytton Reviewed By: robertlytton CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D2889 llvm-svn: 202397
This commit is contained in:
parent
ed7e2ad090
commit
e82bf0988e
|
@ -14,7 +14,11 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
def RetCC_XCore : CallingConv<[
|
||||
// i32 are returned in registers R0, R1, R2, R3
|
||||
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>
|
||||
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
|
||||
|
||||
// Integer values get stored in stack slots that are 4 bytes in
|
||||
// size and 4-byte aligned.
|
||||
CCIfType<[i32], CCAssignToStack<4, 4>>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -50,6 +50,7 @@ getTargetNodeName(unsigned Opcode) const
|
|||
case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
|
||||
case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
|
||||
case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
|
||||
case XCoreISD::LDWSP : return "XCoreISD::LDWSP";
|
||||
case XCoreISD::STWSP : return "XCoreISD::STWSP";
|
||||
case XCoreISD::RETSP : return "XCoreISD::RETSP";
|
||||
case XCoreISD::LADD : return "XCoreISD::LADD";
|
||||
|
@ -1085,13 +1086,41 @@ LowerCallResult(SDValue Chain, SDValue InFlag,
|
|||
const SmallVectorImpl<CCValAssign> &RVLocs,
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) {
|
||||
// Copy all of the result registers out of their specified physreg.
|
||||
for (unsigned i = 0; i != RVLocs.size(); ++i) {
|
||||
Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
|
||||
RVLocs[i].getValVT(), InFlag).getValue(1);
|
||||
SmallVector<std::pair<int, unsigned>, 4> ResultMemLocs;
|
||||
// Copy results out of physical registers.
|
||||
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
|
||||
const CCValAssign &VA = RVLocs[i];
|
||||
if (VA.isRegLoc()) {
|
||||
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getValVT(),
|
||||
InFlag).getValue(1);
|
||||
InFlag = Chain.getValue(2);
|
||||
InVals.push_back(Chain.getValue(0));
|
||||
} else {
|
||||
assert(VA.isMemLoc());
|
||||
ResultMemLocs.push_back(std::make_pair(VA.getLocMemOffset(),
|
||||
InVals.size()));
|
||||
// Reserve space for this result.
|
||||
InVals.push_back(SDValue());
|
||||
}
|
||||
}
|
||||
|
||||
// Copy results out of memory.
|
||||
SmallVector<SDValue, 4> MemOpChains;
|
||||
for (unsigned i = 0, e = ResultMemLocs.size(); i != e; ++i) {
|
||||
int offset = ResultMemLocs[i].first;
|
||||
unsigned index = ResultMemLocs[i].second;
|
||||
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
|
||||
SDValue Ops[] = { Chain, DAG.getConstant(offset / 4, MVT::i32) };
|
||||
SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops, 2);
|
||||
InVals[index] = load;
|
||||
MemOpChains.push_back(load.getValue(1));
|
||||
}
|
||||
|
||||
// Transform all load nodes into one single node because
|
||||
// all load nodes are independent of each other.
|
||||
if (!MemOpChains.empty())
|
||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
&MemOpChains[0], MemOpChains.size());
|
||||
|
||||
return Chain;
|
||||
}
|
||||
|
@ -1121,8 +1150,15 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
|
|||
|
||||
CCInfo.AnalyzeCallOperands(Outs, CC_XCore);
|
||||
|
||||
SmallVector<CCValAssign, 16> RVLocs;
|
||||
// Analyze return values to determine the number of bytes of stack required.
|
||||
CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
|
||||
getTargetMachine(), RVLocs, *DAG.getContext());
|
||||
RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4);
|
||||
RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
|
||||
|
||||
// Get a count of how many bytes are to be pushed on the stack.
|
||||
unsigned NumBytes = CCInfo.getNextStackOffset();
|
||||
unsigned NumBytes = RetCCInfo.getNextStackOffset();
|
||||
|
||||
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
|
||||
getPointerTy(), true), dl);
|
||||
|
@ -1218,12 +1254,6 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
|
|||
InFlag, dl);
|
||||
InFlag = Chain.getValue(1);
|
||||
|
||||
// Assign locations to each value returned by this call.
|
||||
SmallVector<CCValAssign, 16> RVLocs;
|
||||
CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
|
||||
getTargetMachine(), RVLocs, *DAG.getContext());
|
||||
RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
|
||||
|
||||
// Handle result values, copying them out of physregs into vregs that we
|
||||
// return.
|
||||
return LowerCallResult(Chain, InFlag, RVLocs, dl, DAG, InVals);
|
||||
|
@ -1274,6 +1304,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
|
|||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
MachineRegisterInfo &RegInfo = MF.getRegInfo();
|
||||
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
|
||||
|
||||
// Assign locations to all of the incoming arguments.
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
|
@ -1286,6 +1317,9 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
|
|||
|
||||
unsigned LRSaveSize = StackSlotSize;
|
||||
|
||||
if (!isVarArg)
|
||||
XFI->setReturnStackOffset(CCInfo.getNextStackOffset() + LRSaveSize);
|
||||
|
||||
// All getCopyFromReg ops must precede any getMemcpys to prevent the
|
||||
// scheduler clobbering a register before it has been copied.
|
||||
// The stages are:
|
||||
|
@ -1436,7 +1470,11 @@ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
|
|||
LLVMContext &Context) const {
|
||||
SmallVector<CCValAssign, 16> RVLocs;
|
||||
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
|
||||
return CCInfo.CheckReturn(Outs, RetCC_XCore);
|
||||
if (!CCInfo.CheckReturn(Outs, RetCC_XCore))
|
||||
return false;
|
||||
if (CCInfo.getNextStackOffset() != 0 && isVarArg)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
SDValue
|
||||
|
@ -1446,6 +1484,10 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
|
|||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
SDLoc dl, SelectionDAG &DAG) const {
|
||||
|
||||
XCoreFunctionInfo *XFI =
|
||||
DAG.getMachineFunction().getInfo<XCoreFunctionInfo>();
|
||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
|
||||
// CCValAssign - represent the assignment of
|
||||
// the return value to a location
|
||||
SmallVector<CCValAssign, 16> RVLocs;
|
||||
|
@ -1455,6 +1497,9 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
|
|||
getTargetMachine(), RVLocs, *DAG.getContext());
|
||||
|
||||
// Analyze return values.
|
||||
if (!isVarArg)
|
||||
CCInfo.AllocateStack(XFI->getReturnStackOffset(), 4);
|
||||
|
||||
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
|
||||
|
||||
SDValue Flag;
|
||||
|
@ -1463,13 +1508,43 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
|
|||
// Return on XCore is always a "retsp 0"
|
||||
RetOps.push_back(DAG.getConstant(0, MVT::i32));
|
||||
|
||||
// Copy the result values into the output registers.
|
||||
for (unsigned i = 0; i != RVLocs.size(); ++i) {
|
||||
SmallVector<SDValue, 4> MemOpChains;
|
||||
// Handle return values that must be copied to memory.
|
||||
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = RVLocs[i];
|
||||
assert(VA.isRegLoc() && "Can only return in registers!");
|
||||
if (VA.isRegLoc())
|
||||
continue;
|
||||
assert(VA.isMemLoc());
|
||||
if (isVarArg) {
|
||||
report_fatal_error("Can't return value from vararg function in memory");
|
||||
}
|
||||
|
||||
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
|
||||
OutVals[i], Flag);
|
||||
int Offset = VA.getLocMemOffset();
|
||||
unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8;
|
||||
// Create the frame index object for the memory location.
|
||||
int FI = MFI->CreateFixedObject(ObjSize, Offset, false);
|
||||
|
||||
// Create a SelectionDAG node corresponding to a store
|
||||
// to this memory location.
|
||||
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
|
||||
MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN,
|
||||
MachinePointerInfo::getFixedStack(FI), false, false,
|
||||
0));
|
||||
}
|
||||
|
||||
// Transform all store nodes into one single node because
|
||||
// all stores are independent of each other.
|
||||
if (!MemOpChains.empty())
|
||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
&MemOpChains[0], MemOpChains.size());
|
||||
|
||||
// Now handle return values copied to registers.
|
||||
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = RVLocs[i];
|
||||
if (!VA.isRegLoc())
|
||||
continue;
|
||||
// Copy the result values into the output registers.
|
||||
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
|
||||
|
||||
// guarantee that all emitted copies are
|
||||
// stuck together, avoiding something bad
|
||||
|
|
|
@ -42,6 +42,9 @@ namespace llvm {
|
|||
// cp relative address
|
||||
CPRelativeWrapper,
|
||||
|
||||
// Load word from stack
|
||||
LDWSP,
|
||||
|
||||
// Store word to stack
|
||||
STWSP,
|
||||
|
||||
|
|
|
@ -68,6 +68,10 @@ def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
|
|||
def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
|
||||
def SDT_XCoreLdwsp : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
|
||||
def XCoreLdwsp : SDNode<"XCoreISD::LDWSP", SDT_XCoreLdwsp,
|
||||
[SDNPHasChain, SDNPMayLoad]>;
|
||||
|
||||
// These are target-independent nodes, but have target-specific formats.
|
||||
def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
|
||||
def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
|
||||
|
@ -581,10 +585,12 @@ def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
|
|||
|
||||
let mayLoad=1 in {
|
||||
def LDWSP_ru6 : _FRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
|
||||
"ldw $a, sp[$b]", []>;
|
||||
"ldw $a, sp[$b]",
|
||||
[(set RRegs:$a, (XCoreLdwsp immU6:$b))]>;
|
||||
|
||||
def LDWSP_lru6 : _FLRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
|
||||
"ldw $a, sp[$b]", []>;
|
||||
"ldw $a, sp[$b]",
|
||||
[(set RRegs:$a, (XCoreLdwsp immU16:$b))]>;
|
||||
}
|
||||
|
||||
let neverHasSideEffects = 1 in {
|
||||
|
|
|
@ -33,6 +33,8 @@ class XCoreFunctionInfo : public MachineFunctionInfo {
|
|||
int FPSpillSlot;
|
||||
bool EHSpillSlotSet;
|
||||
int EHSpillSlot[2];
|
||||
unsigned ReturnStackOffset;
|
||||
bool ReturnStackOffsetSet;
|
||||
int VarArgsFrameIndex;
|
||||
mutable int CachedEStackSize;
|
||||
std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > SpillLabels;
|
||||
|
@ -42,6 +44,7 @@ public:
|
|||
LRSpillSlotSet(false),
|
||||
FPSpillSlotSet(false),
|
||||
EHSpillSlotSet(false),
|
||||
ReturnStackOffsetSet(false),
|
||||
VarArgsFrameIndex(0),
|
||||
CachedEStackSize(-1) {}
|
||||
|
||||
|
@ -78,6 +81,17 @@ public:
|
|||
return EHSpillSlot;
|
||||
}
|
||||
|
||||
void setReturnStackOffset(unsigned value) {
|
||||
assert(!ReturnStackOffsetSet && "Return stack offset set twice");
|
||||
ReturnStackOffset = value;
|
||||
ReturnStackOffsetSet = true;
|
||||
}
|
||||
|
||||
unsigned getReturnStackOffset() const {
|
||||
assert(ReturnStackOffsetSet && "Return stack offset not set");
|
||||
return ReturnStackOffset;
|
||||
}
|
||||
|
||||
bool isLargeFrame(const MachineFunction &MF) const;
|
||||
|
||||
std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > &getSpillLabels() {
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
%0 = type { i32, i32, i32, i32 }
|
||||
%1 = type { i32, i32, i32, i32, i32 }
|
||||
|
||||
; Structs of 4 words can be returned in registers
|
||||
define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
|
||||
; Structs of 4 words are returned in registers
|
||||
define internal %0 @ReturnBigStruct() nounwind readnone {
|
||||
entry:
|
||||
%0 = insertvalue %0 zeroinitializer, i32 12, 0
|
||||
%1 = insertvalue %0 %0, i32 24, 1
|
||||
|
@ -19,8 +19,39 @@ entry:
|
|||
; CHECK: ldc r3, 24601
|
||||
; CHECK: retsp 0
|
||||
|
||||
; Structs bigger than 4 words are returned via a hidden sret-parameter
|
||||
define internal fastcc %1 @ReturnBigStruct2() nounwind readnone {
|
||||
; Structs of more than 4 words are partially returned in memory so long as the
|
||||
; function is not variadic.
|
||||
define { i32, i32, i32, i32, i32} @f(i32, i32, i32, i32, i32) nounwind readnone {
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: ldc [[REGISTER:r[0-9]+]], 5
|
||||
; CHECK-NEXT: stw [[REGISTER]], sp[2]
|
||||
; CHECK-NEXT: retsp 0
|
||||
body:
|
||||
ret { i32, i32, i32, i32, i32} { i32 undef, i32 undef, i32 undef, i32 undef, i32 5}
|
||||
}
|
||||
|
||||
@x = external global i32
|
||||
@y = external global i32
|
||||
|
||||
; Check we call a function returning more than 4 words correctly.
|
||||
define i32 @g() nounwind {
|
||||
; CHECK-LABEL: g:
|
||||
; CHECK: entsp 3
|
||||
; CHECK: ldc [[REGISTER:r[0-9]+]], 0
|
||||
; CHECK: stw [[REGISTER]], sp[1]
|
||||
; CHECK: bl f
|
||||
; CHECK-NEXT: ldw r0, sp[2]
|
||||
; CHECK-NEXT: retsp 3
|
||||
;
|
||||
body:
|
||||
%0 = call { i32, i32, i32, i32, i32 } @f(i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%1 = extractvalue { i32, i32, i32, i32, i32 } %0, 4
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Variadic functions return structs bigger than 4 words via a hidden
|
||||
; sret-parameter
|
||||
define internal %1 @ReturnBigStruct2(i32 %dummy, ...) nounwind readnone {
|
||||
entry:
|
||||
%0 = insertvalue %1 zeroinitializer, i32 12, 0
|
||||
%1 = insertvalue %1 %0, i32 24, 1
|
||||
|
|
Loading…
Reference in New Issue