[XCore] Support functions returning more than 4 words.

Summary:
If a function returns a large struct by value, return the first 4 words
in registers and the rest on the stack in a location reserved by the
caller. This is needed to support the xC language, which allows
functions to return an arbitrary number of values.

Reviewers: robertlytton

Reviewed By: robertlytton

CC: llvm-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D2889

llvm-svn: 202397
This commit is contained in:
Richard Osborne 2014-02-27 14:00:40 +00:00
parent ed7e2ad090
commit e82bf0988e
6 changed files with 159 additions and 26 deletions

View File

@ -14,7 +14,11 @@
//===----------------------------------------------------------------------===//
def RetCC_XCore : CallingConv<[
// i32 are returned in registers R0, R1, R2, R3
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
// Integer values get stored in stack slots that are 4 bytes in
// size and 4-byte aligned.
CCIfType<[i32], CCAssignToStack<4, 4>>
]>;
//===----------------------------------------------------------------------===//

View File

@ -50,6 +50,7 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
case XCoreISD::LDWSP : return "XCoreISD::LDWSP";
case XCoreISD::STWSP : return "XCoreISD::STWSP";
case XCoreISD::RETSP : return "XCoreISD::RETSP";
case XCoreISD::LADD : return "XCoreISD::LADD";
@ -1085,13 +1086,41 @@ LowerCallResult(SDValue Chain, SDValue InFlag,
const SmallVectorImpl<CCValAssign> &RVLocs,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
RVLocs[i].getValVT(), InFlag).getValue(1);
SmallVector<std::pair<int, unsigned>, 4> ResultMemLocs;
// Copy results out of physical registers.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
const CCValAssign &VA = RVLocs[i];
if (VA.isRegLoc()) {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getValVT(),
InFlag).getValue(1);
InFlag = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
} else {
assert(VA.isMemLoc());
ResultMemLocs.push_back(std::make_pair(VA.getLocMemOffset(),
InVals.size()));
// Reserve space for this result.
InVals.push_back(SDValue());
}
}
// Copy results out of memory.
SmallVector<SDValue, 4> MemOpChains;
for (unsigned i = 0, e = ResultMemLocs.size(); i != e; ++i) {
int offset = ResultMemLocs[i].first;
unsigned index = ResultMemLocs[i].second;
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue Ops[] = { Chain, DAG.getConstant(offset / 4, MVT::i32) };
SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops, 2);
InVals[index] = load;
MemOpChains.push_back(load.getValue(1));
}
// Transform all load nodes into a single node because
// all load nodes are independent of each other.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
return Chain;
}
@ -1121,8 +1150,15 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
CCInfo.AnalyzeCallOperands(Outs, CC_XCore);
SmallVector<CCValAssign, 16> RVLocs;
// Analyze return values to determine the number of bytes of stack required.
CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4);
RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
unsigned NumBytes = RetCCInfo.getNextStackOffset();
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
getPointerTy(), true), dl);
@ -1218,12 +1254,6 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
InFlag, dl);
InFlag = Chain.getValue(1);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, RVLocs, dl, DAG, InVals);
@ -1274,6 +1304,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@ -1286,6 +1317,9 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
unsigned LRSaveSize = StackSlotSize;
if (!isVarArg)
XFI->setReturnStackOffset(CCInfo.getNextStackOffset() + LRSaveSize);
// All getCopyFromReg ops must precede any getMemcpys to prevent the
// scheduler clobbering a register before it has been copied.
// The stages are:
@ -1436,7 +1470,11 @@ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_XCore);
if (!CCInfo.CheckReturn(Outs, RetCC_XCore))
return false;
if (CCInfo.getNextStackOffset() != 0 && isVarArg)
return false;
return true;
}
SDValue
@ -1446,6 +1484,10 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const {
XCoreFunctionInfo *XFI =
DAG.getMachineFunction().getInfo<XCoreFunctionInfo>();
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
// CCValAssign - represent the assignment of
// the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
@ -1455,6 +1497,9 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
getTargetMachine(), RVLocs, *DAG.getContext());
// Analyze return values.
if (!isVarArg)
CCInfo.AllocateStack(XFI->getReturnStackOffset(), 4);
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
SDValue Flag;
@ -1463,13 +1508,43 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// Return on XCore is always a "retsp 0"
RetOps.push_back(DAG.getConstant(0, MVT::i32));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
SmallVector<SDValue, 4> MemOpChains;
// Handle return values that must be copied to memory.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
if (VA.isRegLoc())
continue;
assert(VA.isMemLoc());
if (isVarArg) {
report_fatal_error("Can't return value from vararg function in memory");
}
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
OutVals[i], Flag);
int Offset = VA.getLocMemOffset();
unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8;
// Create the frame index object for the memory location.
int FI = MFI->CreateFixedObject(ObjSize, Offset, false);
// Create a SelectionDAG node corresponding to a store
// to this memory location.
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN,
MachinePointerInfo::getFixedStack(FI), false, false,
0));
}
// Transform all store nodes into one single node because
// all stores are independent of each other.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Now handle return values copied to registers.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
if (!VA.isRegLoc())
continue;
// Copy the result values into the output registers.
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
// guarantee that all emitted copies are
// stuck together, avoiding something bad

View File

@ -42,6 +42,9 @@ namespace llvm {
// cp relative address
CPRelativeWrapper,
// Load word from stack
LDWSP,
// Store word to stack
STWSP,

View File

@ -68,6 +68,10 @@ def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
[SDNPHasChain, SDNPMayStore]>;
// Load word from stack. The node carries a chain and may load; it produces
// one result and takes one integer operand. The call-result lowering builds
// this node with the operand set to the result's stack byte offset divided
// by 4.
def SDT_XCoreLdwsp : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def XCoreLdwsp : SDNode<"XCoreISD::LDWSP", SDT_XCoreLdwsp,
[SDNPHasChain, SDNPMayLoad]>;
// These are target-independent nodes, but have target-specific formats.
def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
@ -581,10 +585,12 @@ def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
let mayLoad=1 in {
def LDWSP_ru6 : _FRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
"ldw $a, sp[$b]", []>;
"ldw $a, sp[$b]",
[(set RRegs:$a, (XCoreLdwsp immU6:$b))]>;
def LDWSP_lru6 : _FLRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
"ldw $a, sp[$b]", []>;
"ldw $a, sp[$b]",
[(set RRegs:$a, (XCoreLdwsp immU16:$b))]>;
}
let neverHasSideEffects = 1 in {

View File

@ -33,6 +33,8 @@ class XCoreFunctionInfo : public MachineFunctionInfo {
int FPSpillSlot;
bool EHSpillSlotSet;
int EHSpillSlot[2];
unsigned ReturnStackOffset;
bool ReturnStackOffsetSet;
int VarArgsFrameIndex;
mutable int CachedEStackSize;
std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > SpillLabels;
@ -42,6 +44,7 @@ public:
LRSpillSlotSet(false),
FPSpillSlotSet(false),
EHSpillSlotSet(false),
ReturnStackOffsetSet(false),
VarArgsFrameIndex(0),
CachedEStackSize(-1) {}
@ -78,6 +81,17 @@ public:
return EHSpillSlot;
}
/// Record the return stack offset for this function. Argument lowering sets
/// it (for non-variadic functions only) to the next stack offset after the
/// incoming arguments plus the LR save size. It may be set at most once;
/// a second call trips the assertion below.
void setReturnStackOffset(unsigned value) {
assert(!ReturnStackOffsetSet && "Return stack offset set twice");
ReturnStackOffset = value;
ReturnStackOffsetSet = true;
}
/// Return the offset previously recorded by setReturnStackOffset(). Return
/// lowering passes this value to AllocateStack before analyzing the return
/// values of a non-variadic function. Asserts that the offset has been set,
/// since the member has no initializer in the constructor.
unsigned getReturnStackOffset() const {
assert(ReturnStackOffsetSet && "Return stack offset not set");
return ReturnStackOffset;
}
bool isLargeFrame(const MachineFunction &MF) const;
std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > &getSpillLabels() {

View File

@ -3,8 +3,8 @@
%0 = type { i32, i32, i32, i32 }
%1 = type { i32, i32, i32, i32, i32 }
; Structs of 4 words can be returned in registers
define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
; Structs of 4 words are returned in registers
define internal %0 @ReturnBigStruct() nounwind readnone {
entry:
%0 = insertvalue %0 zeroinitializer, i32 12, 0
%1 = insertvalue %0 %0, i32 24, 1
@ -19,8 +19,39 @@ entry:
; CHECK: ldc r3, 24601
; CHECK: retsp 0
; Structs bigger than 4 words are returned via a hidden sret-parameter
define internal fastcc %1 @ReturnBigStruct2() nounwind readnone {
; Structs of more than 4 words are partially returned in memory so long as the
; function is not variadic.
define { i32, i32, i32, i32, i32} @f(i32, i32, i32, i32, i32) nounwind readnone {
; CHECK-LABEL: f:
; CHECK: ldc [[REGISTER:r[0-9]+]], 5
; CHECK-NEXT: stw [[REGISTER]], sp[2]
; CHECK-NEXT: retsp 0
body:
ret { i32, i32, i32, i32, i32} { i32 undef, i32 undef, i32 undef, i32 undef, i32 5}
}
@x = external global i32
@y = external global i32
; Check we call a function returning more than 4 words correctly.
define i32 @g() nounwind {
; CHECK-LABEL: g:
; CHECK: entsp 3
; CHECK: ldc [[REGISTER:r[0-9]+]], 0
; CHECK: stw [[REGISTER]], sp[1]
; CHECK: bl f
; CHECK-NEXT: ldw r0, sp[2]
; CHECK-NEXT: retsp 3
;
body:
%0 = call { i32, i32, i32, i32, i32 } @f(i32 0, i32 0, i32 0, i32 0, i32 0)
%1 = extractvalue { i32, i32, i32, i32, i32 } %0, 4
ret i32 %1
}
; Variadic functions return structs bigger than 4 words via a hidden
; sret-parameter
define internal %1 @ReturnBigStruct2(i32 %dummy, ...) nounwind readnone {
entry:
%0 = insertvalue %1 zeroinitializer, i32 12, 0
%1 = insertvalue %1 %0, i32 24, 1