forked from OSchip/llvm-project
R600/SI: add proper formal parameter handling for SI
Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623
This commit is contained in:
parent
3625055b8c
commit
2c8f6d5376
|
@ -38,3 +38,4 @@ include "AMDGPUInstrInfo.td"
|
|||
include "AMDGPUIntrinsics.td"
|
||||
include "AMDGPURegisterInfo.td"
|
||||
include "AMDGPUInstructions.td"
|
||||
include "AMDGPUCallingConv.td"
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
//===---- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs ------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This describes the calling conventions for the AMD Radeon GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Counterpart of CCIfInReg: applies action A only to arguments that do NOT
// carry the "inreg" flag.
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A>;
|
||||
|
||||
// Calling convention for SI
//
// Arguments flagged "inreg" are assigned to scalar registers (SGPRs), all
// other 32-bit arguments are assigned to vector registers (VGPRs).
def CC_SI : CallingConv<[

  // 32-bit inreg arguments: next free register out of SGPR0-SGPR15.
  CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
  ]>>>,

  // 64-bit inreg arguments occupy an even/odd SGPR pair. The even register is
  // the assigned location; the odd register directly after it is listed as its
  // shadow so that it is marked as allocated too and cannot be handed out to
  // a later argument. Entry N of the shadow list must therefore always be the
  // register following entry N of the first list.
  CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
    [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
    [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
  >>>,

  // 32-bit arguments without inreg: next free register out of VGPR0-VGPR31.
  CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
  ]>>>

]>;
|
||||
|
||||
// Top-level calling convention: hands the argument over to CC_SI when the
// subtarget's device reports the HD7XXX generation. No rule is given here for
// any other generation.
def CC_AMDGPU : CallingConv<[
  CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()->"#
       "getGeneration() == AMDGPUDeviceInfo::HD7XXX",
       CCDelegateTo<CC_SI>>
]>;
|
|
@ -14,7 +14,10 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
|
@ -22,6 +25,8 @@
|
|||
|
||||
using namespace llvm;
|
||||
|
||||
#include "AMDGPUGenCallingConv.inc"
|
||||
|
||||
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
TargetLowering(TM, new TargetLoweringObjectFileELF()) {
|
||||
|
||||
|
@ -64,17 +69,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
|||
// TargetLowering Callbacks
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerFormalArguments(
|
||||
SDValue Chain,
|
||||
CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc DL, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
|
||||
InVals.push_back(SDValue());
|
||||
}
|
||||
return Chain;
|
||||
/// Assign a location to every incoming argument in \p Ins by running the
/// CC_AMDGPU calling convention over them; the results are recorded in
/// \p State.
void AMDGPUTargetLowering::AnalyzeFormalArguments(
    CCState &State, const SmallVectorImpl<ISD::InputArg> &Ins) const {
  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerReturn(
|
||||
|
|
|
@ -39,15 +39,12 @@ protected:
|
|||
bool isHWTrueValue(SDValue Op) const;
|
||||
bool isHWFalseValue(SDValue Op) const;
|
||||
|
||||
void AnalyzeFormalArguments(CCState &State,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins) const;
|
||||
|
||||
public:
|
||||
AMDGPUTargetLowering(TargetMachine &TM);
|
||||
|
||||
virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc DL, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
|
|
|
@ -32,11 +32,6 @@
|
|||
#include "llvm/Target/TargetOptions.h"
|
||||
|
||||
using namespace llvm;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Calling Convention Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AMDGPUGenCallingConv.inc"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TargetLowering Implementation Help Functions End
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#include "SIInstrInfo.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
|
@ -74,6 +76,105 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
|||
setTargetDAGCombine(ISD::SETCC);
|
||||
}
|
||||
|
||||
/// Lower the incoming formal arguments of a function for SI.
///
/// Vector arguments are first split into one scalar argument per element of
/// the original IR parameter type. The resulting list is run through
/// AnalyzeFormalArguments() to assign a register to every scalar, and finally
/// one value per entry of \p Ins is appended to \p InVals:
///  - i64 locations are read as a copy from the matching 64-bit SGPR
///    super-register,
///  - other scalars are read as a copy from their assigned register,
///  - vectors are rebuilt with BUILD_VECTOR from their element registers and
///    padded with UNDEF up to the element count of the argument's value type.
/// Every register that is read is added to the function's live-ins.
///
/// Only CallingConv::C is supported (asserted).
///
/// \returns \p Chain unchanged.
SDValue SITargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {

  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();

  MachineFunction &MF = DAG.getMachineFunction();
  FunctionType *FType = MF.getFunction()->getFunctionType();

  assert(CallConv == CallingConv::C &&
         "Only the C calling convention is supported!");

  SmallVector<ISD::InputArg, 16> Splits;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    const ISD::InputArg &Arg = Ins[i];

    // Split vectors into their elements
    if (Arg.VT.isVector()) {
      ISD::InputArg NewArg = Arg;
      NewArg.Flags.setSplit();
      NewArg.VT = Arg.VT.getVectorElementType();

      // We REALLY want the ORIGINAL number of vector elements here, e.g. a
      // three or five element vector only needs three or five registers,
      // NOT four or eight.
      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
      unsigned NumElements = ParamType->getVectorNumElements();

      for (unsigned j = 0; j != NumElements; ++j) {
        Splits.push_back(NewArg);
        NewArg.PartOffset += NewArg.VT.getStoreSize();
      }

    } else {
      Splits.push_back(Arg);
    }
  }

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, MF,
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Splits);

  // ArgLocs has one entry per element of Splits, so ArgIdx advances by one
  // for a scalar argument and by the original element count for a vector.
  for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {

    CCValAssign &VA = ArgLocs[ArgIdx++];
    assert(VA.isRegLoc() && "Parameter must be in a register!");

    unsigned Reg = VA.getLocReg();
    MVT VT = VA.getLocVT();

    if (VT == MVT::i64) {
      // For now assume it is a pointer. The assigned register is the low
      // half (sub0) of the 64-bit SGPR pair that holds the value.
      Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
                                     &AMDGPU::SReg_64RegClass);
      Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
      InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
      continue;
    }

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);

    Reg = MF.addLiveIn(Reg, RC);
    SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);

    const ISD::InputArg &Arg = Ins[i];
    if (Arg.VT.isVector()) {

      // Build a vector from the registers
      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
      unsigned NumElements = ParamType->getVectorNumElements();

      SmallVector<SDValue, 4> Regs;
      Regs.push_back(Val);
      for (unsigned j = 1; j != NumElements; ++j) {
        // The remaining elements get the same check as the first one.
        CCValAssign &ElemVA = ArgLocs[ArgIdx++];
        assert(ElemVA.isRegLoc() && "Parameter must be in a register!");

        Reg = MF.addLiveIn(ElemVA.getLocReg(), RC);
        Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
      }

      // Fill up the missing vector elements
      NumElements = Arg.VT.getVectorNumElements() - NumElements;
      for (unsigned j = 0; j != NumElements; ++j)
        Regs.push_back(DAG.getUNDEF(VT));

      InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
                                   Regs.data(), Regs.size()));
      continue;
    }

    InVals.push_back(Val);
  }
  return Chain;
}
|
||||
|
||||
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
MachineInstr * MI, MachineBasicBlock * BB) const {
|
||||
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
|
||||
|
|
|
@ -43,6 +43,13 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
|||
|
||||
public:
|
||||
SITargetLowering(TargetMachine &tm);
|
||||
|
||||
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc DL, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
|
||||
MachineBasicBlock * BB) const;
|
||||
virtual EVT getSetCCResultType(EVT VT) const;
|
||||
|
|
Loading…
Reference in New Issue