forked from OSchip/llvm-project
AMDGPU: Remove MFI::ABIArgOffset
We have too many mechanisms for tracking the various offsets used for kernel arguments, so remove one. There is still a lot of confusion around these offsets because there are two different "implicit" argument areas: one located at the beginning of the kernarg segment and one at the end. Additionally, the offset was determined based on the memory size of the split element types. This would break in a future commit where v3i32 is decomposed into separate i32 pieces.
llvm-svn: 335830
This commit is contained in:
parent
1fb9013368
commit
75e7192ba3
|
@ -1178,7 +1178,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
|||
|
||||
// FIXME: Should use getKernArgSize
|
||||
Out.kernarg_segment_byte_size =
|
||||
STM.getKernArgSegmentSize(MF.getFunction(), MFI->getABIArgOffset());
|
||||
STM.getKernArgSegmentSize(MF.getFunction(), MFI->getExplicitKernArgSize());
|
||||
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
|
||||
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
|
||||
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
|
||||
|
@ -1205,7 +1205,7 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
|
|||
HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
|
||||
|
||||
HSACodeProps.mKernargSegmentSize =
|
||||
STM.getKernArgSegmentSize(MF.getFunction(), MFI.getABIArgOffset());
|
||||
STM.getKernArgSegmentSize(MF.getFunction(), MFI.getExplicitKernArgSize());
|
||||
HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
|
||||
HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
|
||||
HSACodeProps.mKernargSegmentAlign =
|
||||
|
|
|
@ -3986,9 +3986,13 @@ SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
|
||||
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
|
||||
unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr();
|
||||
uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment);
|
||||
const MachineFunction &MF, const ImplicitParameter Param) const {
|
||||
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
|
||||
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
||||
unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
|
||||
unsigned Alignment = ST.getAlignmentForImplicitArgPtr();
|
||||
uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
|
||||
ExplicitArgOffset;
|
||||
switch (Param) {
|
||||
case GRID_DIM:
|
||||
return ArgOffset;
|
||||
|
|
|
@ -292,7 +292,7 @@ public:
|
|||
|
||||
/// Helper function that returns the byte offset of the given
|
||||
/// type of implicit parameter.
|
||||
uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
|
||||
uint32_t getImplicitParameterOffset(const MachineFunction &MF,
|
||||
const ImplicitParameter Param) const;
|
||||
|
||||
AMDGPUAS getAMDGPUAS() const {
|
||||
|
|
|
@ -17,10 +17,9 @@ using namespace llvm;
|
|||
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
|
||||
MachineFunctionInfo(),
|
||||
LocalMemoryObjects(),
|
||||
KernArgSize(0),
|
||||
ExplicitKernArgSize(0),
|
||||
MaxKernArgAlign(0),
|
||||
LDSSize(0),
|
||||
ABIArgOffset(0),
|
||||
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
|
||||
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
|
||||
MemoryBound(false),
|
||||
|
|
|
@ -15,22 +15,20 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget;
|
||||
|
||||
class AMDGPUMachineFunction : public MachineFunctionInfo {
|
||||
/// A map to keep track of local memory objects and their offsets within the
|
||||
/// local memory space.
|
||||
SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
|
||||
|
||||
protected:
|
||||
uint64_t KernArgSize;
|
||||
uint64_t ExplicitKernArgSize;
|
||||
unsigned MaxKernArgAlign;
|
||||
|
||||
/// Number of bytes in the LDS that are being used.
|
||||
unsigned LDSSize;
|
||||
|
||||
// FIXME: This should probably be removed.
|
||||
/// Start of implicit kernel args
|
||||
unsigned ABIArgOffset;
|
||||
|
||||
// Kernels + shaders. i.e. functions called by the driver and not called
|
||||
// by other functions.
|
||||
bool IsEntryFunction;
|
||||
|
@ -48,31 +46,23 @@ public:
|
|||
|
||||
uint64_t allocateKernArg(uint64_t Size, unsigned Align) {
|
||||
assert(isPowerOf2_32(Align));
|
||||
KernArgSize = alignTo(KernArgSize, Align);
|
||||
ExplicitKernArgSize = alignTo(ExplicitKernArgSize, Align);
|
||||
|
||||
uint64_t Result = KernArgSize;
|
||||
KernArgSize += Size;
|
||||
uint64_t Result = ExplicitKernArgSize;
|
||||
ExplicitKernArgSize += Size;
|
||||
|
||||
MaxKernArgAlign = std::max(Align, MaxKernArgAlign);
|
||||
return Result;
|
||||
}
|
||||
|
||||
uint64_t getKernArgSize() const {
|
||||
return KernArgSize;
|
||||
uint64_t getExplicitKernArgSize() const {
|
||||
return ExplicitKernArgSize;
|
||||
}
|
||||
|
||||
unsigned getMaxKernArgAlign() const {
|
||||
return MaxKernArgAlign;
|
||||
}
|
||||
|
||||
void setABIArgOffset(unsigned NewOffset) {
|
||||
ABIArgOffset = NewOffset;
|
||||
}
|
||||
|
||||
unsigned getABIArgOffset() const {
|
||||
return ABIArgOffset;
|
||||
}
|
||||
|
||||
unsigned getLDSSize() const {
|
||||
return LDSSize;
|
||||
}
|
||||
|
|
|
@ -560,7 +560,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
|||
|
||||
case Intrinsic::r600_implicitarg_ptr: {
|
||||
MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
|
||||
uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
|
||||
uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
|
||||
return DAG.getConstant(ByteOffset, DL, PtrVT);
|
||||
}
|
||||
case Intrinsic::r600_read_ngroups_x:
|
||||
|
@ -1544,8 +1544,6 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
|||
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
|
||||
*DAG.getContext());
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
|
||||
|
||||
SmallVector<ISD::InputArg, 8> LocalIns;
|
||||
|
||||
if (AMDGPU::isShader(CallConv)) {
|
||||
|
@ -1609,7 +1607,6 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
|||
|
||||
// 4 is the preferred alignment for the CONSTANT memory space.
|
||||
InVals.push_back(Arg);
|
||||
MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
|
|
@ -1078,8 +1078,8 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
|
|||
|
||||
SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
|
||||
const SDLoc &SL) const {
|
||||
auto MFI = DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
|
||||
uint64_t Offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
|
||||
uint64_t Offset = getImplicitParameterOffset(DAG.getMachineFunction(),
|
||||
FIRST_IMPLICIT);
|
||||
return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
|
||||
}
|
||||
|
||||
|
@ -1749,7 +1749,6 @@ SDValue SITargetLowering::LowerFormalArguments(
|
|||
EVT MemVT = VA.getLocVT();
|
||||
|
||||
const uint64_t Offset = ExplicitOffset + VA.getLocMemOffset();
|
||||
Info->setABIArgOffset(Offset + MemVT.getStoreSize());
|
||||
unsigned Align = MinAlign(KernelArgBaseAlign, Offset);
|
||||
|
||||
// The first 36 bytes of the input buffer contains information about
|
||||
|
|
Loading…
Reference in New Issue