forked from OSchip/llvm-project
[AMDGPU] Add the adjusted FP as a livein register.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64145
llvm-svn: 366223
This commit is contained in:
parent
450c62e33e
commit
b3f967d411
|
@ -1067,15 +1067,15 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
|
|||
|
||||
auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
|
||||
const TargetRegisterClass &RC,
|
||||
ArgDescriptor &Arg) {
|
||||
ArgDescriptor &Arg, unsigned UserSGPRs,
|
||||
unsigned SystemSGPRs) {
|
||||
// Skip parsing if it's not present.
|
||||
if (!A)
|
||||
return false;
|
||||
|
||||
if (A->IsRegister) {
|
||||
unsigned Reg;
|
||||
if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value,
|
||||
Error)) {
|
||||
if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
|
||||
SourceRange = A->RegisterName.SourceRange;
|
||||
return true;
|
||||
}
|
||||
|
@ -1088,60 +1088,62 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
|
|||
if (A->Mask)
|
||||
Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
|
||||
|
||||
MFI->NumUserSGPRs += UserSGPRs;
|
||||
MFI->NumSystemSGPRs += SystemSGPRs;
|
||||
return false;
|
||||
};
|
||||
|
||||
if (YamlMFI.ArgInfo &&
|
||||
(parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
|
||||
AMDGPU::SReg_128RegClass,
|
||||
MFI->ArgInfo.PrivateSegmentBuffer) ||
|
||||
MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
|
||||
AMDGPU::SReg_64RegClass,
|
||||
MFI->ArgInfo.DispatchPtr) ||
|
||||
AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
|
||||
2, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
|
||||
MFI->ArgInfo.QueuePtr) ||
|
||||
MFI->ArgInfo.QueuePtr, 2, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
|
||||
AMDGPU::SReg_64RegClass,
|
||||
MFI->ArgInfo.KernargSegmentPtr) ||
|
||||
MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
|
||||
AMDGPU::SReg_64RegClass,
|
||||
MFI->ArgInfo.DispatchID) ||
|
||||
AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
|
||||
2, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
|
||||
AMDGPU::SReg_64RegClass,
|
||||
MFI->ArgInfo.FlatScratchInit) ||
|
||||
MFI->ArgInfo.FlatScratchInit, 2, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
|
||||
AMDGPU::SGPR_32RegClass,
|
||||
MFI->ArgInfo.PrivateSegmentSize) ||
|
||||
MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
|
||||
AMDGPU::SGPR_32RegClass,
|
||||
MFI->ArgInfo.WorkGroupIDX) ||
|
||||
AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
|
||||
0, 1) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
|
||||
AMDGPU::SGPR_32RegClass,
|
||||
MFI->ArgInfo.WorkGroupIDY) ||
|
||||
AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
|
||||
0, 1) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
|
||||
AMDGPU::SGPR_32RegClass,
|
||||
MFI->ArgInfo.WorkGroupIDZ) ||
|
||||
AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
|
||||
0, 1) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
|
||||
AMDGPU::SGPR_32RegClass,
|
||||
MFI->ArgInfo.WorkGroupInfo) ||
|
||||
MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
|
||||
AMDGPU::SGPR_32RegClass,
|
||||
MFI->ArgInfo.PrivateSegmentWaveByteOffset) ||
|
||||
MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
|
||||
AMDGPU::SReg_64RegClass,
|
||||
MFI->ArgInfo.ImplicitArgPtr) ||
|
||||
MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
|
||||
AMDGPU::SReg_64RegClass,
|
||||
MFI->ArgInfo.ImplicitBufferPtr) ||
|
||||
MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
|
||||
AMDGPU::VGPR_32RegClass,
|
||||
MFI->ArgInfo.WorkItemIDX) ||
|
||||
MFI->ArgInfo.WorkItemIDX, 0, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
|
||||
AMDGPU::VGPR_32RegClass,
|
||||
MFI->ArgInfo.WorkItemIDY) ||
|
||||
MFI->ArgInfo.WorkItemIDY, 0, 0) ||
|
||||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
|
||||
AMDGPU::VGPR_32RegClass,
|
||||
MFI->ArgInfo.WorkItemIDZ)))
|
||||
MFI->ArgInfo.WorkItemIDZ, 0, 0)))
|
||||
return true;
|
||||
|
||||
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
|
||||
|
|
|
@ -311,7 +311,8 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
|
|||
}
|
||||
|
||||
// Shift down registers reserved for the scratch wave offset.
|
||||
unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
|
||||
std::pair<unsigned, bool>
|
||||
SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
|
||||
const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
@ -322,17 +323,17 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
|
|||
// No replacement necessary.
|
||||
if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
|
||||
(!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
|
||||
return AMDGPU::NoRegister;
|
||||
return std::make_pair(AMDGPU::NoRegister, false);
|
||||
}
|
||||
|
||||
if (ST.hasSGPRInitBug())
|
||||
return ScratchWaveOffsetReg;
|
||||
return std::make_pair(ScratchWaveOffsetReg, false);
|
||||
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
|
||||
|
||||
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
|
||||
if (NumPreloaded > AllSGPRs.size())
|
||||
return ScratchWaveOffsetReg;
|
||||
return std::make_pair(ScratchWaveOffsetReg, false);
|
||||
|
||||
AllSGPRs = AllSGPRs.slice(NumPreloaded);
|
||||
|
||||
|
@ -353,10 +354,11 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
|
|||
unsigned ReservedRegCount = 13;
|
||||
|
||||
if (AllSGPRs.size() < ReservedRegCount)
|
||||
return ScratchWaveOffsetReg;
|
||||
return std::make_pair(ScratchWaveOffsetReg, false);
|
||||
|
||||
bool HandledScratchWaveOffsetReg =
|
||||
ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
|
||||
bool FPAdjusted = false;
|
||||
|
||||
for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
|
||||
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
|
||||
|
@ -374,12 +376,13 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
|
|||
MFI->setScratchWaveOffsetReg(Reg);
|
||||
MFI->setFrameOffsetReg(Reg);
|
||||
ScratchWaveOffsetReg = Reg;
|
||||
FPAdjusted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ScratchWaveOffsetReg;
|
||||
return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
|
||||
}
|
||||
|
||||
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
||||
|
@ -415,7 +418,9 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
|||
unsigned ScratchRsrcReg
|
||||
= getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
|
||||
|
||||
unsigned ScratchWaveOffsetReg =
|
||||
unsigned ScratchWaveOffsetReg;
|
||||
bool FPAdjusted;
|
||||
std::tie(ScratchWaveOffsetReg, FPAdjusted) =
|
||||
getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
|
||||
|
||||
// We need to insert initialization of the scratch resource descriptor.
|
||||
|
@ -453,7 +458,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
|||
if (&OtherBB == &MBB)
|
||||
continue;
|
||||
|
||||
if (OffsetRegUsed)
|
||||
if (OffsetRegUsed || FPAdjusted)
|
||||
OtherBB.addLiveIn(ScratchWaveOffsetReg);
|
||||
|
||||
if (ResourceRegUsed)
|
||||
|
|
|
@ -66,7 +66,7 @@ private:
|
|||
SIMachineFunctionInfo *MFI,
|
||||
MachineFunction &MF) const;
|
||||
|
||||
unsigned getReservedPrivateSegmentWaveByteOffsetReg(
|
||||
std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg(
|
||||
const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI, MachineFunction &MF) const;
|
||||
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
|
||||
|
||||
|
||||
# CHECK-LABEL: name: foo
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET
|
||||
--- |
|
||||
|
||||
define amdgpu_kernel void @foo() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
|
||||
...
|
||||
---
|
||||
name: foo
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$vgpr0' }
|
||||
- { reg: '$sgpr4_sgpr5' }
|
||||
- { reg: '$sgpr6_sgpr7' }
|
||||
- { reg: '$sgpr8' }
|
||||
frameInfo:
|
||||
maxAlignment: 4
|
||||
stack:
|
||||
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
|
||||
machineFunctionInfo:
|
||||
explicitKernArgSize: 660
|
||||
maxKernArgAlign: 4
|
||||
isEntryFunction: true
|
||||
waveLimiter: true
|
||||
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
|
||||
scratchWaveOffsetReg: '$sgpr101'
|
||||
frameOffsetReg: '$sgpr101'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
argumentInfo:
|
||||
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||
dispatchPtr: { reg: '$sgpr4_sgpr5' }
|
||||
kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
|
||||
workGroupIDX: { reg: '$sgpr8' }
|
||||
privateSegmentWaveByteOffset: { reg: '$sgpr9' }
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
liveins: $sgpr8, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7
|
||||
|
||||
bb.1:
|
||||
liveins: $sgpr4, $sgpr5, $sgpr9, $sgpr22, $vgpr0, $sgpr6_sgpr7
|
||||
|
||||
renamable $vgpr2 = IMPLICIT_DEF
|
||||
SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
|
Loading…
Reference in New Issue