[AMDGPU] Allow buildSpillLoadStore in empty bb

This allows calling buildSpillLoadStore for an empty basic block, where
MI points at the end of the block instead of to an instruction.

This only happens with downstream CFI changes, so I was not able to
create a testcase that works with upstream LLVM.

Differential Revision: https://reviews.llvm.org/D101356
This commit is contained in:
Sebastian Neubauer 2021-04-29 12:52:29 +02:00
parent 2fa14d4700
commit 9569d5ba02
3 changed files with 52 additions and 56 deletions

View File

@@ -118,6 +118,7 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, Register SpillReg,
int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
@@ -129,7 +130,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
LiveRegs.addReg(SpillReg);
TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, true,
TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
LiveRegs.removeReg(SpillReg);
@@ -139,6 +140,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, Register SpillReg,
int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
@@ -149,7 +151,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, false,
TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
}
@@ -745,7 +747,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
/*IsProlog*/ true);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR, *Reg.FI);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
*Reg.FI);
}
// VGPRs used for Whole Wave Mode
@@ -759,7 +762,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, VGPR, *FI);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
}
if (ScratchExecCopy) {
@@ -785,7 +788,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(FramePtrReg);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
FramePtrFI);
}
@@ -803,7 +806,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(BasePtrReg);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
BasePtrFI);
}
@@ -996,7 +999,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
FramePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
.addReg(TmpVGPR, RegState::Kill);
@@ -1022,7 +1025,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
BasePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
.addReg(TmpVGPR, RegState::Kill);
@@ -1048,7 +1051,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR,
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
*Reg.FI);
}
@@ -1062,7 +1065,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, VGPR, *FI);
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
}
if (ScratchExecCopy) {

View File

@@ -915,13 +915,11 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
}
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
int Index,
unsigned Lane,
unsigned ValueReg,
bool IsKill) {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MI->getParent()->getParent();
int Index, unsigned Lane,
unsigned ValueReg, bool IsKill) {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -939,8 +937,8 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
: AMDGPU::V_ACCVGPR_READ_B32_e64;
auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
.addReg(Src, getKillRegState(IsKill));
auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
.addReg(Src, getKillRegState(IsKill));
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
return MIB;
}
@@ -964,7 +962,7 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
return false;
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr())
if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
return true;
MachineInstrBuilder NewMI =
@@ -1021,20 +1019,19 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
}
void SIRegisterInfo::buildSpillLoadStore(
MachineBasicBlock::iterator MI, unsigned LoadStoreOp, int Index,
Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg,
int64_t InstOffset, MachineMemOperand *MMO, RegScavenger *RS,
LivePhysRegs *LiveRegs) const {
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
RegScavenger *RS, LivePhysRegs *LiveRegs) const {
assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MI->getParent()->getParent();
MachineFunction *MF = MBB.getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
const MachineFrameInfo &MFI = MF->getFrameInfo();
const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
const DebugLoc &DL = MI->getDebugLoc();
const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
bool IsStore = Desc->mayStore();
bool IsFlat = TII->isFLATScratch(LoadStoreOp);
@@ -1114,10 +1111,9 @@ void SIRegisterInfo::buildSpillLoadStore(
report_fatal_error("could not scavenge SGPR to spill in entry function");
if (ScratchOffsetReg == AMDGPU::NoRegister) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset)
.addImm(Offset);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
} else {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
.addReg(ScratchOffsetReg)
.addImm(Offset);
}
@@ -1170,7 +1166,7 @@ void SIRegisterInfo::buildSpillLoadStore(
Register Sub = IsSubReg
? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
: ValueReg;
auto MIB = spillVGPRtoAGPR(ST, MI, Index, Lane, Sub, IsKill);
auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
if (!MIB.getInstr())
break;
if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
@@ -1216,9 +1212,9 @@ void SIRegisterInfo::buildSpillLoadStore(
RS->setRegUsed(TmpReg);
}
if (IsStore) {
auto AccRead = BuildMI(*MBB, MI, DL,
TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
.addReg(SubReg, getKillRegState(IsKill));
auto AccRead = BuildMI(MBB, MI, DL,
TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
.addReg(SubReg, getKillRegState(IsKill));
if (NeedSuperRegDef)
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
@@ -1231,9 +1227,9 @@ void SIRegisterInfo::buildSpillLoadStore(
MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
commonAlignment(Alignment, RemRegOffset));
auto MIB = BuildMI(*MBB, MI, DL, *Desc)
.addReg(SubReg,
getDefRegState(!IsStore) | getKillRegState(IsKill));
auto MIB =
BuildMI(MBB, MI, DL, *Desc)
.addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
if (!IsFlat)
MIB.addReg(FuncInfo->getScratchRSrcReg());
@@ -1254,9 +1250,9 @@ void SIRegisterInfo::buildSpillLoadStore(
MIB.addReg(ValueReg, RegState::ImplicitDefine);
if (!IsStore && TmpReg != AMDGPU::NoRegister) {
MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
FinalReg)
.addReg(TmpReg, RegState::Kill);
.addReg(TmpReg, RegState::Kill);
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
}
@@ -1266,7 +1262,7 @@ void SIRegisterInfo::buildSpillLoadStore(
if (ScratchOffsetRegDelta != 0) {
// Subtract the offset we added to the ScratchOffset register.
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
.addReg(SOffset)
.addImm(ScratchOffsetRegDelta);
}
@@ -1293,12 +1289,12 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
if (IsLoad) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
Offset * SB.EltSize, MMO, SB.RS);
} else {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
Offset * SB.EltSize, MMO, SB.RS);
// This only ever adds one VGPR spill
SB.MFI.addToSpilledVGPRs(1);
@@ -1573,13 +1569,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
buildSpillLoadStore(MI, Opc,
Index,
VData->getReg(), VData->isKill(),
FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(),
RS);
auto *MBB = MI->getParent();
buildSpillLoadStore(
*MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(), RS);
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
MI->eraseFromParent();
break;
@@ -1609,13 +1603,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
buildSpillLoadStore(MI, Opc,
Index,
VData->getReg(), VData->isKill(),
FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(),
RS);
auto *MBB = MI->getParent();
buildSpillLoadStore(
*MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(), RS);
MI->eraseFromParent();
break;
}

View File

@@ -349,7 +349,8 @@ public:
// When lowering spill pseudos, the RegScavenger should be set.
// For creating spill instructions during frame lowering, where no scavenger
// is available, LiveRegs can be used.
void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
void buildSpillLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
int Index, Register ValueReg, bool ValueIsKill,
MCRegister ScratchOffsetReg, int64_t InstrOffset,
MachineMemOperand *MMO, RegScavenger *RS,