forked from OSchip/llvm-project
[AMDGPU] Allow buildSpillLoadStore in empty bb
This allows calling buildSpillLoadStore for an empty basic block, where MI points to the end of the block rather than to an instruction. This only happens with downstream CFI changes, so I was not able to create a test case that works with upstream LLVM.

Differential Revision: https://reviews.llvm.org/D101356
This commit is contained in:
parent
2fa14d4700
commit
9569d5ba02
|
@ -118,6 +118,7 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
|
|||
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
|
||||
const SIMachineFunctionInfo &FuncInfo,
|
||||
LivePhysRegs &LiveRegs, MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, Register SpillReg,
|
||||
int FI) {
|
||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
|
||||
|
@ -129,7 +130,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
|
|||
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
|
||||
FrameInfo.getObjectAlign(FI));
|
||||
LiveRegs.addReg(SpillReg);
|
||||
TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, true,
|
||||
TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
|
||||
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
|
||||
&LiveRegs);
|
||||
LiveRegs.removeReg(SpillReg);
|
||||
|
@ -139,6 +140,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
|
|||
const SIRegisterInfo &TRI,
|
||||
const SIMachineFunctionInfo &FuncInfo,
|
||||
LivePhysRegs &LiveRegs, MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, Register SpillReg,
|
||||
int FI) {
|
||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
|
||||
|
@ -149,7 +151,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
|
|||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
|
||||
FrameInfo.getObjectAlign(FI));
|
||||
TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, false,
|
||||
TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
|
||||
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
|
||||
&LiveRegs);
|
||||
}
|
||||
|
@ -745,7 +747,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
|
||||
/*IsProlog*/ true);
|
||||
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR, *Reg.FI);
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
|
||||
*Reg.FI);
|
||||
}
|
||||
|
||||
// VGPRs used for Whole Wave Mode
|
||||
|
@ -759,7 +762,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
ScratchExecCopy =
|
||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
|
||||
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, VGPR, *FI);
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
|
||||
}
|
||||
|
||||
if (ScratchExecCopy) {
|
||||
|
@ -785,7 +788,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
|
||||
.addReg(FramePtrReg);
|
||||
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
|
||||
FramePtrFI);
|
||||
}
|
||||
|
||||
|
@ -803,7 +806,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
|
||||
.addReg(BasePtrReg);
|
||||
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
|
||||
BasePtrFI);
|
||||
}
|
||||
|
||||
|
@ -996,7 +999,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
||||
if (!TmpVGPR)
|
||||
report_fatal_error("failed to find free scratch register");
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
|
||||
FramePtrFI);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
|
||||
.addReg(TmpVGPR, RegState::Kill);
|
||||
|
@ -1022,7 +1025,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
||||
if (!TmpVGPR)
|
||||
report_fatal_error("failed to find free scratch register");
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
|
||||
BasePtrFI);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
|
||||
.addReg(TmpVGPR, RegState::Kill);
|
||||
|
@ -1048,7 +1051,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
ScratchExecCopy =
|
||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
|
||||
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR,
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
|
||||
*Reg.FI);
|
||||
}
|
||||
|
||||
|
@ -1062,7 +1065,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
ScratchExecCopy =
|
||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
|
||||
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, VGPR, *FI);
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
|
||||
}
|
||||
|
||||
if (ScratchExecCopy) {
|
||||
|
|
|
@ -915,13 +915,11 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
|
|||
}
|
||||
|
||||
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
int Index,
|
||||
unsigned Lane,
|
||||
unsigned ValueReg,
|
||||
bool IsKill) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
MachineFunction *MF = MI->getParent()->getParent();
|
||||
int Index, unsigned Lane,
|
||||
unsigned ValueReg, bool IsKill) {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
||||
|
@ -939,8 +937,8 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
|
|||
unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
|
||||
: AMDGPU::V_ACCVGPR_READ_B32_e64;
|
||||
|
||||
auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
|
||||
.addReg(Src, getKillRegState(IsKill));
|
||||
auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
|
||||
.addReg(Src, getKillRegState(IsKill));
|
||||
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
|
||||
return MIB;
|
||||
}
|
||||
|
@ -964,7 +962,7 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
|
|||
return false;
|
||||
|
||||
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
|
||||
if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr())
|
||||
if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
|
||||
return true;
|
||||
|
||||
MachineInstrBuilder NewMI =
|
||||
|
@ -1021,20 +1019,19 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
|
|||
}
|
||||
|
||||
void SIRegisterInfo::buildSpillLoadStore(
|
||||
MachineBasicBlock::iterator MI, unsigned LoadStoreOp, int Index,
|
||||
Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg,
|
||||
int64_t InstOffset, MachineMemOperand *MMO, RegScavenger *RS,
|
||||
LivePhysRegs *LiveRegs) const {
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
|
||||
MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
|
||||
RegScavenger *RS, LivePhysRegs *LiveRegs) const {
|
||||
assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
|
||||
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
MachineFunction *MF = MI->getParent()->getParent();
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
|
||||
const DebugLoc &DL = MI->getDebugLoc();
|
||||
const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
|
||||
bool IsStore = Desc->mayStore();
|
||||
bool IsFlat = TII->isFLATScratch(LoadStoreOp);
|
||||
|
||||
|
@ -1114,10 +1111,9 @@ void SIRegisterInfo::buildSpillLoadStore(
|
|||
report_fatal_error("could not scavenge SGPR to spill in entry function");
|
||||
|
||||
if (ScratchOffsetReg == AMDGPU::NoRegister) {
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset)
|
||||
.addImm(Offset);
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
|
||||
} else {
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
|
||||
.addReg(ScratchOffsetReg)
|
||||
.addImm(Offset);
|
||||
}
|
||||
|
@ -1170,7 +1166,7 @@ void SIRegisterInfo::buildSpillLoadStore(
|
|||
Register Sub = IsSubReg
|
||||
? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
|
||||
: ValueReg;
|
||||
auto MIB = spillVGPRtoAGPR(ST, MI, Index, Lane, Sub, IsKill);
|
||||
auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
|
||||
if (!MIB.getInstr())
|
||||
break;
|
||||
if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
|
||||
|
@ -1216,9 +1212,9 @@ void SIRegisterInfo::buildSpillLoadStore(
|
|||
RS->setRegUsed(TmpReg);
|
||||
}
|
||||
if (IsStore) {
|
||||
auto AccRead = BuildMI(*MBB, MI, DL,
|
||||
TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
|
||||
.addReg(SubReg, getKillRegState(IsKill));
|
||||
auto AccRead = BuildMI(MBB, MI, DL,
|
||||
TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
|
||||
.addReg(SubReg, getKillRegState(IsKill));
|
||||
if (NeedSuperRegDef)
|
||||
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
|
||||
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
|
||||
|
@ -1231,9 +1227,9 @@ void SIRegisterInfo::buildSpillLoadStore(
|
|||
MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
|
||||
commonAlignment(Alignment, RemRegOffset));
|
||||
|
||||
auto MIB = BuildMI(*MBB, MI, DL, *Desc)
|
||||
.addReg(SubReg,
|
||||
getDefRegState(!IsStore) | getKillRegState(IsKill));
|
||||
auto MIB =
|
||||
BuildMI(MBB, MI, DL, *Desc)
|
||||
.addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
|
||||
if (!IsFlat)
|
||||
MIB.addReg(FuncInfo->getScratchRSrcReg());
|
||||
|
||||
|
@ -1254,9 +1250,9 @@ void SIRegisterInfo::buildSpillLoadStore(
|
|||
MIB.addReg(ValueReg, RegState::ImplicitDefine);
|
||||
|
||||
if (!IsStore && TmpReg != AMDGPU::NoRegister) {
|
||||
MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
|
||||
MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
|
||||
FinalReg)
|
||||
.addReg(TmpReg, RegState::Kill);
|
||||
.addReg(TmpReg, RegState::Kill);
|
||||
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
|
||||
}
|
||||
|
||||
|
@ -1266,7 +1262,7 @@ void SIRegisterInfo::buildSpillLoadStore(
|
|||
|
||||
if (ScratchOffsetRegDelta != 0) {
|
||||
// Subtract the offset we added to the ScratchOffset register.
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
|
||||
.addReg(SOffset)
|
||||
.addImm(ScratchOffsetRegDelta);
|
||||
}
|
||||
|
@ -1293,12 +1289,12 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
|
|||
if (IsLoad) {
|
||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
|
||||
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
|
||||
buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
|
||||
buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
|
||||
Offset * SB.EltSize, MMO, SB.RS);
|
||||
} else {
|
||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
|
||||
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
|
||||
buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
|
||||
buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
|
||||
Offset * SB.EltSize, MMO, SB.RS);
|
||||
// This only ever adds one VGPR spill
|
||||
SB.MFI.addToSpilledVGPRs(1);
|
||||
|
@ -1573,13 +1569,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
|||
|
||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
|
||||
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
|
||||
buildSpillLoadStore(MI, Opc,
|
||||
Index,
|
||||
VData->getReg(), VData->isKill(),
|
||||
FrameReg,
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
|
||||
*MI->memoperands_begin(),
|
||||
RS);
|
||||
auto *MBB = MI->getParent();
|
||||
buildSpillLoadStore(
|
||||
*MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
|
||||
*MI->memoperands_begin(), RS);
|
||||
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
|
@ -1609,13 +1603,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
|||
|
||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
|
||||
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
|
||||
buildSpillLoadStore(MI, Opc,
|
||||
Index,
|
||||
VData->getReg(), VData->isKill(),
|
||||
FrameReg,
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
|
||||
*MI->memoperands_begin(),
|
||||
RS);
|
||||
auto *MBB = MI->getParent();
|
||||
buildSpillLoadStore(
|
||||
*MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
|
||||
*MI->memoperands_begin(), RS);
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -349,7 +349,8 @@ public:
|
|||
// When lowering spill pseudos, the RegScavenger should be set.
|
||||
// For creating spill instructions during frame lowering, where no scavenger
|
||||
// is available, LiveRegs can be used.
|
||||
void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
|
||||
void buildSpillLoadStore(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
|
||||
int Index, Register ValueReg, bool ValueIsKill,
|
||||
MCRegister ScratchOffsetReg, int64_t InstrOffset,
|
||||
MachineMemOperand *MMO, RegScavenger *RS,
|
||||
|
|
Loading…
Reference in New Issue