[amdgpu] Handle the case where there is no scavenged register.

- When an unconditional branch is expanded into an indirect branch and
  there is no register to scavenge, an SGPR pair needs spilling so that
  the destination PC can be calculated. In addition, before jumping into
  the destination, that clobbered SGPR pair needs restoring (see the
  sketch after this list).
- As SGPRs cannot be spilled to or restored from memory directly, the
  spilling/restoring of that SGPR pair reuses the regular SGPR spilling
  support, but without writing the pair out to memory. As the spill and
  restore points are fully controlled, we only need to spill that SGPR
  pair into a temporary VGPR, which in turn is spilled into its emergency
  slot.
- The target-specific hook is revised to take an additional restore
  block, into which the restoring code is filled. After that, the
  relaxation pass places that restore block directly before the
  destination block and, if the preceding block falls through into the
  destination, inserts an explicit unconditional branch so the restore
  block can be spliced in between.
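
  For illustration, the expansion in the no-free-register case looks
  roughly like the sketch below. This is a hedged reconstruction, not the
  patch's literal output: v0 stands for the temporary VGPR picked by the
  spill builder, the save and reload of v0 itself through its emergency
  stack slot (and the exec-mask handling around it) are elided, and
  s[0:1] is the pair this patch hard-codes (AMDGPU::SGPR0_SGPR1).

      v_writelane_b32 v0, s0, 0  ; save s[0:1] into lanes of the temp VGPR
      v_writelane_b32 v0, s1, 1
      s_getpc_b64 s[0:1]         ; clobbers s[0:1], hence the spill above
    post_getpc:
      s_add_u32  s0, s0, (restore_bb - post_getpc) & 0xffffffff
      s_addc_u32 s1, s1, (restore_bb - post_getpc) >> 32
      s_setpc_b64 s[0:1]         ; far jump, landing in the restore block
      ...
    restore_bb:                  ; placed directly before dest_bb
      v_readlane_b32 s0, v0, 0   ; restore s[0:1] from the VGPR lanes
      v_readlane_b32 s1, v0, 1
    dest_bb:
      ...

  When an SGPR pair can be scavenged instead, none of this is needed: the
  restore block stays empty and is erased, and the computed PC targets
  dest_bb directly.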

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D106449
Michael Liao 2021-07-16 12:14:49 -04:00
parent 32d45862fc
commit e6a4ba3aa6
11 changed files with 1922 additions and 86 deletions

llvm/include/llvm/CodeGen/TargetInstrInfo.h

@@ -582,15 +582,14 @@ public:
   }
 
   /// Insert an unconditional indirect branch at the end of \p MBB to \p
-  /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to
+  /// NewDestBB. Optionally, insert the clobbered register restoring in \p
+  /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to
   /// the offset of the position to insert the new branch.
-  ///
-  /// \returns The number of bytes added to the block.
-  virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                        MachineBasicBlock &NewDestBB,
-                                        const DebugLoc &DL,
-                                        int64_t BrOffset = 0,
-                                        RegScavenger *RS = nullptr) const {
+  virtual void insertIndirectBranch(MachineBasicBlock &MBB,
+                                    MachineBasicBlock &NewDestBB,
+                                    MachineBasicBlock &RestoreBB,
+                                    const DebugLoc &DL, int64_t BrOffset = 0,
+                                    RegScavenger *RS = nullptr) const {
     llvm_unreachable("target did not implement");
   }

llvm/lib/CodeGen/BranchRelaxation.cpp

@@ -463,10 +463,48 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
   DebugLoc DL = MI.getDebugLoc();
   MI.eraseFromParent();
-  BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch(
-      *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
+
+  // Create the optional restore block and, initially, place it at the end of
+  // function. That block will be placed later if it's used; otherwise, it will
+  // be erased.
+  MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
+
+  TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
+                            DestOffset - SrcOffset, RS.get());
+
+  BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
   adjustBlockOffsets(*MBB);
+
+  // If RestoreBB is required, try to place just before DestBB.
+  if (!RestoreBB->empty()) {
+    // TODO: For multiple far branches to the same destination, there are
+    // chances that some restore blocks could be shared if they clobber the
+    // same registers and share the same restore sequence. So far, those
+    // restore blocks are just duplicated for each far branch.
+    assert(!DestBB->isEntryBlock());
+    MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+    if (auto *FT = PrevBB->getFallThrough()) {
+      assert(FT == DestBB);
+      TII->insertUnconditionalBranch(*PrevBB, DestBB, DebugLoc());
+      // Recalculate the block size.
+      BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+    }
+    // Now, RestoreBB could be placed directly before DestBB.
+    MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+    // Update successors and predecessors.
+    RestoreBB->addSuccessor(DestBB);
+    BranchBB->replaceSuccessor(DestBB, RestoreBB);
+    if (TRI->trackLivenessAfterRegAlloc(*MF))
+      computeAndAddLiveIns(LiveRegs, *RestoreBB);
+    // Compute the restore block size.
+    BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+    // Update the offset starting from the previous block.
+    adjustBlockOffsets(*PrevBB);
+  } else {
+    // Remove restore block if it's not required.
+    MF->erase(RestoreBB);
+  }
+
   return true;
 }

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -2223,15 +2223,17 @@ MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
   return MI.getOperand(0).getMBB();
 }
 
-unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                           MachineBasicBlock &DestBB,
-                                           const DebugLoc &DL,
-                                           int64_t BrOffset,
-                                           RegScavenger *RS) const {
+void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                       MachineBasicBlock &DestBB,
+                                       MachineBasicBlock &RestoreBB,
+                                       const DebugLoc &DL, int64_t BrOffset,
+                                       RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
   assert(MBB.pred_size() == 1);
+  assert(RestoreBB.empty() &&
+         "restore block should be inserted for restoring clobbered registers");
 
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -2268,14 +2270,6 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
   BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
     .addReg(PCReg);
 
-  auto ComputeBlockSize = [](const TargetInstrInfo *TII,
-                             const MachineBasicBlock &MBB) {
-    unsigned Size = 0;
-    for (const MachineInstr &MI : MBB)
-      Size += TII->getInstSizeInBytes(MI);
-    return Size;
-  };
-
   // FIXME: If spilling is necessary, this will fail because this scavenger has
   // no emergency stack slots. It is non-trivial to spill in this situation,
   // because the restore code needs to be specially placed after the
@@ -2314,22 +2308,34 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
   RS->enterBasicBlockEnd(MBB);
   Register Scav = RS->scavengeRegisterBackwards(
-      AMDGPU::SReg_64RegClass,
-      MachineBasicBlock::iterator(GetPC), false, 0);
-  MRI.replaceRegWith(PCReg, Scav);
-  MRI.clearVirtRegs();
-  RS->setRegUsed(Scav);
+      AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
+      /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+  if (Scav) {
+    RS->setRegUsed(Scav);
+    MRI.replaceRegWith(PCReg, Scav);
+    MRI.clearVirtRegs();
+  } else {
+    // As SGPR needs VGPR to be spilled, we reuse the slot of temporary VGPR for
+    // SGPR spill.
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
+    MRI.clearVirtRegs();
+  }
+
+  MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol();
   // Now, the distance could be defined.
   auto *Offset = MCBinaryExpr::createSub(
-      MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx),
+      MCSymbolRefExpr::create(DestLabel, MCCtx),
       MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
   // Add offset assignments.
   auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx);
   OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx));
   auto *ShAmt = MCConstantExpr::create(32, MCCtx);
   OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx));
-
-  return ComputeBlockSize(this, MBB);
+  return;
 }
 
 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {

llvm/lib/Target/AMDGPU/SIInstrInfo.h

@@ -275,11 +275,10 @@ public:
   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
 
-  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                MachineBasicBlock &NewDestBB,
-                                const DebugLoc &DL,
-                                int64_t BrOffset,
-                                RegScavenger *RS = nullptr) const override;
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset, RegScavenger *RS) const override;
 
   bool analyzeBranchImpl(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I,

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

@@ -97,7 +97,7 @@ struct SGPRSpillBuilder {
   unsigned EltSize = 4;
 
   RegScavenger *RS;
-  MachineBasicBlock &MBB;
+  MachineBasicBlock *MBB;
   MachineFunction &MF;
   SIMachineFunctionInfo &MFI;
   const SIInstrInfo &TII;
@@ -110,9 +110,14 @@ struct SGPRSpillBuilder {
   SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                    bool IsWave32, MachineBasicBlock::iterator MI, int Index,
                    RegScavenger *RS)
-      : SuperReg(MI->getOperand(0).getReg()), MI(MI),
-        IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
-        RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
+      : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
+                         MI->getOperand(0).isKill(), Index, RS) {}
+
+  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
+                   bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
+                   bool IsKill, int Index, RegScavenger *RS)
+      : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
+        Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
         MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
         IsWave32(IsWave32) {
     const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
@@ -189,8 +194,9 @@ struct SGPRSpillBuilder {
     if (SavedExecReg) {
       RS->setRegUsed(SavedExecReg);
       // Set exec to needed lanes
-      BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
-      auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
+      BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
+      auto I =
+          BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
       if (!TmpVGPRLive)
         I.addReg(TmpVGPR, RegState::ImplicitDefine);
       // Spill needed lanes
@@ -201,7 +207,7 @@ struct SGPRSpillBuilder {
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
                                   /*IsKill*/ false);
       // Spill inactive lanes
-      auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       if (!TmpVGPRLive)
         I.addReg(TmpVGPR, RegState::ImplicitDefine);
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
@@ -224,7 +230,7 @@ struct SGPRSpillBuilder {
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                   /*IsKill*/ false);
       // Restore exec
-      auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
+      auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
                    .addReg(SavedExecReg, RegState::Kill);
       // Add an implicit use of the load so it is not dead.
       // FIXME This inserts an unnecessary waitcnt
@@ -235,7 +241,7 @@ struct SGPRSpillBuilder {
       // Restore inactive lanes
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                   /*IsKill*/ false);
-      auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       if (!TmpVGPRLive) {
         I.addReg(TmpVGPR, RegState::ImplicitKill);
       }
@@ -261,11 +267,17 @@ struct SGPRSpillBuilder {
       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
                                   /*IsKill*/ false);
       // Spill inactive lanes
-      BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
-      BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
     }
   }
+
+  void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
+    assert(MBB->getParent() == &MF);
+    MI = NewMI;
+    MBB = NewMBB;
+  }
 };
 
 } // namespace llvm
@@ -1337,13 +1349,13 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
   if (IsLoad) {
     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                           : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
-    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
+    buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
                         Offset * SB.EltSize, MMO, SB.RS);
   } else {
     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                           : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
-    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
-                        Offset * SB.EltSize, MMO, SB.RS);
+    buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill,
+                        FrameReg, Offset * SB.EltSize, MMO, SB.RS);
     // This only ever adds one VGPR spill
     SB.MFI.addToSpilledVGPRs(1);
   }
@@ -1381,8 +1393,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       // Mark the "old value of vgpr" input undef only if this is the first sgpr
      // spill to this specific vgpr in the first basic block.
-      auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
-                         Spill.VGPR)
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
+                         SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
                      .addReg(SubReg, getKillRegState(UseKill))
                      .addImm(Spill.Lane)
                      .addReg(Spill.VGPR);
@@ -1428,7 +1440,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
         MachineInstrBuilder WriteLane =
-            BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+            BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
                     SB.TmpVGPR)
                 .addReg(SubReg, SubKillState)
                 .addImm(i % PVD.PerVGPR)
@@ -1490,10 +1502,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
-      auto MIB =
-          BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
-              .addReg(Spill.VGPR)
-              .addImm(Spill.Lane);
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+                         SubReg)
+                     .addReg(Spill.VGPR)
+                     .addImm(Spill.Lane);
       if (SB.NumSubRegs > 1 && i == 0)
         MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
       if (LIS) {
@@ -1524,7 +1536,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
         bool LastSubReg = (i + 1 == e);
-        auto MIB = BuildMI(SB.MBB, MI, SB.DL,
+        auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
                            SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
                        .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
                        .addImm(i);
@@ -1550,6 +1562,75 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
   return true;
 }
 
+bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
+                                        MachineBasicBlock &RestoreMBB,
+                                        Register SGPR, RegScavenger *RS) const {
+  SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
+                      RS);
+  SB.prepare();
+  // Generate the spill of SGPR to SB.TmpVGPR.
+  unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
+  auto PVD = SB.getPerVGPRData();
+  for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+    unsigned TmpVGPRFlags = RegState::Undef;
+    // Write sub registers into the VGPR
+    for (unsigned i = Offset * PVD.PerVGPR,
+                  e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+         i < e; ++i) {
+      Register SubReg =
+          SB.NumSubRegs == 1
+              ? SB.SuperReg
+              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+      MachineInstrBuilder WriteLane =
+          BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+                  SB.TmpVGPR)
+              .addReg(SubReg, SubKillState)
+              .addImm(i % PVD.PerVGPR)
+              .addReg(SB.TmpVGPR, TmpVGPRFlags);
+      TmpVGPRFlags = 0;
+      // There could be undef components of a spilled super register.
+      // TODO: Can we detect this and skip the spill?
+      if (SB.NumSubRegs > 1) {
+        // The last implicit use of the SB.SuperReg carries the "Kill" flag.
+        unsigned SuperKillState = 0;
+        if (i + 1 == SB.NumSubRegs)
+          SuperKillState |= getKillRegState(SB.IsKill);
+        WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
+      }
+    }
+    // Don't need to write VGPR out.
+  }
+
+  // Restore clobbered registers in the specified restore block.
+  MI = RestoreMBB.end();
+  SB.setMI(&RestoreMBB, MI);
+  // Generate the restore of SGPR from SB.TmpVGPR.
+  for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+    // Don't need to load VGPR in.
+    // Unpack lanes
+    for (unsigned i = Offset * PVD.PerVGPR,
+                  e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+         i < e; ++i) {
+      Register SubReg =
+          SB.NumSubRegs == 1
+              ? SB.SuperReg
+              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+      bool LastSubReg = (i + 1 == e);
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+                         SubReg)
+                     .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
+                     .addImm(i);
+      if (SB.NumSubRegs > 1 && i == 0)
+        MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
+    }
+  }
+  SB.restore();
+
+  SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
+  return false;
+}
+
 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
 /// a VGPR and the stack slot can be safely eliminated when all other users are
 /// handled.

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

@@ -130,6 +130,10 @@ public:
                   LiveIntervals *LIS = nullptr,
                   bool OnlyToVGPR = false) const;
 
+  bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
+                          MachineBasicBlock &RestoreMBB, Register SGPR,
+                          RegScavenger *RS) const;
+
   void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS) const override;

llvm/lib/Target/AVR/AVRInstrInfo.cpp

@@ -560,19 +560,19 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
   }
 }
 
-unsigned AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                            MachineBasicBlock &NewDestBB,
-                                            const DebugLoc &DL,
-                                            int64_t BrOffset,
-                                            RegScavenger *RS) const {
+void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                        MachineBasicBlock &NewDestBB,
+                                        MachineBasicBlock &RestoreBB,
+                                        const DebugLoc &DL, int64_t BrOffset,
+                                        RegScavenger *RS) const {
   // This method inserts a *direct* branch (JMP), despite its name.
   // LLVM calls this method to fixup unconditional branches; it never calls
   // insertBranch or some hypothetical "insertDirectBranch".
   // See lib/CodeGen/RegisterRelaxation.cpp for details.
   // We end up here when a jump is too long for a RJMP instruction.
-  auto &MI = *BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
-
-  return getInstSizeInBytes(MI);
+  BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
+  return;
 }
 
 } // end of namespace llvm

llvm/lib/Target/AVR/AVRInstrInfo.h

@@ -107,10 +107,10 @@ public:
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;
 
-  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                MachineBasicBlock &NewDestBB,
-                                const DebugLoc &DL, int64_t BrOffset,
-                                RegScavenger *RS) const override;
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset, RegScavenger *RS) const override;
 
 private:
   const AVRRegisterInfo RI;

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

@@ -684,11 +684,11 @@ unsigned RISCVInstrInfo::insertBranch(
   return 2;
 }
 
-unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                              MachineBasicBlock &DestBB,
-                                              const DebugLoc &DL,
-                                              int64_t BrOffset,
-                                              RegScavenger *RS) const {
+void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                          MachineBasicBlock &DestBB,
+                                          MachineBasicBlock &RestoreBB,
+                                          const DebugLoc &DL, int64_t BrOffset,
+                                          RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
@@ -714,10 +714,11 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
   RS->enterBasicBlockEnd(MBB);
   unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
                                                 MI.getIterator(), false, 0);
+  // TODO: The case when there is no scavenged register needs special handling.
+  assert(Scav != RISCV::NoRegister && "No register is scavenged!");
   MRI.replaceRegWith(ScratchReg, Scav);
   MRI.clearVirtRegs();
   RS->setRegUsed(Scav);
-
-  return 8;
 }
 
 bool RISCVInstrInfo::reverseBranchCondition(

llvm/lib/Target/RISCV/RISCVInstrInfo.h

@@ -85,10 +85,10 @@ public:
                         const DebugLoc &dl,
                         int *BytesAdded = nullptr) const override;
 
-  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                MachineBasicBlock &NewDestBB,
-                                const DebugLoc &DL, int64_t BrOffset,
-                                RegScavenger *RS = nullptr) const override;
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset, RegScavenger *RS) const override;
 
   unsigned removeBranch(MachineBasicBlock &MBB,
                         int *BytesRemoved = nullptr) const override;

File diff suppressed because it is too large