forked from OSchip/llvm-project
R600/SI: Re-initialize the m0 register after using it for indirect addressing
For LDS instructions to work correctly, the m0 register must hold a value greater than or equal to the number of LDS bytes allocated by the shader. We always initialize m0 at the beginning of a shader, but this register is also used for indirect addressing offsets, so we need to re-initialize it any time we use indirect addressing.

llvm-svn: 211107
This commit is contained in:
parent
61d7f97000
commit
8942276a2a
|
@ -86,6 +86,7 @@ private:
|
|||
void Kill(MachineInstr &MI);
|
||||
void Branch(MachineInstr &MI);
|
||||
|
||||
void InitM0ForLDS(MachineBasicBlock::iterator MI);
|
||||
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
|
||||
void IndirectSrc(MachineInstr &MI);
|
||||
void IndirectDst(MachineInstr &MI);
|
||||
|
@ -320,6 +321,14 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
|
|||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
/// The m0 register stores the maximum allowable address for LDS reads and
|
||||
/// writes. Its value must be at least the size in bytes of LDS allocated by
|
||||
/// the shader. For simplicity, we set it to the maximum possible value.
|
||||
void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) {
|
||||
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
|
||||
AMDGPU::M0).addImm(0xffffffff);
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
|
||||
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
|
@ -333,9 +342,7 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
|
|||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
|
||||
.addReg(Idx);
|
||||
MBB.insert(I, MovRel);
|
||||
MI.eraseFromParent();
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
|
||||
assert(AMDGPU::SReg_64RegClass.contains(Save));
|
||||
assert(AMDGPU::VReg_32RegClass.contains(Idx));
|
||||
|
@ -379,6 +386,13 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
|
|||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
|
||||
.addReg(Save);
|
||||
|
||||
}
|
||||
// FIXME: Are there any values other than the LDS address clamp that need to
|
||||
// be stored in the m0 register and may be live for more than a few
|
||||
// instructions? If so, we should save the m0 register at the beginning
|
||||
// of this function and restore it here.
|
||||
// FIXME: Add support for LDS direct loads.
|
||||
InitM0ForLDS(&MI);
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
|
@ -523,8 +537,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
|||
MachineBasicBlock &MBB = MF.front();
|
||||
// Initialize M0 to a value that won't cause LDS access to be discarded
|
||||
// due to offset clamping
|
||||
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32),
|
||||
AMDGPU::M0).addImm(0xffffffff);
|
||||
InitM0ForLDS(MBB.getFirstNonPHI());
|
||||
}
|
||||
|
||||
if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
|
||||
|
|
Loading…
Reference in New Issue