forked from OSchip/llvm-project
AMDGPU/SI: Don't set DATA_FORMAT if ADD_TID_ENABLE is set
This prevents setting a huge stride, because DATA_FORMAT has a different meaning when ADD_TID_ENABLE is set. This is a candidate for stable LLVM 3.7. Tested-and-Reviewed-by: Christian König <christian.koenig@amd.com> llvm-svn: 248858
This commit is contained in:
parent
a13dfd539b
commit
d1a69a2839
|
@ -2257,10 +2257,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
|
|||
SDValue Ptr) const {
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
|
||||
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
|
||||
0xffffffff; // Size
|
||||
|
||||
return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
|
||||
return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
|
||||
|
|
|
@ -2781,3 +2781,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
|
|||
|
||||
return RsrcDataFormat;
|
||||
}
|
||||
|
||||
// Returns the 64-bit constant used when building the scratch buffer resource
// descriptor: the default resource data format, OR'ed with RSRC_TID_ENABLE and
// with all of the low 32 bits set. On VOLCANIC_ISLANDS and later generations
// the RSRC_DATA_FORMAT bits are cleared again before the value is returned.
// NOTE(review): the caller visible in this diff writes the low 32 bits into
// Rsrc2 and the high 32 bits into Rsrc3, which presumably are descriptor
// words 2 and 3 -- confirm against the register definitions.
uint64_t SIInstrInfo::getScratchRsrcWords23() const {
|
||||
uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
|
||||
AMDGPU::RSRC_TID_ENABLE |
|
||||
0xffffffff; // Size
|
||||
|
||||
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
|
||||
// Clear them unless we want a huge stride.
|
||||
// The clearing is applied only for VOLCANIC_ISLANDS and newer generations.
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
|
||||
|
||||
return Rsrc23;
|
||||
}
|
||||
|
|
|
@ -356,7 +356,7 @@ public:
|
|||
}
|
||||
|
||||
uint64_t getDefaultRsrcDataFormat() const;
|
||||
|
||||
uint64_t getScratchRsrcWords23() const;
|
||||
};
|
||||
|
||||
namespace AMDGPU {
|
||||
|
|
|
@ -138,8 +138,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
|
|||
unsigned ScratchRsrcReg =
|
||||
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
|
||||
|
||||
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
|
||||
0xffffffff; // Size
|
||||
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
|
||||
|
||||
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
|
||||
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
|
||||
|
@ -155,11 +154,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
|
|||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
|
||||
.addImm(Rsrc & 0xffffffff)
|
||||
.addImm(Rsrc23 & 0xffffffff)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
|
||||
.addImm(Rsrc >> 32)
|
||||
.addImm(Rsrc23 >> 32)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
// Scratch Offset
|
||||
|
|
Loading…
Reference in New Issue