AMDGPU/SI: Don't set DATA_FORMAT if ADD_TID_ENABLE is set

to prevent setting a huge stride, because DATA_FORMAT has a different
meaning if ADD_TID_ENABLE is set.

This is a candidate for stable llvm 3.7.

Tested-and-Reviewed-by: Christian König <christian.koenig@amd.com>
llvm-svn: 248858
This commit is contained in:
Marek Olsak 2015-09-29 23:37:32 +00:00
parent a13dfd539b
commit d1a69a2839
4 changed files with 18 additions and 8 deletions

View File

@ -2257,10 +2257,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
SDValue Ptr) const {
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size
return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
}
SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,

View File

@ -2781,3 +2781,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
return RsrcDataFormat;
}
uint64_t SIInstrInfo::getScratchRsrcWords23() const {
uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size;
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
return Rsrc23;
}

View File

@ -356,7 +356,7 @@ public:
}
uint64_t getDefaultRsrcDataFormat() const;
uint64_t getScratchRsrcWords23() const;
};
namespace AMDGPU {

View File

@ -138,8 +138,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
unsigned ScratchRsrcReg =
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
@ -155,11 +154,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
.addImm(Rsrc & 0xffffffff)
.addImm(Rsrc23 & 0xffffffff)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
.addImm(Rsrc >> 32)
.addImm(Rsrc23 >> 32)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
// Scratch Offset