R600/SI: Use external symbols for scratch buffer

We were passing the scratch buffer address to the shaders via user sgprs,
but now we use external symbols and have the driver patch the shader
using reloc information.

llvm-svn: 226586
This commit is contained in:
Tom Stellard 2015-01-20 17:49:47 +00:00
parent 8255af45cb
commit 95292bbfcd
9 changed files with 92 additions and 82 deletions

View File

@ -77,7 +77,11 @@ extern Target TheGCNTarget;
namespace AMDGPU {
enum TargetIndex {
TI_CONSTDATA_START
TI_CONSTDATA_START,
TI_SCRATCH_RSRC_DWORD0,
TI_SCRATCH_RSRC_DWORD1,
TI_SCRATCH_RSRC_DWORD2,
TI_SCRATCH_RSRC_DWORD3
};
}

View File

@ -962,16 +962,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
unsigned ScratchPtrReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
unsigned ScratchOffsetReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
ScratchOffsetReg, MVT::i32);
SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
SDValue ScratchRsrcDword0 =
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
SDValue ScratchPtr =
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
SDValue ScratchRsrcDword1 =
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
const SDValue RsrcOps[] = {
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
ScratchRsrcDword0,
CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
ScratchRsrcDword1,
CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
};
SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, RsrcOps), 0);
Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);

View File

@ -80,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::CreateExpr(Expr);
break;
}
case MachineOperand::MO_ExternalSymbol: {
MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
MCOp = MCOperand::CreateExpr(Expr);
break;
}
}
OutMI.addOperand(MCOp);
}

View File

@ -163,4 +163,5 @@ namespace SIOutMods {
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
#endif

View File

@ -482,7 +482,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex)
// Place-holder registers, these will be filled in by
// SIPrepareScratchRegs.
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
} else {
LLVMContext &Ctx = MF->getFunction()->getContext();
@ -528,7 +528,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex)
// Place-holder registers, these will be filled in by
// SIPrepareScratchRegs.
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
} else {

View File

@ -1951,14 +1951,14 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1 in {
def _SAVE : InstSI <
(outs),
(ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
(ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
>;
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
(ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
} // End UseNamedOperandTable = 1
@ -1974,14 +1974,14 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let UseNamedOperandTable = 1 in {
def _SAVE : InstSI <
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
>;
def _RESTORE : InstSI <
(outs vgpr_class:$dst),
(ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
} // End UseNamedOperandTable = 1

View File

@ -29,6 +29,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
@ -84,28 +85,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
Entry->addLiveIn(ScratchOffsetPreloadReg);
// Load the scratch pointer
unsigned ScratchPtrReg =
TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
int ScratchPtrFI = -1;
if (ScratchPtrReg != AMDGPU::NoRegister) {
// Found an SGPR to use.
MRI.setPhysRegUsed(ScratchPtrReg);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
.addReg(ScratchPtrPreloadReg);
} else {
// No SGPR is available, we must spill.
ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
.addReg(ScratchPtrPreloadReg)
.addFrameIndex(ScratchPtrFI);
}
// Load the scratch offset.
unsigned ScratchOffsetReg =
TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
int ScratchOffsetFI = ~0;
int ScratchOffsetFI = -1;
if (ScratchOffsetReg != AMDGPU::NoRegister) {
// Found an SGPR to use
@ -125,22 +108,26 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
// add them to all the SI_SPILL_V* instructions.
RegScavenger RS;
bool UseRegScavenger =
(ScratchPtrReg == AMDGPU::NoRegister ||
ScratchOffsetReg == AMDGPU::NoRegister);
unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
RS.addScavengingFrameIndex(ScratchRsrcFI);
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
if (UseRegScavenger)
RS.enterBasicBlock(&MBB);
// Add the scratch offset reg as a live-in so that the register scavenger
// doesn't re-use it.
if (!MBB.isLiveIn(ScratchOffsetReg))
MBB.addLiveIn(ScratchOffsetReg);
RS.enterBasicBlock(&MBB);
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
RS.forward(I);
DebugLoc DL = MI.getDebugLoc();
switch(MI.getOpcode()) {
default: break;;
default: break;
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
@ -153,18 +140,35 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE:
// Scratch Pointer
if (ScratchPtrReg == AMDGPU::NoRegister) {
ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
ScratchPtrReg)
.addFrameIndex(ScratchPtrFI)
.addReg(AMDGPU::NoRegister)
.addReg(AMDGPU::NoRegister);
} else if (!MBB.isLiveIn(ScratchPtrReg)) {
MBB.addLiveIn(ScratchPtrReg);
}
// Scratch resource
unsigned ScratchRsrcReg =
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
.addImm(Rsrc & 0xffffffff)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
.addImm(Rsrc >> 32)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
// Scratch Offset
if (ScratchOffsetReg == AMDGPU::NoRegister) {
ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
@ -176,20 +180,26 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
MBB.addLiveIn(ScratchOffsetReg);
}
if (ScratchPtrReg == AMDGPU::NoRegister ||
if (ScratchRsrcReg == AMDGPU::NoRegister ||
ScratchOffsetReg == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF.getFunction()->getContext();
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
ScratchPtrReg = AMDGPU::SGPR0;
ScratchRsrcReg = AMDGPU::SGPR0;
ScratchOffsetReg = AMDGPU::SGPR0;
}
MI.getOperand(2).setReg(ScratchPtrReg);
MI.getOperand(2).setReg(ScratchRsrcReg);
MI.getOperand(2).setIsKill(true);
MI.getOperand(2).setIsUndef(false);
MI.getOperand(3).setReg(ScratchOffsetReg);
MI.getOperand(3).setIsUndef(false);
MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
MI.dump();
break;
}
if (UseRegScavenger)
RS.forward();
}
}
return true;

View File

@ -98,7 +98,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
unsigned Value,
unsigned ScratchPtr,
unsigned ScratchRsrcReg,
unsigned ScratchOffset,
int64_t Offset,
RegScavenger *RS) const {
@ -113,34 +113,11 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
bool RanOutOfSGPRs = false;
unsigned SOffset = ScratchOffset;
unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
if (RsrcReg == AMDGPU::NoRegister) {
RanOutOfSGPRs = true;
RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
}
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned Size = NumSubRegs * 4;
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
getSubReg(RsrcReg, AMDGPU::sub0_sub1))
.addReg(ScratchPtr)
.addReg(RsrcReg, RegState::ImplicitDefine);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
getSubReg(RsrcReg, AMDGPU::sub2))
.addImm(Rsrc & 0xffffffff)
.addReg(RsrcReg, RegState::ImplicitDefine);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
getSubReg(RsrcReg, AMDGPU::sub3))
.addImm(Rsrc >> 32)
.addReg(RsrcReg, RegState::ImplicitDefine);
if (!isUInt<12>(Offset + Size)) {
dbgs() << "Offset scavenge\n";
SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
if (SOffset == AMDGPU::NoRegister) {
RanOutOfSGPRs = true;
@ -163,7 +140,7 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(RsrcReg, getKillRegState(IsKill))
.addReg(ScratchRsrcReg, getKillRegState(IsKill))
.addImm(Offset)
.addReg(SOffset, getKillRegState(IsKill))
.addImm(0) // glc
@ -236,6 +213,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
if (isM0) {
dbgs() << "Scavenge M0\n";
SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
}
@ -262,7 +240,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V32_SAVE:
buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
@ -274,7 +252,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V512_RESTORE: {
buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();

View File

@ -111,7 +111,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
private:
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, unsigned Value,
unsigned ScratchPtr, unsigned ScratchOffset,
unsigned ScratchRsrcReg, unsigned ScratchOffset,
int64_t Offset, RegScavenger *RS) const;
};