forked from OSchip/llvm-project
R600/SI: Fix dependency between instruction writing M0 and S_SENDMSG on VI (v2)
This fixes a hang when using an empty geometry shader. v2: - don't add s_nop when followed by s_waitcnt - cosmetic changes Tested-by: Michel Dänzer <michel.daenzer@amd.com> llvm-svn: 227986
This commit is contained in:
parent
ffd039bde1
commit
1bd2463548
|
@ -82,6 +82,8 @@ private:
|
|||
/// \brief Type of the last opcode.
|
||||
InstType LastOpcodeType;
|
||||
|
||||
/// \brief Whether the most recently handled instruction defines M0.
/// Cleared whenever this pass inserts an S_NOP or a wait instruction.
bool LastInstWritesM0;
|
||||
|
||||
/// \brief Get increment/decrement amount for this instruction.
|
||||
Counters getHwCounts(MachineInstr &MI);
|
||||
|
||||
|
@ -106,6 +108,9 @@ private:
|
|||
/// \brief Resolve all operand dependencies to counter requirements
|
||||
Counters handleOperands(MachineInstr &MI);
|
||||
|
||||
/// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
|
||||
void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
|
||||
|
||||
public:
|
||||
SIInsertWaits(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID),
|
||||
|
@ -269,6 +274,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
|
|||
// Insert a NOP to break the clause.
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
|
||||
.addImm(0);
|
||||
LastInstWritesM0 = false;
|
||||
}
|
||||
|
||||
if (TII->isSMRD(I->getOpcode()))
|
||||
|
@ -362,6 +368,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
|
|||
((Counts.Named.LGKM & 0x7) << 8));
|
||||
|
||||
LastOpcodeType = OTHER;
|
||||
LastInstWritesM0 = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -403,6 +410,30 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
|
|||
return Result;
|
||||
}
|
||||
|
||||
/// \brief Place an "S_NOP 0" in front of an S_SENDMSG when the previously
/// handled instruction defined M0; otherwise record whether \p I defines M0.
///
/// \param MBB Basic block that contains \p I.
/// \param I   Instruction being processed; a new S_NOP is inserted before it.
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I) {
  // Generations older than VOLCANIC_ISLANDS are left untouched.
  if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return;

  const bool NeedsNop =
      LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG;

  if (!NeedsNop) {
    // No NOP required here: remember whether this instruction defines M0 so
    // that the next call can react to it.
    bool DefinesM0 = false;
    for (unsigned Idx = 0, End = I->getNumOperands(); Idx != End; ++Idx) {
      const MachineOperand &MO = I->getOperand(Idx);
      if (MO.isReg() && MO.isDef() && MO.getReg() == AMDGPU::M0)
        DefinesM0 = true;
    }
    LastInstWritesM0 = DefinesM0;
    return;
  }

  // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
      .addImm(0);
  LastInstWritesM0 = false;
}
|
||||
|
||||
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
|
||||
// around other non-memory instructions.
|
||||
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
@ -417,6 +448,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
|||
WaitedOn = ZeroCounts;
|
||||
LastIssued = ZeroCounts;
|
||||
LastOpcodeType = OTHER;
|
||||
LastInstWritesM0 = false;
|
||||
|
||||
memset(&UsedRegs, 0, sizeof(UsedRegs));
|
||||
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
|
||||
|
@ -433,7 +465,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
|||
Changes |= insertWait(MBB, I, LastIssued);
|
||||
else
|
||||
Changes |= insertWait(MBB, I, handleOperands(*I));
|
||||
|
||||
pushInstruction(MBB, I);
|
||||
handleSendMsg(MBB, I);
|
||||
}
|
||||
|
||||
// Wait for everything at the end of the MBB
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=BOTH %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=BOTH %s
|
||||
|
||||
; BOTH-LABEL: {{^}}main:
|
||||
; BOTH: s_mov_b32 m0, s0
|
||||
; VI-NEXT: s_nop 0
|
||||
; BOTH-NEXT: s_sendmsg Gs_done(nop)
|
||||
; BOTH-NEXT: s_endpgm
|
||||
|
||||
; Test body: a single call to llvm.SI.sendmsg with the constant 3 and the
; inreg argument %a, followed by a return. The CHECK lines above expect %a to
; be copied into m0 right before s_sendmsg, with "s_nop 0" in between on VI.
define void @main(i32 inreg %a) #0 {
|
||||
main_body:
|
||||
call void @llvm.SI.sendmsg(i32 3, i32 %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.SI.sendmsg(i32, i32) #1
|
||||
|
||||
attributes #0 = { "ShaderType"="2" "unsafe-fp-math"="true" }
|
||||
attributes #1 = { nounwind }
|
Loading…
Reference in New Issue