forked from OSchip/llvm-project
R600/SI: Disable VMEM and SMEM clauses by breaking them with S_NOP
This is only a workaround. llvm-svn: 223615
This commit is contained in:
parent
58f61a84e7
commit
fa58e5e111
|
@ -41,6 +41,12 @@ typedef union {
|
||||||
|
|
||||||
} Counters;
|
} Counters;
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
OTHER,
|
||||||
|
SMEM,
|
||||||
|
VMEM
|
||||||
|
} InstType;
|
||||||
|
|
||||||
typedef Counters RegCounters[512];
|
typedef Counters RegCounters[512];
|
||||||
typedef std::pair<unsigned, unsigned> RegInterval;
|
typedef std::pair<unsigned, unsigned> RegInterval;
|
||||||
|
|
||||||
|
@ -73,6 +79,9 @@ private:
|
||||||
/// \brief Different export instruction types seen since last wait.
|
/// \brief Different export instruction types seen since last wait.
|
||||||
unsigned ExpInstrTypesSeen;
|
unsigned ExpInstrTypesSeen;
|
||||||
|
|
||||||
|
/// \brief Type of the last opcode.
|
||||||
|
InstType LastOpcodeType;
|
||||||
|
|
||||||
/// \brief Get increment/decrement amount for this instruction.
|
/// \brief Get increment/decrement amount for this instruction.
|
||||||
Counters getHwCounts(MachineInstr &MI);
|
Counters getHwCounts(MachineInstr &MI);
|
||||||
|
|
||||||
|
@ -83,7 +92,8 @@ private:
|
||||||
RegInterval getRegInterval(MachineOperand &Op);
|
RegInterval getRegInterval(MachineOperand &Op);
|
||||||
|
|
||||||
/// \brief Handle an instruction's async components
|
/// \brief Handle an instruction's async components
|
||||||
void pushInstruction(MachineInstr &MI);
|
void pushInstruction(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator I);
|
||||||
|
|
||||||
/// \brief Insert the actual wait instruction
|
/// \brief Insert the actual wait instruction
|
||||||
bool insertWait(MachineBasicBlock &MBB,
|
bool insertWait(MachineBasicBlock &MBB,
|
||||||
|
@ -203,10 +213,11 @@ RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SIInsertWaits::pushInstruction(MachineInstr &MI) {
|
void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator I) {
|
||||||
|
|
||||||
// Get the hardware counter increments and sum them up
|
// Get the hardware counter increments and sum them up
|
||||||
Counters Increment = getHwCounts(MI);
|
Counters Increment = getHwCounts(*I);
|
||||||
unsigned Sum = 0;
|
unsigned Sum = 0;
|
||||||
|
|
||||||
for (unsigned i = 0; i < 3; ++i) {
|
for (unsigned i = 0; i < 3; ++i) {
|
||||||
|
@ -215,17 +226,42 @@ void SIInsertWaits::pushInstruction(MachineInstr &MI) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we don't increase anything then that's it
|
// If we don't increase anything then that's it
|
||||||
if (Sum == 0)
|
if (Sum == 0) {
|
||||||
|
LastOpcodeType = OTHER;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||||
|
// Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
|
||||||
|
// or SMEM clause, respectively.
|
||||||
|
//
|
||||||
|
// The temporary workaround is to break the clauses with S_NOP.
|
||||||
|
//
|
||||||
|
// The proper solution would be to allocate registers such that all source
|
||||||
|
// and destination registers don't overlap, e.g. this is illegal:
|
||||||
|
// r0 = load r2
|
||||||
|
// r2 = load r0
|
||||||
|
if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
|
||||||
|
(LastOpcodeType == VMEM && Increment.Named.VM)) {
|
||||||
|
// Insert a NOP to break the clause.
|
||||||
|
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
|
||||||
|
.addImm(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TII->isSMRD(I->getOpcode()))
|
||||||
|
LastOpcodeType = SMEM;
|
||||||
|
else if (Increment.Named.VM)
|
||||||
|
LastOpcodeType = VMEM;
|
||||||
|
}
|
||||||
|
|
||||||
// Remember which export instructions we have seen
|
// Remember which export instructions we have seen
|
||||||
if (Increment.Named.EXP) {
|
if (Increment.Named.EXP) {
|
||||||
ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
|
ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
|
||||||
|
|
||||||
MachineOperand &Op = MI.getOperand(i);
|
MachineOperand &Op = I->getOperand(i);
|
||||||
if (!isOpRelevant(Op))
|
if (!isOpRelevant(Op))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -302,6 +338,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
|
||||||
((Counts.Named.EXP & 0x7) << 4) |
|
((Counts.Named.EXP & 0x7) << 4) |
|
||||||
((Counts.Named.LGKM & 0x7) << 8));
|
((Counts.Named.LGKM & 0x7) << 8));
|
||||||
|
|
||||||
|
LastOpcodeType = OTHER;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -356,6 +393,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
||||||
|
|
||||||
WaitedOn = ZeroCounts;
|
WaitedOn = ZeroCounts;
|
||||||
LastIssued = ZeroCounts;
|
LastIssued = ZeroCounts;
|
||||||
|
LastOpcodeType = OTHER;
|
||||||
|
|
||||||
memset(&UsedRegs, 0, sizeof(UsedRegs));
|
memset(&UsedRegs, 0, sizeof(UsedRegs));
|
||||||
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
|
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
|
||||||
|
@ -368,7 +406,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
|
|
||||||
Changes |= insertWait(MBB, I, handleOperands(*I));
|
Changes |= insertWait(MBB, I, handleOperands(*I));
|
||||||
pushInstruction(*I);
|
pushInstruction(MBB, I);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for everything at the end of the MBB
|
// Wait for everything at the end of the MBB
|
||||||
|
|
Loading…
Reference in New Issue