AMDGPU/SI: Handle div_fmas hazard in GCNHazardRecognizer

Reviewers: arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D25250

llvm-svn: 283622
This commit is contained in:
Tom Stellard 2016-10-07 23:42:48 +00:00
parent 33b87bffc6
commit 5ab6154dc3
3 changed files with 83 additions and 0 deletions

View File

@ -38,6 +38,10 @@ void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
CurrCycleInstr = MI;
}
/// Returns true if \p Opcode is one of the two V_DIV_FMAS opcodes
/// (V_DIV_FMAS_F32 or V_DIV_FMAS_F64), false for every other opcode.
static bool isDivFMas(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_DIV_FMAS_F32:
  case AMDGPU::V_DIV_FMAS_F64:
    return true;
  default:
    return false;
  }
}
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
@ -51,6 +55,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
return NoopHazard;
if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
return NoopHazard;
return NoHazard;
}
@ -68,6 +75,9 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
if (SIInstrInfo::isDPP(*MI))
return std::max(0, checkDPPHazards(MI));
if (isDivFMas(MI->getOpcode()))
return std::max(0, checkDivFMasHazards(MI));
return 0;
}
@ -262,3 +272,15 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
return WaitStatesNeeded;
}
/// Computes how many more wait states are required before a v_div_fmas
/// instruction, based on the most recent VALU write to VCC.
///
/// \param DivFMas the v_div_fmas instruction being checked (not inspected by
///        this function).
/// \returns the required wait-state count minus the wait states reported by
///          getWaitStatesSinceDef(); the result may be zero or negative.
int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int RequiredWaitStates = 4;

  const SIInstrInfo *InstrInfo = ST.getInstrInfo();
  // Only defs of VCC made by VALU instructions count as hazardous.
  auto IsVALUDef = [InstrInfo](MachineInstr *MI) {
    return InstrInfo->isVALU(*MI);
  };

  int WaitStatesSinceVccDef = getWaitStatesSinceDef(AMDGPU::VCC, IsVALUDef);
  return RequiredWaitStates - WaitStatesSinceVccDef;
}

View File

@ -43,6 +43,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkSMRDHazards(MachineInstr *SMRD);
int checkVMEMHazards(MachineInstr* VMEM);
int checkDPPHazards(MachineInstr *DPP);
int checkDivFMasHazards(MachineInstr *DivFMas);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.

View File

@ -0,0 +1,60 @@
# Test for the v_div_fmas hazard handling in the post-RA hazard recognizer
# pass. Each basic block of @test0 writes vcc and then immediately reads it
# with V_DIV_FMAS_F32; the expectations below state whether S_NOP
# instructions must appear between the two.
# RUN: llc -march=amdgcn -run-pass post-RA-hazard-rec %s -o - | FileCheck %s
# bb.0 - vcc is written by S_MOV_B64; no S_NOP is expected before V_DIV_FMAS.
# CHECK-LABEL: bb.0:
# CHECK: S_MOV_B64
# CHECK-NOT: S_NOP
# CHECK: V_DIV_FMAS
# bb.1 - vcc is an implicit def of V_CMP_EQ_I32_e32; four S_NOPs are expected.
# CHECK-LABEL: bb.1:
# CHECK: V_CMP_EQ_I32
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: V_DIV_FMAS_F32
# bb.2 - vcc is the explicit def of V_CMP_EQ_I32_e64; four S_NOPs are expected.
# CHECK-LABEL: bb.2:
# CHECK: V_CMP_EQ_I32
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: V_DIV_FMAS_F32
# bb.3 - vcc is the second def of V_DIV_SCALE_F32; four S_NOPs are expected.
# CHECK-LABEL: bb.3:
# CHECK: V_DIV_SCALE_F32
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: S_NOP
# CHECK: V_DIV_FMAS_F32
--- |
define void @test0() { ret void }
...
---
name: test0
body: |
bb.0:
successors: %bb.1
%vcc = S_MOV_B64 0
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2
implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
S_BRANCH %bb.2
bb.2:
successors: %bb.3
%vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
S_BRANCH %bb.3
bb.3:
%vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
S_ENDPGM
...