forked from OSchip/llvm-project
AMDGPU/SI: Handle div_fmas hazard in GCNHazardRecognizer
Reviewers: arsenm Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D25250 llvm-svn: 283622
This commit is contained in:
parent
33b87bffc6
commit
5ab6154dc3
|
@ -38,6 +38,10 @@ void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
|
|||
CurrCycleInstr = MI;
|
||||
}
|
||||
|
||||
static bool isDivFMas(unsigned Opcode) {
|
||||
return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
|
||||
}
|
||||
|
||||
ScheduleHazardRecognizer::HazardType
|
||||
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
||||
MachineInstr *MI = SU->getInstr();
|
||||
|
@ -51,6 +55,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
|||
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
return NoHazard;
|
||||
}
|
||||
|
||||
|
@ -68,6 +75,9 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
|
|||
if (SIInstrInfo::isDPP(*MI))
|
||||
return std::max(0, checkDPPHazards(MI));
|
||||
|
||||
if (isDivFMas(MI->getOpcode()))
|
||||
return std::max(0, checkDivFMasHazards(MI));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -262,3 +272,15 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
|
|||
|
||||
return WaitStatesNeeded;
|
||||
}
|
||||
|
||||
/// Computes the wait-state shortfall for a v_div_fmas instruction.
///
/// v_div_fmas requires 4 wait states after a write to vcc from a VALU
/// instruction. The result is that requirement minus the value reported by
/// getWaitStatesSinceDef for VCC (restricted to defs for which
/// SIInstrInfo::isVALU is true). Callers in this file treat a result > 0 as
/// a hazard and clamp non-positive results to zero.
///
/// \param DivFMas the instruction being checked; not read by this function.
/// \returns DivFMasWaitStates minus the wait states since the matching def.
int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  // Required distance between a VALU write of vcc and the v_div_fmas.
  const int DivFMasWaitStates = 4;

  const SIInstrInfo *TII = ST.getInstrInfo();

  // Only VALU instructions count as hazardous definitions of vcc.
  auto IsVALUDef = [TII](MachineInstr *MI) { return TII->isVALU(*MI); };
  const int WaitStatesSinceDef = getWaitStatesSinceDef(AMDGPU::VCC, IsVALUDef);

  return DivFMasWaitStates - WaitStatesSinceDef;
}
|
||||
|
|
|
@ -43,6 +43,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
|||
int checkSMRDHazards(MachineInstr *SMRD);
|
||||
int checkVMEMHazards(MachineInstr* VMEM);
|
||||
int checkDPPHazards(MachineInstr *DPP);
|
||||
int checkDivFMasHazards(MachineInstr *DivFMas);
|
||||
public:
|
||||
GCNHazardRecognizer(const MachineFunction &MF);
|
||||
// We can only issue one instruction per cycle.
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
# RUN: llc -march=amdgcn -run-pass post-RA-hazard-rec %s -o - | FileCheck %s
|
||||
|
||||
# CHECK-LABEL: bb.0:
|
||||
# CHECK: S_MOV_B64
|
||||
# CHECK-NOT: S_NOP
|
||||
# CHECK: V_DIV_FMAS
|
||||
|
||||
# CHECK-LABEL: bb.1:
|
||||
# CHECK: V_CMP_EQ_I32
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: V_DIV_FMAS_F32
|
||||
|
||||
# CHECK-LABEL: bb.2:
|
||||
# CHECK: V_CMP_EQ_I32
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: V_DIV_FMAS_F32
|
||||
|
||||
# CHECK-LABEL: bb.3:
|
||||
# CHECK: V_DIV_SCALE_F32
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP
|
||||
# CHECK: V_DIV_FMAS_F32
|
||||
--- |
|
||||
define void @test0() { ret void }
|
||||
...
|
||||
---
|
||||
name: test0
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
%vcc = S_MOV_B64 0
|
||||
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec
|
||||
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3
|
||||
%vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec
|
||||
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
%vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec
|
||||
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
|
||||
S_ENDPGM
|
||||
...
|
Loading…
Reference in New Issue