diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 29b1f79187d5..78f91c06adfb 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -38,6 +38,17 @@ void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
   CurrCycleInstr = MI;
 }
 
+/// Returns true if \p Opcode is one of the v_div_fmas opcodes (F32 or F64).
+static bool isDivFMas(unsigned Opcode) {
+  switch (Opcode) {
+  case AMDGPU::V_DIV_FMAS_F32:
+  case AMDGPU::V_DIV_FMAS_F64:
+    return true;
+  default:
+    return false;
+  }
+}
+
 ScheduleHazardRecognizer::HazardType
 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   MachineInstr *MI = SU->getInstr();
@@ -51,6 +62,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
     return NoopHazard;
 
+  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
+    return NoopHazard;
+
   return NoHazard;
 }
 
@@ -68,6 +82,9 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
   if (SIInstrInfo::isDPP(*MI))
     return std::max(0, checkDPPHazards(MI));
 
+  if (isDivFMas(MI->getOpcode()))
+    return std::max(0, checkDivFMasHazards(MI));
+
   return 0;
 }
 
@@ -262,3 +279,18 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
 
   return WaitStatesNeeded;
 }
+
+/// Returns the number of wait states still outstanding before a v_div_fmas
+/// may issue. Callers treat any value <= 0 as "no wait states required".
+int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
+  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
+  // instruction.
+  const int DivFMasWaitStates = 4;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  auto IsVALUDefFn = [TII](MachineInstr *MI) { return TII->isVALU(*MI); };
+  const int WaitStatesSinceDef =
+      getWaitStatesSinceDef(AMDGPU::VCC, IsVALUDefFn);
+
+  return DivFMasWaitStates - WaitStatesSinceDef;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index d82041c5f174..1d87f3a05831 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -43,6 +43,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   int checkSMRDHazards(MachineInstr *SMRD);
   int checkVMEMHazards(MachineInstr* VMEM);
   int checkDPPHazards(MachineInstr *DPP);
+  int checkDivFMasHazards(MachineInstr *DivFMas);
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
new file mode 100644
index 000000000000..9003454c3583
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
@@ -0,0 +1,60 @@
+# RUN: llc -march=amdgcn -run-pass post-RA-hazard-rec %s -o - | FileCheck %s
+
+# CHECK-LABEL: bb.0:
+# CHECK: S_MOV_B64
+# CHECK-NOT: S_NOP
+# CHECK: V_DIV_FMAS
+
+# CHECK-LABEL: bb.1:
+# CHECK: V_CMP_EQ_I32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+
+# CHECK-LABEL: bb.2:
+# CHECK: V_CMP_EQ_I32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+
+# CHECK-LABEL: bb.3:
+# CHECK: V_DIV_SCALE_F32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+--- |
+  define void @test0() { ret void }
+...
+---
+name: test0
+
+body: |
+  bb.0:
+    successors: %bb.1
+    %vcc = S_MOV_B64 0
+    %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2
+    implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec
+    %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    %vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec
+    %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec
+    %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+    S_ENDPGM
+...