[AMDGPU] prefer non-mfma in post-RA schedule

MFMA instructions shall not be scheduled back to back
to avoid MAI SIMD stall. Tell post-RA schedule we would
prefer some other instruction instead.

Differential Revision: https://reviews.llvm.org/D84883
This commit is contained in:
Stanislav Mekhanoshin 2020-07-29 11:47:18 -07:00
parent a5faf3c849
commit 13b63be472
3 changed files with 43 additions and 0 deletions

View File

@ -1383,3 +1383,27 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
return WaitStatesNeeded;
}
bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
if (!SU->isInstr())
return false;
MachineInstr *MAI = nullptr;
auto IsMFMAFn = [&MAI] (MachineInstr *MI) {
MAI = nullptr;
if (SIInstrInfo::isMAI(*MI) &&
MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
MAI = MI;
return MAI != nullptr;
};
MachineInstr *MI = SU->getInstr();
if (IsMFMAFn(MI)) {
int W = getWaitStatesSince(IsMFMAFn, 16);
if (MAI)
return W < (int)TSchedModel.computeInstrLatency(MAI);
}
return false;
}

View File

@ -108,6 +108,7 @@ public:
unsigned PreEmitNoopsCommon(MachineInstr *);
void AdvanceCycle() override;
void RecedeCycle() override;
bool ShouldPreferAnother(SUnit *SU) override;
};
} // end namespace llvm

View File

@ -0,0 +1,18 @@
# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass post-RA-sched -amdgpu-disable-power-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: unrelated_mfma
# GCN: V_MFMA_F32_32X32X1F32
# GCN: S_CMP_LG_U32
# GCN: V_MFMA_F32_32X32X1F32
---
name: unrelated_mfma
body: |
bb.0.entry:
renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32 $vgpr67, $vgpr66, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
renamable $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63 = V_MFMA_F32_32X32X1F32 $vgpr69, $vgpr68, killed $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63, 0, 0, 0, implicit $mode, implicit $exec
renamable $sgpr2 = S_ADD_U32 renamable $sgpr2, 4, implicit-def $scc
renamable $sgpr3 = S_ADDC_U32 renamable $sgpr3, 0, implicit-def dead $scc, implicit killed $scc
S_CMP_LG_U32 renamable $sgpr2, 64, implicit-def $scc
...