From b9e433b02a77830b9ba13406b459ab905371e346 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Mon, 9 Aug 2021 10:18:52 -0700
Subject: [PATCH] Prevent machine licm if remattable with a vreg use

Check if a rematerializable instruction does not have any virtual
register uses. Even though it is rematerializable, RA might not actually
rematerialize it in this scenario. In that case we do not want to hoist
such an instruction out of the loop in the belief that RA will sink it
back if needed.

This already has an impact on the AMDGPU target, which does not check
for this condition in its isTriviallyReMaterializable implementation and
has instructions with virtual register uses enabled. The other targets
are not impacted at this point, although they will be when D106408 lands.

Differential Revision: https://reviews.llvm.org/D107677
---
 llvm/lib/CodeGen/MachineLICM.cpp              | 28 ++++++++--
 llvm/test/CodeGen/AMDGPU/licm-regpressure.mir | 56 +++++++++----------
 2 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 883299c452b7..42708659c79e 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,6 +230,9 @@ namespace {
 
     bool IsGuaranteedToExecute(MachineBasicBlock *BB);
 
+    bool isTriviallyReMaterializable(const MachineInstr &MI,
+                                     AAResults *AA) const;
+
     void EnterScope(MachineBasicBlock *MBB);
     void ExitScope(MachineBasicBlock *MBB);
 
@@ -659,6 +662,23 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
   return true;
 }
 
+/// Check if \p MI is trivially rematerializable and if it does not have any
+/// virtual register uses. Even though rematerializable, RA might not actually
+/// rematerialize it in this scenario. In that case we do not want to hoist the
+/// instruction out of the loop believing RA will sink it back if needed.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+                                                  AAResults *AA) const {
+  if (!TII->isTriviallyReMaterializable(MI, AA))
+    return false;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+      return false;
+  }
+
+  return true;
+}
+
 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
   LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
 
@@ -1156,9 +1176,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
     return false;
   }
 
-  // Rematerializable instructions should always be hoisted since the register
-  // allocator can just pull them down again when needed.
-  if (TII->isTriviallyReMaterializable(MI, AA))
+  // Rematerializable instructions should always be hoisted, provided the
+  // register allocator can just pull them down again when needed.
+  if (isTriviallyReMaterializable(MI, AA))
     return true;
 
   // FIXME: If there are long latency loop-invariant instructions inside the
@@ -1211,7 +1231,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
 
   // High register pressure situation, only hoist if the instruction is going
   // to be remat'ed.
-  if (!TII->isTriviallyReMaterializable(MI, AA) &&
+  if (!isTriviallyReMaterializable(MI, AA) &&
       !MI.isDereferenceableInvariantLoad(AA)) {
     LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
     return false;
diff --git a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
index dcdbe6bd02dc..1d033e117ede 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass machinelicm -o - %s | FileCheck -check-prefix=GCN %s
 
-# FIXME: MachineLICM hoists all V_CVT instructions out of the loop increasing
-# register pressure. VGPR budget at occupancy 10 is 24 vgprs.
+# MachineLICM should limit hoisting of V_CVT instructions out of the loop to
+# keep register pressure within the budget. VGPR budget at occupancy 10 is 24 vgprs.
 
 ---
 name:            test
@@ -35,19 +35,6 @@ body:             |
     ; GCN: %20:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY2]], implicit $mode, implicit $exec
     ; GCN: %21:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY3]], implicit $mode, implicit $exec
     ; GCN: %22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY4]], implicit $mode, implicit $exec
-    ; GCN: %23:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY5]], implicit $mode, implicit $exec
-    ; GCN: %24:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY6]], implicit $mode, implicit $exec
-    ; GCN: %25:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY7]], implicit $mode, implicit $exec
-    ; GCN: %26:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY8]], implicit $mode, implicit $exec
-    ; GCN: %27:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY9]], implicit $mode, implicit $exec
-    ; GCN: %28:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY10]], implicit $mode, implicit $exec
-    ; GCN: %29:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY11]], implicit $mode, implicit $exec
-    ; GCN: %30:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY12]], implicit $mode, implicit $exec
-    ; GCN: %31:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY13]], implicit $mode, implicit $exec
-    ; GCN: %32:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY14]], implicit $mode, implicit $exec
-    ; GCN: %33:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY15]], implicit $mode, implicit $exec
-    ; GCN: %34:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY16]], implicit $mode, implicit $exec
-    ; GCN: %35:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY17]], implicit $mode, implicit $exec
     ; GCN: bb.1:
     ; GCN:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
     ; GCN:   liveins: $vcc
@@ -57,19 +44,32 @@ body:             |
     ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %20, implicit $exec
     ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %21, implicit $exec
    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %22, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %23, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %24, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %25, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %26, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %27, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %28, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %29, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %30, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %31, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %32, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %33, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %34, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %35, implicit $exec
+    ; GCN: %23:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY5]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %23, implicit $exec
+    ; GCN: %24:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY6]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %24, implicit $exec
+    ; GCN: %25:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY7]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %25, implicit $exec
+    ; GCN: %26:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY8]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %26, implicit $exec
+    ; GCN: %27:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY9]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %27, implicit $exec
+    ; GCN: %28:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY10]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %28, implicit $exec
+    ; GCN: %29:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY11]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %29, implicit $exec
+    ; GCN: %30:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY12]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %30, implicit $exec
+    ; GCN: %31:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY13]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %31, implicit $exec
+    ; GCN: %32:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY14]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %32, implicit $exec
+    ; GCN: %33:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY15]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %33, implicit $exec
+    ; GCN: %34:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY16]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %34, implicit $exec
+    ; GCN: %35:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY17]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %35, implicit $exec
     ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
     ; GCN: S_BRANCH %bb.2
     ; GCN: bb.2:
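
For reference, the hoisting rule this patch implements can be sketched outside of LLVM as follows. This is a minimal standalone C++ sketch: the Mock types and the main() driver are hypothetical stand-ins for LLVM's MachineInstr/MachineOperand, and only the control flow mirrors the new MachineLICMBase::isTriviallyReMaterializable above.

#include <iostream>
#include <vector>

// Hypothetical stand-ins for MachineOperand/MachineInstr, for illustration
// only; the real types live in llvm/include/llvm/CodeGen/MachineInstr.h.
struct MockOperand {
  bool IsReg = false;
  bool IsUse = false;
  bool IsVirtual = false;
};

struct MockInstr {
  bool TriviallyReMat = false; // what TII->isTriviallyReMaterializable reports
  std::vector<MockOperand> Operands;
};

// Mirrors the predicate the patch adds: being remattable alone is not enough;
// any virtual register use disqualifies the instruction, because RA might
// never rematerialize (sink) it back into the loop after hoisting.
static bool isTriviallyReMaterializable(const MockInstr &MI) {
  if (!MI.TriviallyReMat)
    return false;
  for (const MockOperand &MO : MI.Operands)
    if (MO.IsReg && MO.IsUse && MO.IsVirtual)
      return false;
  return true;
}

int main() {
  // Like V_CVT_F64_I32_e32 %N, [[COPYx]]: remattable, but it reads a vreg.
  MockInstr CvtWithVRegUse{true, {{true, true, true}}};
  // A remattable immediate materialization with no register uses at all.
  MockInstr MovImm{true, {}};
  std::cout << isTriviallyReMaterializable(CvtWithVRegUse) << '\n'  // 0: keep in loop
            << isTriviallyReMaterializable(MovImm) << '\n';         // 1: hoist freely
}

This is exactly the behavior the updated licm-regpressure.mir test checks for: the V_CVT results that fit in the VGPR budget stay hoisted, while the remaining conversions are left inside bb.1 next to their V_CMP users.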