Prevent machine licm if remattable with a vreg use

Check whether a rematerializable instruction has any virtual register
uses. Even though an instruction is rematerializable, RA might not
actually rematerialize it in this scenario. In that case we do not want
to hoist such an instruction out of the loop in the belief that RA will
sink it back if needed.

This already has an impact on the AMDGPU target, which does not check
for this condition in its isTriviallyReMaterializable implementation
and has instructions with virtual register uses enabled. The other
targets are not impacted at this point, although they will be when
D106408 lands.

Differential Revision: https://reviews.llvm.org/D107677
Stanislav Mekhanoshin 2021-08-09 10:18:52 -07:00
parent 735a590471
commit b9e433b02a
2 changed files with 52 additions and 32 deletions
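
To illustrate the intent, here is a minimal standalone sketch of the check this
patch introduces, using hypothetical simplified types in place of
llvm::MachineInstr and llvm::MachineOperand (the real helper is in the
MachineLICM.cpp diff below):

#include <vector>

// Hypothetical stand-ins for MachineOperand/MachineInstr, for illustration only.
struct Operand {
  bool IsUse;        // register use (as opposed to a def)
  bool IsVirtualReg; // virtual (pre-RA) register rather than a physical one
};

struct Instr {
  bool TriviallyRemat; // what TII->isTriviallyReMaterializable would report
  std::vector<Operand> Operands;
};

// Treat an instruction as a hoisting-friendly remat candidate only if RA
// could rematerialize it anywhere: no virtual register uses whose values
// might not be available (or would be kept live) at the remat point.
static bool isRematWithoutVRegUses(const Instr &MI) {
  if (!MI.TriviallyRemat)
    return false;
  for (const Operand &MO : MI.Operands)
    if (MO.IsUse && MO.IsVirtualReg)
      return false;
  return true;
}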

llvm/lib/CodeGen/MachineLICM.cpp

@@ -230,6 +230,9 @@ namespace {
   bool IsGuaranteedToExecute(MachineBasicBlock *BB);
 
+  bool isTriviallyReMaterializable(const MachineInstr &MI,
+                                   AAResults *AA) const;
+
   void EnterScope(MachineBasicBlock *MBB);
   void ExitScope(MachineBasicBlock *MBB);
@@ -659,6 +662,23 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
   return true;
 }
 
+/// Check if \p MI is trivially rematerializable and has no virtual register
+/// uses. Even though rematerializable, RA might not actually rematerialize
+/// it in this scenario; in that case we do not want to hoist the instruction
+/// out of the loop in the belief that RA will sink it back if needed.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+                                                  AAResults *AA) const {
+  if (!TII->isTriviallyReMaterializable(MI, AA))
+    return false;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+      return false;
+  }
+
+  return true;
+}
+
 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
   LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
@@ -1156,9 +1176,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
     return false;
   }
 
-  // Rematerializable instructions should always be hoisted since the register
-  // allocator can just pull them down again when needed.
-  if (TII->isTriviallyReMaterializable(MI, AA))
+  // Rematerializable instructions should always be hoisted providing the
+  // register allocator can just pull them down again when needed.
+  if (isTriviallyReMaterializable(MI, AA))
     return true;
 
   // FIXME: If there are long latency loop-invariant instructions inside the
@@ -1211,7 +1231,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
 
   // High register pressure situation, only hoist if the instruction is going
   // to be remat'ed.
-  if (!TII->isTriviallyReMaterializable(MI, AA) &&
+  if (!isTriviallyReMaterializable(MI, AA) &&
       !MI.isDereferenceableInvariantLoad(AA)) {
     LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
     return false;
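
Taken together, the two call-site changes above gate both the remat fast path
and the high-register-pressure bailout on the stricter wrapper. The following
condensed model sketches the resulting decision order under hypothetical flags;
the real IsProfitableToHoist also weighs per-register-class pressure and other
heuristics omitted here:

// Hypothetical, condensed model of the post-patch hoisting decision.
struct HoistCandidate {
  bool TriviallyRemat;  // TII->isTriviallyReMaterializable(MI, AA)
  bool HasVRegUse;      // has any virtual-register use operand
  bool InvariantLoad;   // MI.isDereferenceableInvariantLoad(AA)
  bool HighRegPressure; // estimated pressure exceeds the limit
};

static bool modelIsProfitableToHoist(const HoistCandidate &C) {
  // Fast path: a use-free remat candidate can always be re-sunk by RA.
  if (C.TriviallyRemat && !C.HasVRegUse)
    return true;
  // Under high pressure, only hoist what RA can recover cheaply.
  if (C.HighRegPressure && !C.InvariantLoad)
    return false;
  return true; // intermediate heuristics omitted
}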

llvm/test/CodeGen/AMDGPU/licm-regpressure.mir

@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass machinelicm -o - %s | FileCheck -check-prefix=GCN %s
 
-# FIXME: MachineLICM hoists all V_CVT instructions out of the loop increasing
-# register pressure. VGPR budget at occupancy 10 is 24 vgprs.
+# MachineLICM shall limit hoisting of V_CVT instructions out of the loop to
+# keep register pressure within the budget. VGPR budget at occupancy 10 is 24 vgprs.
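# (Sanity check on that budget, assuming gfx900's 256 VGPRs per SIMD:
#  256 / 10 waves = 25.6, rounded down to the 4-VGPR allocation granularity
#  gives 24.)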
 ---
 name: test
@@ -35,19 +35,6 @@ body: |
 ; GCN: %20:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY2]], implicit $mode, implicit $exec
 ; GCN: %21:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY3]], implicit $mode, implicit $exec
 ; GCN: %22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY4]], implicit $mode, implicit $exec
-; GCN: %23:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY5]], implicit $mode, implicit $exec
-; GCN: %24:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY6]], implicit $mode, implicit $exec
-; GCN: %25:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY7]], implicit $mode, implicit $exec
-; GCN: %26:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY8]], implicit $mode, implicit $exec
-; GCN: %27:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY9]], implicit $mode, implicit $exec
-; GCN: %28:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY10]], implicit $mode, implicit $exec
-; GCN: %29:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY11]], implicit $mode, implicit $exec
-; GCN: %30:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY12]], implicit $mode, implicit $exec
-; GCN: %31:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY13]], implicit $mode, implicit $exec
-; GCN: %32:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY14]], implicit $mode, implicit $exec
-; GCN: %33:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY15]], implicit $mode, implicit $exec
-; GCN: %34:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY16]], implicit $mode, implicit $exec
-; GCN: %35:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY17]], implicit $mode, implicit $exec
 ; GCN: bb.1:
 ; GCN: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
 ; GCN: liveins: $vcc
@@ -57,19 +44,32 @@ body: |
 ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %20, implicit $exec
 ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %21, implicit $exec
 ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %22, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %23, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %24, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %25, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %26, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %27, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %28, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %29, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %30, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %31, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %32, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %33, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %34, implicit $exec
-; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %35, implicit $exec
+; GCN: %23:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY5]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %23, implicit $exec
+; GCN: %24:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY6]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %24, implicit $exec
+; GCN: %25:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY7]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %25, implicit $exec
+; GCN: %26:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY8]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %26, implicit $exec
+; GCN: %27:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY9]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %27, implicit $exec
+; GCN: %28:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY10]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %28, implicit $exec
+; GCN: %29:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY11]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %29, implicit $exec
+; GCN: %30:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY12]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %30, implicit $exec
+; GCN: %31:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY13]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %31, implicit $exec
+; GCN: %32:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY14]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %32, implicit $exec
+; GCN: %33:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY15]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %33, implicit $exec
+; GCN: %34:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY16]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %34, implicit $exec
+; GCN: %35:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY17]], implicit $mode, implicit $exec
+; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %35, implicit $exec
 ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
 ; GCN: S_BRANCH %bb.2
 ; GCN: bb.2: