[AMDGPU] SIOptimizeExecMaskingPreRA should check constant bus constraint when folding EXEC copy

Folding EXEC copy into its single use may lead to a constant bus constraint violation, as it adds one more SGPR operand.
This change makes the pass validate the user instruction with the new SGPR operand and only fold the copy if the result is legal.

Reviewed By: rampitec, arsenm

Differential Revision: https://reviews.llvm.org/D98888
This commit is contained in:
alex-t 2021-03-18 22:22:08 +03:00
parent 952bc6c92e
commit dccf83acf9
2 changed files with 37 additions and 9 deletions

View File

@ -416,15 +416,20 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
continue;
Register SavedExec = I->getOperand(0).getReg();
if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec) &&
MRI->use_instr_nodbg_begin(SavedExec)->getParent() ==
I->getParent()) {
LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
LIS->RemoveMachineInstrFromMaps(*I);
I->eraseFromParent();
MRI->replaceRegWith(SavedExec, ExecReg);
LIS->removeInterval(SavedExec);
Changed = true;
if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec)) {
MachineInstr *SingleExecUser = &*MRI->use_instr_nodbg_begin(SavedExec);
int Idx = SingleExecUser->findRegisterUseOperandIdx(SavedExec);
assert(Idx != -1);
if (SingleExecUser->getParent() == I->getParent() &&
!SingleExecUser->getOperand(Idx).isImplicit() &&
TII->isOperandLegal(*SingleExecUser, Idx, &I->getOperand(1))) {
LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
LIS->RemoveMachineInstrFromMaps(*I);
I->eraseFromParent();
MRI->replaceRegWith(SavedExec, ExecReg);
LIS->removeInterval(SavedExec);
Changed = true;
}
}
break;
}

View File

@ -0,0 +1,23 @@
# RUN: llc -run-pass si-optimize-exec-masking-pre-ra -march=amdgcn -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
# Regression test for folding a COPY of $exec into its single user.
# Here the only user of %2 is V_CMP_EQ_U64_e64, whose other source operand %1
# is already an SGPR pair (sreg_64). Replacing %2 with $exec would add one more
# SGPR operand to that instruction, so the check line below expects the COPY
# of $exec to still be present after the pass has run.
---
# GCN-LABEL: name: opt_exec_copy_fold
# GCN: %2:vreg_64 = COPY $exec
name: opt_exec_copy_fold
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0_sgpr1' }
body: |
bb.0:
liveins: $sgpr0_sgpr1
%0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr0_sgpr1, implicit $exec
%1:sreg_64 = V_CMP_NE_U32_e64 0, %0, implicit $exec
%2:vreg_64 = COPY $exec
%3:sreg_64 = V_CMP_EQ_U64_e64 %1, %2, implicit $exec
$scc = COPY %3
S_CBRANCH_SCC0 %bb.1, implicit $scc
bb.1:
...