AMDGPU: Erase redundant redefs of m0 in SIFoldOperands

Only handle simple intra-block redefs of m0 to the same value. This
avoids interference from redefs of m0 in SILoadStoreOptimizer. I was
initially teaching that pass to ignore redefs of m0, but having them
not exist beforehand is much simpler.

This is in preparation for deleting the current special m0 handling in
SIFixSGPRCopies to allow the register coalescer to handle the
difficult cases.

llvm-svn: 375449
This commit is contained in:
Matt Arsenault 2019-10-21 19:53:46 +00:00
parent dd6cf159ba
commit 8ebbf25cb1
2 changed files with 387 additions and 0 deletions

View File

@ -1349,6 +1349,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
MachineOperand *CurrentKnownM0Val = nullptr;
for (I = MBB->begin(); I != MBB->end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
@ -1361,6 +1363,25 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
!tryFoldOMod(MI))
tryFoldClamp(MI);
// Saw an unknown clobber of m0, so we no longer know what it is.
if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
CurrentKnownM0Val = nullptr;
continue;
}
// Specially track simple redefs of m0 to the same value in a block, so we
// can erase the later ones.
if (MI.getOperand(0).getReg() == AMDGPU::M0) {
MachineOperand &NewM0Val = MI.getOperand(1);
if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
MI.eraseFromParent();
continue;
}
// We aren't tracking other physical registers
CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
nullptr : &NewM0Val;
continue;
}

View File

@ -0,0 +1,366 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
--- |
define amdgpu_kernel void @redef_m0_same_copy() { ret void }
define amdgpu_kernel void @multi_redef_m0_same_copy() { ret void }
define amdgpu_kernel void @redef_m0_different_copy() { ret void }
define amdgpu_kernel void @redef_m0_mixed_copy0() { ret void }
define amdgpu_kernel void @redef_m0_mixed_copy1() { ret void }
define amdgpu_kernel void @redef_m0_same_mov_imm() { ret void }
define amdgpu_kernel void @redef_m0_different_inst0() { ret void }
define amdgpu_kernel void @redef_m0_different_inst1() { ret void }
define amdgpu_kernel void @redef_m0_mixed_read_m0() { ret void }
define amdgpu_kernel void @redef_m0_same_copy_call() { ret void }
define amdgpu_kernel void @redef_m0_same_copy_multi_block() { ret void }
define amdgpu_kernel void @redef_m0_copy_self() { ret void }
define amdgpu_kernel void @redef_m0_copy_physreg() { ret void }
declare void @func()
...
---
name: redef_m0_same_copy
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; GCN-LABEL: name: redef_m0_same_copy
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = COPY %1
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = COPY %1
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: multi_redef_m0_same_copy
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; GCN-LABEL: name: multi_redef_m0_same_copy
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = COPY %1
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = COPY %1
$m0 = COPY %1
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_different_copy
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $sgpr1
; GCN-LABEL: name: redef_m0_different_copy
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: $m0 = COPY [[COPY2]]
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
%2:sgpr_32 = COPY $sgpr1
$m0 = COPY %1
%3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = COPY %2
%4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_mixed_copy0
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $sgpr1
; GCN-LABEL: name: redef_m0_mixed_copy0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: $m0 = COPY [[COPY2]]
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
%2:sgpr_32 = COPY $sgpr1
$m0 = COPY %1
%3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = COPY %1
$m0 = COPY %2
%4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_mixed_copy1
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $sgpr1
; GCN-LABEL: name: redef_m0_mixed_copy1
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: $m0 = COPY [[COPY2]]
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
%2:sgpr_32 = COPY $sgpr1
$m0 = COPY %1
%3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = COPY %2
$m0 = COPY %1
%4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_same_mov_imm
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; GCN-LABEL: name: redef_m0_same_mov_imm
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = S_MOV_B32 -1
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = S_MOV_B32 -1
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = S_MOV_B32 -1
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_different_inst0
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; GCN-LABEL: name: redef_m0_different_inst0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: $m0 = IMPLICIT_DEF
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = COPY %1
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = IMPLICIT_DEF
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_different_inst1
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; GCN-LABEL: name: redef_m0_different_inst1
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: S_NOP 0, implicit-def $m0
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = COPY %1
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
S_NOP 0, implicit-def $m0
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_mixed_read_m0
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $sgpr1
; GCN-LABEL: name: redef_m0_mixed_read_m0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: $m0 = COPY [[COPY2]]
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: [[DS_READ_B32_2:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 128, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
%2:sgpr_32 = COPY $sgpr1
$m0 = COPY %1
%3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = COPY %2
%4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = COPY %2
%5:vgpr_32 = DS_READ_B32 %0, 128, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_same_copy_call
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; GCN-LABEL: name: redef_m0_same_copy_call
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = COPY %1
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs
$m0 = COPY %1
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_same_copy_multi_block
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
; GCN-LABEL: name: redef_m0_same_copy_multi_block
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: bb.1:
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
bb.0:
liveins: $vgpr0, $sgpr0
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = COPY %1
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
bb.1:
$m0 = COPY %1
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_copy_self
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; GCN-LABEL: name: redef_m0_copy_self
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = COPY [[COPY1]]
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: $m0 = COPY $m0
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = COPY %1
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$m0 = COPY $m0
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...
---
name: redef_m0_copy_physreg
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; GCN-LABEL: name: redef_m0_copy_physreg
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: $m0 = COPY $sgpr0
; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
; GCN: $sgpr0 = S_MOV_B32 0
; GCN: $m0 = COPY $sgpr0
; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr0
$m0 = COPY $sgpr0
%2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
$sgpr0 = S_MOV_B32 0
$m0 = COPY $sgpr0
%3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
...