AMDGPU: Insert skips for blocks with FLAT

The pass already forced a skip over blocks containing VMEM instructions,
so the same should be done for FLAT instructions. I'm somewhat skeptical
about the benefit of this, though.

llvm-svn: 362760
This commit is contained in:
Matt Arsenault 2019-06-07 00:14:45 +00:00
parent ef4a3aa549
commit 99ee81b183
2 changed files with 60 additions and 1 deletions

View File

@ -135,7 +135,8 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
return true;
// These instructions are potentially expensive even if EXEC = 0.
if (TII->isSMRD(*I) || TII->isVMEM(*I) || I->getOpcode() == AMDGPU::S_WAITCNT)
if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
I->getOpcode() == AMDGPU::S_WAITCNT)
return true;
++NumInstr;

View File

@ -0,0 +1,58 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold=1 %s -o - | FileCheck %s
---
# Test case: bb.0 ends in SI_MASK_BRANCH to %bb.2 and the fall-through block
# bb.1 contains a FLAT_STORE_DWORD. The CHECK lines expect the output to have
# an S_CBRANCH_EXECZ to %bb.2 right after the SI_MASK_BRANCH in bb.0, with
# bb.1 and bb.2 otherwise unchanged.
name: skip_execz_flat
body: |
; CHECK-LABEL: name: skip_execz_flat
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK: FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; CHECK: bb.2:
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1
SI_MASK_BRANCH %bb.2, implicit $exec
bb.1:
successors: %bb.2
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
bb.2:
S_ENDPGM 0
...
---
# Test case: same control-flow shape as a masked branch over a single block,
# but here bb.1 contains a BUFFER_STORE_DWORD_OFFSET. The CHECK lines expect
# the output to have an S_CBRANCH_EXECZ to %bb.2 right after the
# SI_MASK_BRANCH in bb.0, with bb.1 and bb.2 otherwise unchanged.
name: skip_execz_mubuf
body: |
; CHECK-LABEL: name: skip_execz_mubuf
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; CHECK: bb.2:
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1
SI_MASK_BRANCH %bb.2, implicit $exec
bb.1:
successors: %bb.2
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
bb.2:
S_ENDPGM 0
...