From 99ee81b1839aeee36a2c58e822f33e3101ceb7ad Mon Sep 17 00:00:00 2001 From: Matt Arsenault <Matthew.Arsenault@amd.com> Date: Fri, 7 Jun 2019 00:14:45 +0000 Subject: [PATCH] AMDGPU: Insert skips for blocks with FLAT This already forced a skip for VMEM, so it should also be done for flat. I'm somewhat skeptical about the benefit of this though. llvm-svn: 362760 --- llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 3 +- .../CodeGen/AMDGPU/insert-skips-flat-vmem.mir | 58 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index d7a7b79e68a0..507c686e1cf2 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -135,7 +135,8 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From, return true; // These instructions are potentially expensive even if EXEC = 0. - if (TII->isSMRD(*I) || TII->isVMEM(*I) || I->getOpcode() == AMDGPU::S_WAITCNT) + if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) || + I->getOpcode() == AMDGPU::S_WAITCNT) return true; ++NumInstr; diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir new file mode 100644 index 000000000000..da0d366a8d69 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir @@ -0,0 +1,58 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold=1 %s -o - | FileCheck %s + +--- + +name: skip_execz_flat +body: | + ; CHECK-LABEL: name: skip_execz_flat + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1 + SI_MASK_BRANCH %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + bb.2: + S_ENDPGM 0 +... + +--- + +name: skip_execz_mubuf +body: | + ; CHECK-LABEL: name: skip_execz_mubuf + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec + ; CHECK: bb.2: + ; CHECK: S_ENDPGM 0 + bb.0: + successors: %bb.1 + SI_MASK_BRANCH %bb.2, implicit $exec + + bb.1: + successors: %bb.2 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec + + bb.2: + S_ENDPGM 0 +...