From ffaed72089f359320d92e07f9ed00de070264969 Mon Sep 17 00:00:00 2001
From: Zvi Rackover
Date: Wed, 6 Dec 2017 17:40:09 +0000
Subject: [PATCH] AMDGPU Tests: Change a case to be run with -O0

D40231 requires to run case with -O0 to prevent InstructionSimplify from transforming an extractelement with undef index.

llvm-svn: 319907
---
Reviewer notes (free text placed between the "---" separator and the
diffstat; it is not part of the commit message or of either hunk):
 * The patch relocates the test function @extract_adjacent_blocks from
   indirect-addressing-si.ll to indirect-addressing-si-noopt.ll. The IR
   body that is added is the same as the IR body that is removed.
 * The relocated expectations use the single CHECK prefix. The former
   GCN and MOVREL lines are carried over as CHECK lines; the IDXMODE
   expectations are dropped rather than moved.
 * The expected first conditional branch changes from s_cbranch_scc0 to
   s_cbranch_scc1 in the relocated copy.
 * NOTE(review): the RUN lines of indirect-addressing-si-noopt.ll are
   outside this patch; presumably they pass -O0 and use the CHECK
   prefix -- confirm against the file.

 .../AMDGPU/indirect-addressing-si-noopt.ll    | 45 +++++++++++++++++
 .../CodeGen/AMDGPU/indirect-addressing-si.ll  | 49 -------------------
 2 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
index b2873402da43..63384f5e4450 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
@@ -17,3 +17,48 @@ entry:
   ret void
 }
 
+; Make sure we don't hit use of undefined register errors when expanding an
+; extract with undef index.
+
+; CHECK-LABEL: {{^}}extract_adjacent_blocks:
+; CHECK: s_load_dword [[ARG:s[0-9]+]]
+; CHECK: s_cmp_lg_u32
+; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]]
+
+; CHECK: buffer_load_dwordx4
+; CHECK: s_mov_b32 m0,
+; CHECK: v_movrels_b32_e32
+
+; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
+
+; CHECK: [[BB4]]:
+; CHECK: buffer_load_dwordx4
+; CHECK: s_mov_b32 m0,
+; CHECK: v_movrels_b32_e32
+
+; CHECK: [[ENDBB]]:
+; CHECK: buffer_store_dword
+; CHECK: s_endpgm
+
+define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 {
+bb:
+  %tmp = icmp eq i32 %arg, 0
+  br i1 %tmp, label %bb1, label %bb4
+
+bb1:
+  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
+  %tmp3 = extractelement <4 x float> %tmp2, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
+  br label %bb7
+
+bb4:
+  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
+  %tmp6 = extractelement <4 x float> %tmp5, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
+  br label %bb7
+
+bb7:
+  %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
+  store volatile float %tmp8, float addrspace(1)* undef
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index d181d5029efb..2b4aca019a39 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -469,55 +469,6 @@ bb2:
   ret void
 }
 
-; GCN-LABEL: {{^}}extract_adjacent_blocks:
-; GCN: s_load_dword [[ARG:s[0-9]+]]
-; GCN: s_cmp_lg_u32
-; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]
-
-; GCN: buffer_load_dwordx4
-; MOVREL: s_mov_b32 m0,
-; MOVREL: v_movrels_b32_e32
-
-; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0
-; IDXMODE: v_mov_b32_e32
-; IDXMODE: s_set_gpr_idx_off
-
-; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
-
-; GCN: [[BB4]]:
-; GCN: buffer_load_dwordx4
-; MOVREL: s_mov_b32 m0,
-; MOVREL: v_movrels_b32_e32
-
-; IDXMODE: s_set_gpr_idx_on
-; IDXMODE: v_mov_b32_e32
-; IDXMODE: s_set_gpr_idx_off
-
-; GCN: [[ENDBB]]:
-; GCN: buffer_store_dword
-; GCN: s_endpgm
-define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 {
-bb:
-  %tmp = icmp eq i32 %arg, 0
-  br i1 %tmp, label %bb1, label %bb4
-
-bb1:
-  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
-  %tmp3 = extractelement <4 x float> %tmp2, i32 undef
-  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
-  br label %bb7
-
-bb4:
-  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
-  %tmp6 = extractelement <4 x float> %tmp5, i32 undef
-  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
-  br label %bb7
-
-bb7:
-  %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
-  store volatile float %tmp8, float addrspace(1)* undef
-  ret void
-}
 
 ; GCN-LABEL: {{^}}insert_adjacent_blocks:
 ; GCN: s_load_dword [[ARG:s[0-9]+]]