2017-08-07 22:58:04 +08:00
|
|
|
; RUN: llc -O0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
|
Replace subregister uses when processing tied operands
This was for some reason skipping operands that are subregisters
instead of keeping the same subregister index.
v_movreld_b32 expects src0 to be the subregister of the tied
super register use/def.
e.g.
v_movreld_b32 v0, v9, <imp-def, tied3> v[0:3], <imp-use, tied2> v[0:3]
was being replaced with
v[4:7] = copy v[0:3]
v_movreld_b32 v0, v9, <imp-def, tied3> v[4:7], <imp-use, tied2> v[4:7],
which really writes to v[0:3]
llvm-svn: 279804
2016-08-26 14:31:32 +08:00
|
|
|
|
|
|
|
; FIXME: Merge into indirect-addressing-si.ll
|
|
|
|
|
|
|
|
; Make sure that the TwoAddressInstruction pass keeps src0 as subregister sub0
; of the tied implicit use and def of the super register.
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}insert_wo_offset:
|
|
|
|
; CHECK: s_load_dword [[IN:s[0-9]+]]
|
|
|
|
; CHECK: s_mov_b32 m0, [[IN]]
|
|
|
|
; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
|
|
|
|
; CHECK-NEXT: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
|
2017-03-22 05:39:51 +08:00
|
|
|
; Kernel under test: inserts the constant 5.0 into the constant vector
; <1.0, 2.0, 3.0, 4.0> at the lane selected by the kernel argument %in, then
; stores the resulting <4 x float> through the global (addrspace(1)) pointer
; %out.
define amdgpu_kernel void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
entry:
  ; The lane index is the kernel argument %in, not a constant, so the element
  ; to overwrite is only known at run time.
  %ins = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
  store <4 x float> %ins, <4 x float> addrspace(1)* %out
  ret void
}
|
|
|
|
|
2017-12-07 01:40:09 +08:00
|
|
|
; Make sure we don't hit "use of undefined register" errors when expanding an
; extract with an undef index.
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}extract_adjacent_blocks:
|
|
|
|
; CHECK: s_load_dword [[ARG:s[0-9]+]]
|
|
|
|
; CHECK: s_cmp_lg_u32
|
|
|
|
; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]]
|
|
|
|
|
|
|
|
; CHECK: buffer_load_dwordx4
|
2018-11-14 05:18:21 +08:00
|
|
|
; CHECK: v_cndmask_b32_e64
|
|
|
|
; CHECK: v_cndmask_b32_e64
|
|
|
|
; CHECK: v_cndmask_b32_e64
|
2017-12-07 01:40:09 +08:00
|
|
|
|
|
|
|
; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
|
|
|
|
|
|
|
|
; CHECK: [[BB4]]:
|
|
|
|
; CHECK: buffer_load_dwordx4
|
2018-11-14 05:18:21 +08:00
|
|
|
; CHECK: v_cndmask_b32_e64
|
|
|
|
; CHECK: v_cndmask_b32_e64
|
|
|
|
; CHECK: v_cndmask_b32_e64
|
2017-12-07 01:40:09 +08:00
|
|
|
|
|
|
|
; CHECK: [[ENDBB]]:
|
|
|
|
; CHECK: buffer_store_dword
|
|
|
|
; CHECK: s_endpgm
|
|
|
|
|
|
|
|
; Kernel under test: branches on %arg == 0 into one of two sibling blocks
; (bb1 / bb4). Each block performs a volatile <4 x float> load from an undef
; global pointer and extracts one element at an undef index. bb7 merges the
; two extracted floats with a phi and stores the result with a volatile store.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file; presumably it is declared further down -- confirm.
define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  ; Extract with an undef index -- the case this test exercises.
  %tmp3 = extractelement <4 x float> %tmp2, i32 undef
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
  br label %bb7

bb4:
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  ; Same undef-index extract as in bb1, on the other side of the branch.
  %tmp6 = extractelement <4 x float> %tmp5, i32 undef
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
  br label %bb7

bb7:
  ; Join point: select whichever extracted element the taken path produced.
  %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile float %tmp8, float addrspace(1)* undef
  ret void
}
|