AMDGPU: Allow getMemOperandWithOffset to analyze stack accesses

Report soffset as a base register if the scratch resource can be
ignored.

llvm-svn: 371149
This commit is contained in:
Matt Arsenault 2019-09-05 23:54:35 +00:00
parent 59ff77ee38
commit 60c8b8bcf2
6 changed files with 60 additions and 45 deletions

View File

@ -318,9 +318,26 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
if (isMUBUF(LdSt) || isMTBUF(LdSt)) { if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
if (SOffset && SOffset->isReg()) if (SOffset && SOffset->isReg()) {
// We can only handle this if it's a stack access, as any other resource
// would require reporting multiple base registers.
const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (AddrReg && !AddrReg->isFI())
return false; return false;
const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
const SIMachineFunctionInfo *MFI
= LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
if (RSrc->getReg() != MFI->getScratchRSrcReg())
return false;
const MachineOperand *OffsetImm =
getNamedOperand(LdSt, AMDGPU::OpName::offset);
BaseOp = SOffset;
Offset = OffsetImm->getImm();
return true;
}
const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (!AddrReg) if (!AddrReg)
return false; return false;

View File

@ -133,10 +133,10 @@ entry:
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
@ -263,10 +263,10 @@ entry:
; GCN-NOT: s_add_u32 s32, s32, 0x800 ; GCN-NOT: s_add_u32 s32, s32, 0x800
; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
@ -331,10 +331,11 @@ entry:
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
; GCN: s_waitcnt vmcnt(0) ; GCN: s_waitcnt vmcnt(0)
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20

View File

@ -765,17 +765,16 @@ entry:
; GCN-LABEL: {{^}}tail_call_byval_align16: ; GCN-LABEL: {{^}}tail_call_byval_align16:
; GCN-NOT: s32 ; GCN-NOT: s32
; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GCN-NOT: buffer_store_dword v33
; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GCN-NOT: buffer_store_dword v33
; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:20 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:20
; GCN: s_getpc_b64 ; GCN: s_getpc_b64
; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16
; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GCN-NOT: s32 ; GCN-NOT: s32
; GCN: s_setpc_b64 ; GCN: s_setpc_b64
define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
@ -806,12 +805,12 @@ entry:
; GCN-LABEL: {{^}}stack_12xv3i32: ; GCN-LABEL: {{^}}stack_12xv3i32:
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
; GCN: buffer_store_dword [[REG12]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
; GCN: buffer_store_dword [[REG12]], {{.*$}}
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
; GCN: v_mov_b32_e32 v31, 11 ; GCN: v_mov_b32_e32 v31, 11
; GCN: s_getpc ; GCN: s_getpc
@ -835,12 +834,12 @@ entry:
; GCN-LABEL: {{^}}stack_12xv3f32: ; GCN-LABEL: {{^}}stack_12xv3f32:
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
; GCN: buffer_store_dword [[REG12]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
; GCN: buffer_store_dword [[REG12]], {{.*$}}
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
; GCN: v_mov_b32_e32 v31, 0x41300000 ; GCN: v_mov_b32_e32 v31, 0x41300000
; GCN: s_getpc ; GCN: s_getpc
@ -865,20 +864,20 @@ entry:
; GCN-LABEL: {{^}}stack_8xv5i32: ; GCN-LABEL: {{^}}stack_8xv5i32:
; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
; GCN: buffer_store_dword [[REG8]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 ; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
; GCN: buffer_store_dword [[REG8]], {{.*$}}
; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28
; GCN: v_mov_b32_e32 v31, 7 ; GCN: v_mov_b32_e32 v31, 7
@ -899,20 +898,20 @@ entry:
; GCN-LABEL: {{^}}stack_8xv5f32: ; GCN-LABEL: {{^}}stack_8xv5f32:
; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000
; GCN: buffer_store_dword [[REG8]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000 ; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000
; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000 ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000
; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
; GCN: buffer_store_dword [[REG8]], {{.*$}}
; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28
; GCN: v_mov_b32_e32 v31, 0x40e00000 ; GCN: v_mov_b32_e32 v31, 0x40e00000

View File

@ -489,16 +489,15 @@ define void @too_many_args_use_workitem_id_x_byval(
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval: ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
; GCN: enable_vgpr_workitem_id = 0 ; GCN: enable_vgpr_workitem_id = 0
; GCN-DAG: s_mov_b32 s33, s7
; GCN: s_mov_b32 s33, s7 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
; GCN: s_add_u32 s32, s33, 0x400{{$}} ; GCN: s_add_u32 s32, s33, 0x400{{$}}
; GCN-NOT: s32 ; GCN-NOT: s32
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]], ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64 ; GCN: s_swappc_b64
@ -521,9 +520,8 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval: ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s34{{$}} ; GCN: buffer_store_dword [[K]], off, s[0:3], s34{{$}}
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}} ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}}
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]], ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64 ; GCN: s_swappc_b64

View File

@ -231,10 +231,10 @@ declare void @func(<4 x float> addrspace(5)* nocapture) #0
; GCN-LABEL: {{^}}undefined_stack_store_reg: ; GCN-LABEL: {{^}}undefined_stack_store_reg:
; GCN: s_and_saveexec_b64 ; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset: ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset:
; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 { define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
bb: bb:
%tmp = alloca <4 x float>, align 16, addrspace(5) %tmp = alloca <4 x float>, align 16, addrspace(5)

View File

@ -1650,10 +1650,10 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)
; SI-NEXT: v_mov_b32_e32 v13, s21 ; SI-NEXT: v_mov_b32_e32 v13, s21
; SI-NEXT: v_mov_b32_e32 v14, s22 ; SI-NEXT: v_mov_b32_e32 v14, s22
; SI-NEXT: v_mov_b32_e32 v15, s23 ; SI-NEXT: v_mov_b32_e32 v15, s23
; SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112
; SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], s7 offset:96 ; SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], s7 offset:96
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80 ; SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], s7 offset:64 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], s7 offset:64
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80
; SI-NEXT: v_or_b32_e32 v16, s4, v16 ; SI-NEXT: v_or_b32_e32 v16, s4, v16
; SI-NEXT: v_mov_b32_e32 v0, 0 ; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: v_mov_b32_e32 v1, 0x40200000 ; SI-NEXT: v_mov_b32_e32 v1, 0x40200000
@ -1696,10 +1696,10 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)
; VI-NEXT: v_mov_b32_e32 v13, s21 ; VI-NEXT: v_mov_b32_e32 v13, s21
; VI-NEXT: v_mov_b32_e32 v14, s22 ; VI-NEXT: v_mov_b32_e32 v14, s22
; VI-NEXT: v_mov_b32_e32 v15, s23 ; VI-NEXT: v_mov_b32_e32 v15, s23
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], s7 offset:96 ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], s7 offset:96
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80 ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], s7 offset:64 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], s7 offset:64
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80
; VI-NEXT: v_or_b32_e32 v16, s4, v16 ; VI-NEXT: v_or_b32_e32 v16, s4, v16
; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0x40200000 ; VI-NEXT: v_mov_b32_e32 v1, 0x40200000