forked from OSchip/llvm-project
AMDGPU/GlobalISel: Improve private addressing mode matching
This enables looking through copies, as a hack to work around regbankselect not correctly assigning constants to the register bank of their use.
This commit is contained in:
parent
cf5ecd5644
commit
70cb57d7da
|
@ -3712,23 +3712,19 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
|
|||
Optional<int> FI;
|
||||
Register VAddr = Root.getReg();
|
||||
if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
|
||||
if (isBaseWithConstantOffset(Root, *MRI)) {
|
||||
const MachineOperand &LHS = RootDef->getOperand(1);
|
||||
const MachineOperand &RHS = RootDef->getOperand(2);
|
||||
const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
|
||||
const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
|
||||
if (LHSDef && RHSDef) {
|
||||
int64_t PossibleOffset =
|
||||
RHSDef->getOperand(1).getCImm()->getSExtValue();
|
||||
if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
|
||||
(!STI.privateMemoryResourceIsRangeChecked() ||
|
||||
KnownBits->signBitIsZero(LHS.getReg()))) {
|
||||
if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
|
||||
FI = LHSDef->getOperand(1).getIndex();
|
||||
else
|
||||
VAddr = LHS.getReg();
|
||||
Offset = PossibleOffset;
|
||||
}
|
||||
Register PtrBase;
|
||||
int64_t ConstOffset;
|
||||
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
|
||||
if (ConstOffset != 0) {
|
||||
if (SIInstrInfo::isLegalMUBUFImmOffset(ConstOffset) &&
|
||||
(!STI.privateMemoryResourceIsRangeChecked() ||
|
||||
KnownBits->signBitIsZero(PtrBase))) {
|
||||
const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
|
||||
if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
|
||||
FI = PtrBaseDef->getOperand(1).getIndex();
|
||||
else
|
||||
VAddr = PtrBase;
|
||||
Offset = ConstOffset;
|
||||
}
|
||||
} else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
|
||||
FI = RootDef->getOperand(1).getIndex();
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -10,15 +10,15 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
|
|||
; GCN-NEXT: s_add_u32 s0, s0, s7
|
||||
; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
|
||||
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
|
||||
; GCN-NEXT: v_mov_b32_e32 v16, 0x100
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GCN-NEXT: v_add_u32_e32 v31, 64, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v16, 0x100
|
||||
; GCN-NEXT: v_mov_b32_e32 v64, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_load_dwordx16 s[36:51], s[10:11], 0x0
|
||||
; GCN-NEXT: s_load_dwordx16 s[52:67], s[10:11], 0x40
|
||||
; GCN-NEXT: s_load_dwordx16 s[12:27], s[10:11], 0x80
|
||||
; GCN-NEXT: v_add_u32_e32 v32, 0x44, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v33, 0x48, v16
|
||||
; GCN-NEXT: s_and_b32 s4, s7, 63
|
||||
; GCN-NEXT: s_lshl_b32 s4, s4, 2
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s36
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s37
|
||||
|
@ -38,328 +38,217 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
|
|||
; GCN-NEXT: v_mov_b32_e32 v15, s51
|
||||
; GCN-NEXT: s_load_dwordx16 s[36:51], s[10:11], 0xc0
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:256
|
||||
; GCN-NEXT: v_add_u32_e32 v0, 4, v16
|
||||
; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s52
|
||||
; GCN-NEXT: buffer_store_dword v1, v31, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s53
|
||||
; GCN-NEXT: buffer_store_dword v1, v32, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s54
|
||||
; GCN-NEXT: buffer_store_dword v1, v33, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v34, 0x4c, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s55
|
||||
; GCN-NEXT: buffer_store_dword v1, v34, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v35, 0x50, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s56
|
||||
; GCN-NEXT: buffer_store_dword v1, v35, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v36, 0x54, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s57
|
||||
; GCN-NEXT: buffer_store_dword v1, v36, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v37, 0x58, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s58
|
||||
; GCN-NEXT: buffer_store_dword v1, v37, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v38, 0x5c, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s59
|
||||
; GCN-NEXT: buffer_store_dword v1, v38, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v39, 0x60, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s60
|
||||
; GCN-NEXT: buffer_store_dword v1, v39, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v40, 0x64, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s61
|
||||
; GCN-NEXT: buffer_store_dword v1, v40, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v41, 0x68, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s62
|
||||
; GCN-NEXT: buffer_store_dword v1, v41, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v42, 0x6c, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s63
|
||||
; GCN-NEXT: buffer_store_dword v1, v42, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v43, 0x70, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s64
|
||||
; GCN-NEXT: buffer_store_dword v1, v43, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v44, 0x74, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s65
|
||||
; GCN-NEXT: buffer_store_dword v1, v44, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v45, 0x78, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s66
|
||||
; GCN-NEXT: buffer_store_dword v1, v45, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v46, 0x7c, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s67
|
||||
; GCN-NEXT: buffer_store_dword v1, v46, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v47, 0x80, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s12
|
||||
; GCN-NEXT: buffer_store_dword v1, v47, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v48, 0x84, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s13
|
||||
; GCN-NEXT: buffer_store_dword v1, v48, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v49, 0x88, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s14
|
||||
; GCN-NEXT: buffer_store_dword v1, v49, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v50, 0x8c, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s15
|
||||
; GCN-NEXT: buffer_store_dword v1, v50, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v51, 0x90, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GCN-NEXT: buffer_store_dword v1, v51, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v52, 0x94, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s17
|
||||
; GCN-NEXT: buffer_store_dword v1, v52, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v53, 0x98, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s18
|
||||
; GCN-NEXT: buffer_store_dword v1, v53, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v54, 0x9c, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s19
|
||||
; GCN-NEXT: buffer_store_dword v1, v54, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v55, 0xa0, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s20
|
||||
; GCN-NEXT: buffer_store_dword v1, v55, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v56, 0xa4, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s21
|
||||
; GCN-NEXT: buffer_store_dword v1, v56, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v57, 0xa8, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s22
|
||||
; GCN-NEXT: buffer_store_dword v1, v57, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v58, 0xac, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s23
|
||||
; GCN-NEXT: buffer_store_dword v1, v58, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v59, 0xb0, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s24
|
||||
; GCN-NEXT: buffer_store_dword v1, v59, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v60, 0xb4, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s25
|
||||
; GCN-NEXT: buffer_store_dword v1, v60, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v61, 0xb8, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s26
|
||||
; GCN-NEXT: buffer_store_dword v1, v61, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v62, 0xbc, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s27
|
||||
; GCN-NEXT: buffer_store_dword v1, v62, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v63, 0xc0, v16
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:260
|
||||
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:264
|
||||
; GCN-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:268
|
||||
; GCN-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:272
|
||||
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:276
|
||||
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:280
|
||||
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
|
||||
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:288
|
||||
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:292
|
||||
; GCN-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:296
|
||||
; GCN-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:300
|
||||
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:304
|
||||
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:308
|
||||
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:312
|
||||
; GCN-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:316
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s52
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:320
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s53
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:324
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s54
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:328
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s55
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:332
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s56
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:336
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s57
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:340
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s58
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:344
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s59
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:348
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s60
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:352
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s61
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:356
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s62
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:360
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s63
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:364
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s64
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:368
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s65
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:372
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s66
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:376
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s67
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:380
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:384
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s13
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:388
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s14
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:392
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s15
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:396
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s16
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:400
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s17
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:404
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s18
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:408
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s19
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:412
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s20
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:416
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s21
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:420
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s22
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:424
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s23
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:428
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s24
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:432
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s25
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:436
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s26
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:440
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s27
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:444
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s36
|
||||
; GCN-NEXT: buffer_store_dword v1, v63, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v64, 0xc4, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s37
|
||||
; GCN-NEXT: buffer_store_dword v1, v64, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v65, 0xc8, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s38
|
||||
; GCN-NEXT: buffer_store_dword v1, v65, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v66, 0xcc, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s39
|
||||
; GCN-NEXT: buffer_store_dword v1, v66, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v67, 0xd0, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s40
|
||||
; GCN-NEXT: buffer_store_dword v1, v67, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v68, 0xd4, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s41
|
||||
; GCN-NEXT: buffer_store_dword v1, v68, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v69, 0xd8, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s42
|
||||
; GCN-NEXT: buffer_store_dword v1, v69, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v70, 0xdc, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s43
|
||||
; GCN-NEXT: buffer_store_dword v1, v70, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v71, 0xe0, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s44
|
||||
; GCN-NEXT: buffer_store_dword v1, v71, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v72, 0xe4, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s45
|
||||
; GCN-NEXT: buffer_store_dword v1, v72, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v73, 0xe8, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s46
|
||||
; GCN-NEXT: buffer_store_dword v1, v73, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v74, 0xec, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s47
|
||||
; GCN-NEXT: buffer_store_dword v1, v74, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v75, 0xf0, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s48
|
||||
; GCN-NEXT: buffer_store_dword v1, v75, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v76, 0xf4, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s49
|
||||
; GCN-NEXT: s_and_b32 s4, s7, 63
|
||||
; GCN-NEXT: buffer_store_dword v1, v76, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v77, 0xf8, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s50
|
||||
; GCN-NEXT: v_add_u32_e32 v17, 8, v16
|
||||
; GCN-NEXT: buffer_store_dword v1, v77, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v78, 0xfc, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s51
|
||||
; GCN-NEXT: s_lshl_b32 s4, s4, 2
|
||||
; GCN-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v18, 12, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v19, 16, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v20, 20, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v21, 24, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v22, 28, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v23, 32, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v24, 36, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v25, 40, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v26, 44, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v27, 48, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v28, 52, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v29, 56, v16
|
||||
; GCN-NEXT: v_add_u32_e32 v30, 60, v16
|
||||
; GCN-NEXT: buffer_store_dword v1, v78, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_add_u32_e32 v1, s4, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GCN-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v4, v19, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v5, v20, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v6, v21, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v7, v22, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v8, v23, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v9, v24, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v10, v25, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v11, v26, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v12, v27, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v13, v28, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v14, v29, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v15, v30, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
|
||||
; GCN-NEXT: s_nop 0
|
||||
; GCN-NEXT: buffer_load_dword v2, v17, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v3, v18, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v4, v19, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v5, v20, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v6, v21, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v7, v22, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v8, v23, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v9, v24, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v10, v25, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v11, v26, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v12, v27, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v13, v28, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v14, v29, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v15, v30, s[0:3], 0 offen
|
||||
; GCN-NEXT: ; kill: killed $vgpr30
|
||||
; GCN-NEXT: ; kill: killed $vgpr19
|
||||
; GCN-NEXT: ; kill: killed $vgpr23
|
||||
; GCN-NEXT: ; kill: killed $vgpr27
|
||||
; GCN-NEXT: ; kill: killed $vgpr20
|
||||
; GCN-NEXT: ; kill: killed $vgpr24
|
||||
; GCN-NEXT: ; kill: killed $vgpr28
|
||||
; GCN-NEXT: ; kill: killed $vgpr17
|
||||
; GCN-NEXT: ; kill: killed $vgpr21
|
||||
; GCN-NEXT: ; kill: killed $vgpr25
|
||||
; GCN-NEXT: ; kill: killed $vgpr0
|
||||
; GCN-NEXT: ; kill: killed $vgpr29
|
||||
; GCN-NEXT: ; kill: killed $vgpr18
|
||||
; GCN-NEXT: ; kill: killed $vgpr22
|
||||
; GCN-NEXT: ; kill: killed $vgpr26
|
||||
; GCN-NEXT: buffer_load_dword v16, v31, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v17, v32, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v18, v33, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v19, v34, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v20, v35, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v21, v36, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v22, v37, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v23, v38, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v24, v39, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v25, v40, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v26, v41, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v27, v42, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v28, v43, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v29, v44, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v30, v45, s[0:3], 0 offen
|
||||
; GCN-NEXT: ; kill: killed $vgpr45
|
||||
; GCN-NEXT: ; kill: killed $vgpr34
|
||||
; GCN-NEXT: ; kill: killed $vgpr38
|
||||
; GCN-NEXT: ; kill: killed $vgpr42
|
||||
; GCN-NEXT: ; kill: killed $vgpr31
|
||||
; GCN-NEXT: ; kill: killed $vgpr35
|
||||
; GCN-NEXT: ; kill: killed $vgpr39
|
||||
; GCN-NEXT: ; kill: killed $vgpr43
|
||||
; GCN-NEXT: ; kill: killed $vgpr32
|
||||
; GCN-NEXT: ; kill: killed $vgpr36
|
||||
; GCN-NEXT: ; kill: killed $vgpr40
|
||||
; GCN-NEXT: ; kill: killed $vgpr44
|
||||
; GCN-NEXT: ; kill: killed $vgpr33
|
||||
; GCN-NEXT: ; kill: killed $vgpr37
|
||||
; GCN-NEXT: ; kill: killed $vgpr41
|
||||
; GCN-NEXT: buffer_load_dword v31, v46, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v32, v47, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v33, v48, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v34, v49, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v35, v50, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v36, v51, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v37, v52, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v38, v53, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v39, v54, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v40, v55, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v41, v56, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v42, v57, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v43, v58, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v44, v59, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v45, v60, s[0:3], 0 offen
|
||||
; GCN-NEXT: ; kill: killed $vgpr60
|
||||
; GCN-NEXT: ; kill: killed $vgpr49
|
||||
; GCN-NEXT: ; kill: killed $vgpr53
|
||||
; GCN-NEXT: ; kill: killed $vgpr57
|
||||
; GCN-NEXT: ; kill: killed $vgpr46
|
||||
; GCN-NEXT: ; kill: killed $vgpr50
|
||||
; GCN-NEXT: ; kill: killed $vgpr54
|
||||
; GCN-NEXT: ; kill: killed $vgpr58
|
||||
; GCN-NEXT: ; kill: killed $vgpr47
|
||||
; GCN-NEXT: ; kill: killed $vgpr51
|
||||
; GCN-NEXT: ; kill: killed $vgpr55
|
||||
; GCN-NEXT: ; kill: killed $vgpr59
|
||||
; GCN-NEXT: ; kill: killed $vgpr48
|
||||
; GCN-NEXT: ; kill: killed $vgpr52
|
||||
; GCN-NEXT: ; kill: killed $vgpr56
|
||||
; GCN-NEXT: buffer_load_dword v46, v61, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v47, v62, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v48, v63, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v49, v64, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v50, v65, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v51, v66, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v52, v67, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v53, v68, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v54, v69, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v55, v70, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v56, v71, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v57, v72, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v58, v73, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v59, v74, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v60, v75, s[0:3], 0 offen
|
||||
; GCN-NEXT: ; kill: killed $vgpr64
|
||||
; GCN-NEXT: ; kill: killed $vgpr68
|
||||
; GCN-NEXT: ; kill: killed $vgpr72
|
||||
; GCN-NEXT: ; kill: killed $vgpr61
|
||||
; GCN-NEXT: ; kill: killed $vgpr65
|
||||
; GCN-NEXT: ; kill: killed $vgpr69
|
||||
; GCN-NEXT: ; kill: killed $vgpr73
|
||||
; GCN-NEXT: ; kill: killed $vgpr62
|
||||
; GCN-NEXT: ; kill: killed $vgpr66
|
||||
; GCN-NEXT: ; kill: killed $vgpr70
|
||||
; GCN-NEXT: ; kill: killed $vgpr74
|
||||
; GCN-NEXT: ; kill: killed $vgpr63
|
||||
; GCN-NEXT: ; kill: killed $vgpr67
|
||||
; GCN-NEXT: ; kill: killed $vgpr71
|
||||
; GCN-NEXT: ; kill: killed $vgpr75
|
||||
; GCN-NEXT: buffer_load_dword v61, v76, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v62, v77, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v63, v78, s[0:3], 0 offen
|
||||
; GCN-NEXT: ; kill: killed $vgpr76
|
||||
; GCN-NEXT: ; kill: killed $vgpr77
|
||||
; GCN-NEXT: ; kill: killed $vgpr78
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s36
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:448
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s37
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:452
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s38
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:456
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s39
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:460
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s40
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:464
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s41
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:468
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s42
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:472
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s43
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:476
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s44
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:480
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s45
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:484
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s46
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:488
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s47
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:492
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s48
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:496
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s49
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:500
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s50
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:504
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s51
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:508
|
||||
; GCN-NEXT: v_add_u32_e32 v0, s4, v16
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:256
|
||||
; GCN-NEXT: v_mov_b32_e32 v64, 0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_nop 0
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:260
|
||||
; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:264
|
||||
; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:268
|
||||
; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:272
|
||||
; GCN-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:276
|
||||
; GCN-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:280
|
||||
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:284
|
||||
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:288
|
||||
; GCN-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:292
|
||||
; GCN-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:296
|
||||
; GCN-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:300
|
||||
; GCN-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:304
|
||||
; GCN-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:308
|
||||
; GCN-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:312
|
||||
; GCN-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:316
|
||||
; GCN-NEXT: buffer_load_dword v16, off, s[0:3], 0 offset:320
|
||||
; GCN-NEXT: buffer_load_dword v17, off, s[0:3], 0 offset:324
|
||||
; GCN-NEXT: buffer_load_dword v18, off, s[0:3], 0 offset:328
|
||||
; GCN-NEXT: buffer_load_dword v19, off, s[0:3], 0 offset:332
|
||||
; GCN-NEXT: buffer_load_dword v20, off, s[0:3], 0 offset:336
|
||||
; GCN-NEXT: buffer_load_dword v21, off, s[0:3], 0 offset:340
|
||||
; GCN-NEXT: buffer_load_dword v22, off, s[0:3], 0 offset:344
|
||||
; GCN-NEXT: buffer_load_dword v23, off, s[0:3], 0 offset:348
|
||||
; GCN-NEXT: buffer_load_dword v24, off, s[0:3], 0 offset:352
|
||||
; GCN-NEXT: buffer_load_dword v25, off, s[0:3], 0 offset:356
|
||||
; GCN-NEXT: buffer_load_dword v26, off, s[0:3], 0 offset:360
|
||||
; GCN-NEXT: buffer_load_dword v27, off, s[0:3], 0 offset:364
|
||||
; GCN-NEXT: buffer_load_dword v28, off, s[0:3], 0 offset:368
|
||||
; GCN-NEXT: buffer_load_dword v29, off, s[0:3], 0 offset:372
|
||||
; GCN-NEXT: buffer_load_dword v30, off, s[0:3], 0 offset:376
|
||||
; GCN-NEXT: buffer_load_dword v31, off, s[0:3], 0 offset:380
|
||||
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], 0 offset:384
|
||||
; GCN-NEXT: buffer_load_dword v33, off, s[0:3], 0 offset:388
|
||||
; GCN-NEXT: buffer_load_dword v34, off, s[0:3], 0 offset:392
|
||||
; GCN-NEXT: buffer_load_dword v35, off, s[0:3], 0 offset:396
|
||||
; GCN-NEXT: buffer_load_dword v36, off, s[0:3], 0 offset:400
|
||||
; GCN-NEXT: buffer_load_dword v37, off, s[0:3], 0 offset:404
|
||||
; GCN-NEXT: buffer_load_dword v38, off, s[0:3], 0 offset:408
|
||||
; GCN-NEXT: buffer_load_dword v39, off, s[0:3], 0 offset:412
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], 0 offset:416
|
||||
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], 0 offset:420
|
||||
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], 0 offset:424
|
||||
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], 0 offset:428
|
||||
; GCN-NEXT: buffer_load_dword v44, off, s[0:3], 0 offset:432
|
||||
; GCN-NEXT: buffer_load_dword v45, off, s[0:3], 0 offset:436
|
||||
; GCN-NEXT: buffer_load_dword v46, off, s[0:3], 0 offset:440
|
||||
; GCN-NEXT: buffer_load_dword v47, off, s[0:3], 0 offset:444
|
||||
; GCN-NEXT: buffer_load_dword v48, off, s[0:3], 0 offset:448
|
||||
; GCN-NEXT: buffer_load_dword v49, off, s[0:3], 0 offset:452
|
||||
; GCN-NEXT: buffer_load_dword v50, off, s[0:3], 0 offset:456
|
||||
; GCN-NEXT: buffer_load_dword v51, off, s[0:3], 0 offset:460
|
||||
; GCN-NEXT: buffer_load_dword v52, off, s[0:3], 0 offset:464
|
||||
; GCN-NEXT: buffer_load_dword v53, off, s[0:3], 0 offset:468
|
||||
; GCN-NEXT: buffer_load_dword v54, off, s[0:3], 0 offset:472
|
||||
; GCN-NEXT: buffer_load_dword v55, off, s[0:3], 0 offset:476
|
||||
; GCN-NEXT: buffer_load_dword v56, off, s[0:3], 0 offset:480
|
||||
; GCN-NEXT: buffer_load_dword v57, off, s[0:3], 0 offset:484
|
||||
; GCN-NEXT: buffer_load_dword v58, off, s[0:3], 0 offset:488
|
||||
; GCN-NEXT: buffer_load_dword v59, off, s[0:3], 0 offset:492
|
||||
; GCN-NEXT: buffer_load_dword v60, off, s[0:3], 0 offset:496
|
||||
; GCN-NEXT: buffer_load_dword v61, off, s[0:3], 0 offset:500
|
||||
; GCN-NEXT: buffer_load_dword v62, off, s[0:3], 0 offset:504
|
||||
; GCN-NEXT: buffer_load_dword v63, off, s[0:3], 0 offset:508
|
||||
; GCN-NEXT: s_waitcnt vmcnt(60)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[8:9]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(57)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[8:9] offset:16
|
||||
; GCN-NEXT: s_waitcnt vmcnt(54)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[8:9] offset:32
|
||||
; GCN-NEXT: s_waitcnt vmcnt(51)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[8:9] offset:48
|
||||
; GCN-NEXT: s_waitcnt vmcnt(48)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[8:9] offset:64
|
||||
; GCN-NEXT: s_waitcnt vmcnt(45)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[8:9] offset:80
|
||||
; GCN-NEXT: s_waitcnt vmcnt(42)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[8:9] offset:96
|
||||
; GCN-NEXT: s_waitcnt vmcnt(39)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[8:9] offset:112
|
||||
; GCN-NEXT: s_waitcnt vmcnt(36)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[8:9] offset:128
|
||||
; GCN-NEXT: s_waitcnt vmcnt(33)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[8:9] offset:144
|
||||
; GCN-NEXT: s_waitcnt vmcnt(30)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[8:9] offset:160
|
||||
; GCN-NEXT: s_waitcnt vmcnt(27)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[8:9] offset:176
|
||||
; GCN-NEXT: s_waitcnt vmcnt(24)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[8:9] offset:192
|
||||
; GCN-NEXT: s_waitcnt vmcnt(21)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[8:9] offset:208
|
||||
; GCN-NEXT: s_waitcnt vmcnt(18)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[8:9] offset:224
|
||||
; GCN-NEXT: s_waitcnt vmcnt(15)
|
||||
; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[8:9] offset:240
|
||||
; GCN-NEXT: s_endpgm
|
||||
%vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
|
||||
|
|
|
@ -782,7 +782,6 @@ body: |
|
|||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_s32_from_1_fi_offset_4095
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
@ -810,6 +809,36 @@ body: |
|
|||
|
||||
...
|
||||
|
||||
# Have to hack around the copy of the constant to VGPR
|
||||
---
|
||||
name: load_private_s32_from_1_fi_offset_sgpr_4095
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
stack:
|
||||
- { id: 0, size: 4096, alignment: 4 }
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:sgpr(s32) = G_CONSTANT i32 4095
|
||||
%2:vgpr(s32) = COPY %1
|
||||
%3:vgpr(p5) = G_PTR_ADD %0, %2
|
||||
%4:vgpr(s32) = G_LOAD %3 :: (load 1, align 1, addrspace 5)
|
||||
$vgpr0 = COPY %4
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_s32_from_1_fi_offset_4096
|
||||
|
|
|
@ -32,16 +32,14 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
|
||||
; GCN-NEXT: s_load_dword s8, s[4:5], 0x10
|
||||
; GCN-NEXT: s_add_u32 s4, s32, 0x1000
|
||||
; GCN-NEXT: s_add_u32 s5, s4, 4
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_lshl_b32 s5, s8, 2
|
||||
; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 1
|
||||
; GCN-NEXT: s_add_u32 s4, s4, s5
|
||||
; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -103,16 +101,14 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
|||
; GCN-NEXT: s_load_dword s8, s[4:5], 0xc
|
||||
; GCN-NEXT: s_add_u32 s4, s32, 0x1000
|
||||
; GCN-NEXT: s_and_b32 s4, s4, 0xfffff000
|
||||
; GCN-NEXT: s_add_u32 s5, s4, 4
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_lshl_b32 s5, s8, 2
|
||||
; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 1
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 1
|
||||
; GCN-NEXT: s_add_u32 s4, s4, s5
|
||||
; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -156,7 +152,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; GCN-LABEL: func_non_entry_block_static_alloca_align4:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s8, s33
|
||||
; GCN-NEXT: s_mov_b32 s7, s33
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
|
@ -170,11 +166,9 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; GCN-NEXT: s_add_u32 s6, s32, 0x1000
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, s6
|
||||
; GCN-NEXT: s_add_u32 s7, s6, 4
|
||||
; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 1
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:4
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v4
|
||||
; GCN-NEXT: v_add_u32_e32 v2, s6, v2
|
||||
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
||||
|
@ -188,7 +182,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
|||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_mov_b32 s33, s8
|
||||
; GCN-NEXT: s_mov_b32 s33, s7
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
|
||||
entry:
|
||||
|
@ -222,7 +216,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; GCN-LABEL: func_non_entry_block_static_alloca_align64:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s8, s33
|
||||
; GCN-NEXT: s_mov_b32 s7, s33
|
||||
; GCN-NEXT: s_add_u32 s33, s32, 0xfc0
|
||||
; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
|
@ -232,13 +226,11 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x1000
|
||||
; GCN-NEXT: s_and_b32 s6, s6, 0xfffff000
|
||||
; GCN-NEXT: s_add_u32 s7, s6, 4
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GCN-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 1
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s7
|
||||
; GCN-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
|
||||
; GCN-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen offset:4
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v3
|
||||
; GCN-NEXT: v_add_u32_e32 v2, s6, v2
|
||||
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
||||
|
@ -252,7 +244,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
|||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x2000
|
||||
; GCN-NEXT: s_mov_b32 s33, s8
|
||||
; GCN-NEXT: s_mov_b32 s33, s7
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
%cond = icmp eq i32 %arg.cond, 0
|
||||
|
|
Loading…
Reference in New Issue