forked from OSchip/llvm-project
[AMDGPU] Extend SILoadStoreOptimizer to handle global saddr loads
This adds handling of the _SADDR forms to the GLOBAL_LOAD combining. TODO: merge global stores. TODO: merge flat loads/stores. TODO: merge flat with global accesses, promoting the result to flat. Differential Revision: https://reviews.llvm.org/D120285
This commit is contained in:
parent
f4e9df22b5
commit
9e055c0fff
|
@ -79,7 +79,8 @@ enum InstClassEnum {
|
|||
MIMG,
|
||||
TBUFFER_LOAD,
|
||||
TBUFFER_STORE,
|
||||
GLOBAL_LOAD
|
||||
GLOBAL_LOAD,
|
||||
GLOBAL_LOAD_SADDR
|
||||
};
|
||||
|
||||
struct AddressRegs {
|
||||
|
@ -87,6 +88,7 @@ struct AddressRegs {
|
|||
bool SBase = false;
|
||||
bool SRsrc = false;
|
||||
bool SOffset = false;
|
||||
bool SAddr = false;
|
||||
bool VAddr = false;
|
||||
bool Addr = false;
|
||||
bool SSamp = false;
|
||||
|
@ -305,14 +307,18 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
|
|||
switch (Opc) {
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORD:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
|
||||
return 1;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX2:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
|
||||
return 2;
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX3:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
|
||||
return 3;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX4:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
|
||||
return 4;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
|
||||
return 8;
|
||||
|
@ -402,6 +408,11 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
|
|||
case AMDGPU::GLOBAL_LOAD_DWORDX3:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX4:
|
||||
return GLOBAL_LOAD;
|
||||
case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
|
||||
return GLOBAL_LOAD_SADDR;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -440,6 +451,11 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
|
|||
case AMDGPU::GLOBAL_LOAD_DWORDX3:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX4:
|
||||
return AMDGPU::GLOBAL_LOAD_DWORD;
|
||||
case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
|
||||
return AMDGPU::GLOBAL_LOAD_DWORD_SADDR;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -502,6 +518,12 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
|
|||
case AMDGPU::DS_WRITE_B64_gfx9:
|
||||
Result.Addr = true;
|
||||
return Result;
|
||||
case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
|
||||
Result.SAddr = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AMDGPU::GLOBAL_LOAD_DWORD:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX2:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX3:
|
||||
|
@ -579,6 +601,9 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
|
|||
if (Regs.SOffset)
|
||||
AddrIdx[NumAddresses++] =
|
||||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset);
|
||||
if (Regs.SAddr)
|
||||
AddrIdx[NumAddresses++] =
|
||||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
|
||||
if (Regs.VAddr)
|
||||
AddrIdx[NumAddresses++] =
|
||||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
|
||||
|
@ -1402,6 +1427,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair(
|
|||
|
||||
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg);
|
||||
|
||||
if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
|
||||
MIB.add(*SAddr);
|
||||
|
||||
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
|
||||
const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
|
||||
|
||||
|
@ -1471,6 +1499,17 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
|
|||
case 4:
|
||||
return AMDGPU::GLOBAL_LOAD_DWORDX4;
|
||||
}
|
||||
case GLOBAL_LOAD_SADDR:
|
||||
switch (Width) {
|
||||
default:
|
||||
return 0;
|
||||
case 2:
|
||||
return AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR;
|
||||
case 3:
|
||||
return AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR;
|
||||
case 4:
|
||||
return AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR;
|
||||
}
|
||||
case MIMG:
|
||||
assert((countPopulation(CI.DMask | Paired.DMask) == Width) &&
|
||||
"No overlaps");
|
||||
|
@ -2115,6 +2154,7 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
|
|||
OptimizeListAgain |= CI.Width + Paired.Width < 4;
|
||||
break;
|
||||
case GLOBAL_LOAD:
|
||||
case GLOBAL_LOAD_SADDR:
|
||||
NewMI = mergeGlobalLoadPair(CI, Paired, Where->I);
|
||||
OptimizeListAgain |= CI.Width + Paired.Width < 4;
|
||||
break;
|
||||
|
|
|
@ -228,3 +228,172 @@ body: |
|
|||
%2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %1, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
name: merge_global_load_dword_saddr_2
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
||||
; GCN-LABEL: name: merge_global_load_dword_saddr_2
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s64) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
|
||||
; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]]
|
||||
%0:sreg_64_xexec = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %2, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: merge_global_load_dword_saddr_3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
||||
; GCN-LABEL: name: merge_global_load_dword_saddr_3
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec :: (load (s96) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0_sub1
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX3_SADDR]].sub2
|
||||
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1
|
||||
; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]]
|
||||
%0:sreg_64_xexec = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 8, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %2, implicit %3, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: merge_global_load_dword_saddr_4
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
||||
; GCN-LABEL: name: merge_global_load_dword_saddr_4
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 2, implicit $exec :: (load (s128) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3
|
||||
; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1
|
||||
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2
|
||||
; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
|
||||
; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1
|
||||
; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]]
|
||||
%0:sreg_64_xexec = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 8, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 12, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %2, implicit %3, implicit %4, implicit %5
|
||||
...
|
||||
|
||||
---
|
||||
name: merge_global_load_dword_saddr_6
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
||||
; GCN-LABEL: name: merge_global_load_dword_saddr_6
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 4, 3, implicit $exec :: (load (s128) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3
|
||||
; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1
|
||||
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2
|
||||
; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
|
||||
; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 20, 3, implicit $exec :: (load (s64) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
|
||||
; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
|
||||
; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]]
|
||||
%0:sreg_64_xexec = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 8, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 12, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 16, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%6:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 20, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%7:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 24, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
|
||||
...
|
||||
|
||||
---
|
||||
name: merge_global_load_dwordx2_saddr
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
||||
; GCN-LABEL: name: merge_global_load_dwordx2_saddr
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s128) from `i64 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub2_sub3
|
||||
; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]]
|
||||
%0:sreg_64_xexec = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR %0, %1, 0, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR %0, %1, 8, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %2, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: no_merge_global_load_dword_and_global_load_dword_saddr
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
||||
; GCN-LABEL: name: no_merge_global_load_dword_and_global_load_dword_saddr
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD]], implicit [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sreg_64_xexec = IMPLICIT_DEF
|
||||
%1:vreg_64_align2 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = GLOBAL_LOAD_DWORD %1, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %2, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: no_merge_global_load_dword_saddr_different_saddr
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
||||
; GCN-LABEL: name: no_merge_global_load_dword_saddr_different_saddr
|
||||
; GCN: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]].sub0_sub1, [[DEF1]], 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]].sub2_sub3, [[DEF1]], 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[GLOBAL_LOAD_DWORD_SADDR1]]
|
||||
%0:sgpr_128 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0.sub0_sub1, %1, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0.sub2_sub3, %1, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %2, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: no_merge_global_load_dword_saddr_different_vaddr
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
||||
; GCN-LABEL: name: no_merge_global_load_dword_saddr_different_vaddr
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub1, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[GLOBAL_LOAD_DWORD_SADDR1]]
|
||||
%0:sreg_64_xexec = IMPLICIT_DEF
|
||||
%1:vreg_64_align2 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub1, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %2, implicit %3
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue