forked from OSchip/llvm-project
AMDGPU/LoadStoreOptimizer: Set the correct offset when merging MMOs
Summary: This is a follow-up to r367237. MachineFunction::getMachineMemOperand() adds the offset parameter to the existing offset instead of resetting it. So we need to reset the offset to the correct value after calling this function. Reviewers: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65557 llvm-svn: 367881
This commit is contained in:
parent
1a29823b9c
commit
e15d95a987
|
@ -313,7 +313,12 @@ static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
|
|||
const MachineMemOperand *B) {
|
||||
unsigned MinOffset = std::min(A->getOffset(), B->getOffset());
|
||||
unsigned Size = A->getSize() + B->getSize();
|
||||
return MF.getMachineMemOperand(A, MinOffset, Size);
|
||||
// This function adds the offset parameter to the existing offset for A,
|
||||
// so we pass 0 here as the offset and then manually set it to the correct
|
||||
// value after the call.
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size);
|
||||
MMO->setOffset(MinOffset);
|
||||
return MMO;
|
||||
}
|
||||
|
||||
bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
|
||||
|
|
|
@ -65,7 +65,8 @@
|
|||
|
||||
attributes #0 = { convergent nounwind }
|
||||
|
||||
define amdgpu_kernel void @merge_mmos() { ret void }
|
||||
define amdgpu_kernel void @merge_mmos(i32 addrspace(1)* %ptr_addr1) { ret void }
|
||||
|
||||
...
|
||||
---
|
||||
name: mem_dependency
|
||||
|
@ -170,6 +171,8 @@ body: |
|
|||
# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 8, align 4)
|
||||
# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 4)
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 4)
|
||||
# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from %ir.ptr_addr1 + 64, align 4
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.ptr_addr1 + 64, align 4
|
||||
name: merge_mmos
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
|
@ -183,6 +186,11 @@ body: |
|
|||
%4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
|
||||
%5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 64)
|
||||
%6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 68)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 64)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 68)
|
||||
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue