forked from OSchip/llvm-project
[AMDGPU] detect WaW hazards when moving/merging load/store instructions
Summary: In order to combine memory operations efficiently, the load/store optimizer might move some instructions around. It's usually safe to move instructions down past the merged instruction because the pass checks whether memory operations can be re-ordered. However, the current logic doesn't handle Write-after-Write (WaW) hazards. This fixes a reflection issue with Monster Hunter World and DXVK. v2: - rebased on top of master - cleaned up the test case - handle WaW hazards correctly Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=40130 Original patch by Samuel Pitoiset. Reviewers: tpr, arsenm, nhaehnle Reviewed By: nhaehnle Subscribers: ronlieb, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye Differential Revision: https://reviews.llvm.org/D61313 llvm-svn: 361008
This commit is contained in:
parent
929af67361
commit
c4bc61bad7
|
@ -281,6 +281,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
|
|||
// registers are in SSA form.
|
||||
if (Use.isReg() &&
|
||||
((Use.readsReg() && RegDefs.count(Use.getReg())) ||
|
||||
(Use.isDef() && RegDefs.count(Use.getReg())) ||
|
||||
(Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
|
||||
PhysRegUses.count(Use.getReg())))) {
|
||||
Insts.push_back(&MI);
|
||||
|
|
|
@ -59,6 +59,11 @@
|
|||
attributes #0 = { convergent nounwind }
|
||||
attributes #1 = { convergent nounwind readnone }
|
||||
|
||||
define amdgpu_kernel void @move_waw_hazards() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { convergent nounwind }
|
||||
...
|
||||
---
|
||||
name: mem_dependency
|
||||
|
@ -129,3 +134,32 @@ body: |
|
|||
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit %6, implicit %7
|
||||
|
||||
...
|
||||
---
|
||||
# Make sure Write-after-Write hazards are correctly detected and the
|
||||
# dependent instructions are moved together with the merged memory
|
||||
# operations.
|
||||
# CHECK-LABEL: name: move_waw_hazards
|
||||
# CHECK: S_AND_B64
|
||||
# CHECK: S_CMP_EQ_U32
|
||||
name: move_waw_hazards
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
|
||||
%3:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%6:sreg_32_xm0_xexec = S_MOV_B32 0
|
||||
%7:sreg_32_xm0 = S_MOV_B32 0
|
||||
%8:sreg_64_xexec = REG_SEQUENCE killed %6, %subreg.sub0, %7, %subreg.sub1
|
||||
%9:sreg_128 = S_LOAD_DWORDX4_IMM killed %8, 0, 0, 0 :: (invariant load 16, addrspace 6)
|
||||
%31:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %9, 0, 0, 0 :: (dereferenceable invariant load 4)
|
||||
%10:sreg_32_xm0_xexec = COPY %31.sub0
|
||||
%11:sreg_32_xm0_xexec = COPY killed %31.sub1
|
||||
%12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 2, 0, 0 :: (dereferenceable invariant load 4)
|
||||
%13:sreg_64 = V_CMP_NE_U32_e64 killed %11, 0, implicit $exec
|
||||
%15:sreg_64 = V_CMP_NE_U32_e64 killed %12, 0, implicit $exec
|
||||
%17:sreg_64_xexec = S_AND_B64 killed %13, killed %15, implicit-def dead $scc
|
||||
S_CMP_EQ_U32 killed %10, 0, implicit-def $scc
|
||||
%18:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 3, 0, 0 :: (dereferenceable invariant load 4)
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue