forked from OSchip/llvm-project
Revert "[AMDGPU] Omit unnecessary waitcnt before barriers"
This reverts commit 8d0c34fd4f.
This commit is contained in:
parent
bcdb11e741
commit
7f97ac94f7
|
@ -737,12 +737,6 @@ def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
|
|||
"Hardware automatically inserts waitcnt before barrier"
|
||||
>;
|
||||
|
||||
def FeatureBackOffBarrier : SubtargetFeature <"back-off-barrier",
|
||||
"BackOffBarrier",
|
||||
"true",
|
||||
"Hardware supports backing off s_barrier if an exception occurs"
|
||||
>;
|
||||
|
||||
def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
|
||||
"HasTrigReducedRange",
|
||||
"true",
|
||||
|
@ -1031,8 +1025,7 @@ def FeatureISAVersion9_0_A : FeatureSet<
|
|||
FeatureMadMacF32Insts,
|
||||
FeatureSupportsSRAMECC,
|
||||
FeaturePackedTID,
|
||||
FullRate64Ops,
|
||||
FeatureBackOffBarrier]>;
|
||||
FullRate64Ops]>;
|
||||
|
||||
def FeatureISAVersion9_0_C : FeatureSet<
|
||||
[FeatureGFX9,
|
||||
|
@ -1066,8 +1059,7 @@ def FeatureISAVersion9_4_0 : FeatureSet<
|
|||
FeatureSupportsSRAMECC,
|
||||
FeaturePackedTID,
|
||||
FeatureArchitectedFlatScratch,
|
||||
FullRate64Ops,
|
||||
FeatureBackOffBarrier]>;
|
||||
FullRate64Ops]>;
|
||||
|
||||
// TODO: Organize more features into groups.
|
||||
def FeatureGroup {
|
||||
|
@ -1102,8 +1094,7 @@ def FeatureISAVersion10_1_0 : FeatureSet<
|
|||
FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts,
|
||||
FeatureLdsMisalignedBug,
|
||||
FeatureSupportsXNACK,
|
||||
FeatureBackOffBarrier])>;
|
||||
FeatureSupportsXNACK])>;
|
||||
|
||||
def FeatureISAVersion10_1_1 : FeatureSet<
|
||||
!listconcat(FeatureGroup.GFX10_1_Bugs,
|
||||
|
@ -1125,8 +1116,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
|
|||
FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts,
|
||||
FeatureLdsMisalignedBug,
|
||||
FeatureSupportsXNACK,
|
||||
FeatureBackOffBarrier])>;
|
||||
FeatureSupportsXNACK])>;
|
||||
|
||||
def FeatureISAVersion10_1_2 : FeatureSet<
|
||||
!listconcat(FeatureGroup.GFX10_1_Bugs,
|
||||
|
@ -1148,8 +1138,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
|
|||
FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts,
|
||||
FeatureLdsMisalignedBug,
|
||||
FeatureSupportsXNACK,
|
||||
FeatureBackOffBarrier])>;
|
||||
FeatureSupportsXNACK])>;
|
||||
|
||||
def FeatureISAVersion10_1_3 : FeatureSet<
|
||||
!listconcat(FeatureGroup.GFX10_1_Bugs,
|
||||
|
@ -1167,8 +1156,7 @@ def FeatureISAVersion10_1_3 : FeatureSet<
|
|||
FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts,
|
||||
FeatureLdsMisalignedBug,
|
||||
FeatureSupportsXNACK,
|
||||
FeatureBackOffBarrier])>;
|
||||
FeatureSupportsXNACK])>;
|
||||
|
||||
def FeatureISAVersion10_3_0 : FeatureSet<
|
||||
[FeatureGFX10,
|
||||
|
@ -1185,8 +1173,7 @@ def FeatureISAVersion10_3_0 : FeatureSet<
|
|||
FeatureNSAEncoding,
|
||||
FeatureNSAMaxSize13,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureShaderCyclesRegister,
|
||||
FeatureBackOffBarrier]>;
|
||||
FeatureShaderCyclesRegister]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
|
|
@ -72,7 +72,6 @@ protected:
|
|||
// Dynamically set bits that enable features.
|
||||
bool FlatForGlobal = false;
|
||||
bool AutoWaitcntBeforeBarrier = false;
|
||||
bool BackOffBarrier = false;
|
||||
bool UnalignedScratchAccess = false;
|
||||
bool UnalignedAccessMode = false;
|
||||
bool HasApertureRegs = false;
|
||||
|
@ -494,12 +493,6 @@ public:
|
|||
return AutoWaitcntBeforeBarrier;
|
||||
}
|
||||
|
||||
/// \returns true if the target supports backing off of s_barrier instructions
|
||||
/// when an exception is raised.
|
||||
bool supportsBackOffBarrier() const {
|
||||
return BackOffBarrier;
|
||||
}
|
||||
|
||||
bool hasUnalignedBufferAccess() const {
|
||||
return UnalignedBufferAccess;
|
||||
}
|
||||
|
|
|
@ -1135,12 +1135,12 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
|
|||
}
|
||||
}
|
||||
|
||||
// The subtarget may have an implicit S_WAITCNT 0 before barriers. If it does
|
||||
// not, we need to ensure the subtarget is capable of backing off barrier
|
||||
// instructions in case there are any outstanding memory operations that may
|
||||
// cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
|
||||
// Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0
|
||||
// occurs before the instruction. Doing it here prevents any additional
|
||||
// S_WAITCNTs from being emitted if the instruction was marked as
|
||||
// requiring a WAITCNT beforehand.
|
||||
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
|
||||
!ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
|
||||
!ST->hasAutoWaitcntBeforeBarrier()) {
|
||||
Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
|
||||
}
|
||||
|
||||
|
|
|
@ -1,97 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s
|
||||
|
||||
; Subtargets must wait for outstanding memory instructions before a barrier if
|
||||
; they cannot back off of the barrier.
|
||||
|
||||
define void @back_off_barrier_no_fence(i32* %in, i32* %out) #0 {
|
||||
; GFX9-NO-BACKOFF-LABEL: back_off_barrier_no_fence:
|
||||
; GFX9-NO-BACKOFF: ; %bb.0:
|
||||
; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NO-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NO-BACKOFF-NEXT: s_barrier
|
||||
; GFX9-NO-BACKOFF-NEXT: flat_store_dword v[2:3], v0
|
||||
; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NO-BACKOFF-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-BACKOFF-LABEL: back_off_barrier_no_fence:
|
||||
; GFX9-BACKOFF: ; %bb.0:
|
||||
; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX9-BACKOFF-NEXT: s_barrier
|
||||
; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-BACKOFF-NEXT: flat_store_dword v[2:3], v0
|
||||
; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence:
|
||||
; GFX10-BACKOFF: ; %bb.0:
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX10-BACKOFF-NEXT: s_barrier
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i32, i32* %in
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
store i32 %load, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @back_off_barrier_with_fence(i32* %in, i32* %out) #0 {
|
||||
; GFX9-NO-BACKOFF-LABEL: back_off_barrier_with_fence:
|
||||
; GFX9-NO-BACKOFF: ; %bb.0:
|
||||
; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NO-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NO-BACKOFF-NEXT: s_barrier
|
||||
; GFX9-NO-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NO-BACKOFF-NEXT: flat_store_dword v[2:3], v0
|
||||
; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NO-BACKOFF-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-BACKOFF-LABEL: back_off_barrier_with_fence:
|
||||
; GFX9-BACKOFF: ; %bb.0:
|
||||
; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX9-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-BACKOFF-NEXT: s_barrier
|
||||
; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-BACKOFF-NEXT: flat_store_dword v[2:3], v0
|
||||
; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence:
|
||||
; GFX10-BACKOFF: ; %bb.0:
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-BACKOFF-NEXT: s_barrier
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-BACKOFF-NEXT: buffer_gl0_inv
|
||||
; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i32, i32* %in
|
||||
fence syncscope("workgroup") release
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
fence syncscope("workgroup") acquire
|
||||
store i32 %load, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.s.barrier()
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -35,7 +35,7 @@ body: |
|
|||
; GFX10: S_WAITCNT 0
|
||||
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
|
||||
; GFX10: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
||||
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 1
|
||||
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
|
||||
; GFX10: S_BARRIER
|
||||
; GFX10: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GFX10: S_WAITCNT 112
|
||||
|
@ -112,7 +112,7 @@ body: |
|
|||
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
|
||||
; GFX10: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
||||
; GFX10: S_WAITCNT 0
|
||||
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 1
|
||||
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
|
||||
; GFX10: S_BARRIER
|
||||
; GFX10: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GFX10: S_WAITCNT 112
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx802 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9_10,GFX8_9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-back-off-barrier -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s
|
||||
|
||||
; GCN-LABEL: barrier_vmcnt_global:
|
||||
; GFX8: flat_load_dword
|
||||
|
@ -42,7 +42,7 @@ bb:
|
|||
%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4
|
||||
store i32 0, i32 addrspace(1)* %tmp5, align 4
|
||||
fence syncscope("singlethread") release
|
||||
tail call void @llvm.amdgcn.s.barrier()
|
||||
tail call void @llvm.amdgcn.s.barrier() #3
|
||||
fence syncscope("singlethread") acquire
|
||||
%tmp6 = add nuw nsw i64 %tmp2, 4294967296
|
||||
%tmp7 = lshr exact i64 %tmp6, 32
|
||||
|
@ -116,7 +116,7 @@ bb:
|
|||
%tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4
|
||||
store i32 0, i32* %tmp5, align 4
|
||||
fence syncscope("singlethread") release
|
||||
tail call void @llvm.amdgcn.s.barrier()
|
||||
tail call void @llvm.amdgcn.s.barrier() #3
|
||||
fence syncscope("singlethread") acquire
|
||||
%tmp6 = add nuw nsw i64 %tmp2, 4294967296
|
||||
%tmp7 = lshr exact i64 %tmp6, 32
|
||||
|
|
Loading…
Reference in New Issue