[AMDGPU] Limit memory scope for scratch, LDS and GDS
Changes for AMD GPU SIMemoryLegalizer:

- Limit the memory scope to the maximum supported by the scratch, LDS and GDS address spaces.
- Improve assertion checking.
- Correct the toSIAtomicScope argument name.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D96643
commit 8a91b68b95
parent b4c0d610a6
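The scope-limiting rule can be read independently of the diff below: an access that touches only scratch (private) memory can never be observed by another thread, an access confined to scratch and LDS is visible at most within a workgroup, and one that additionally touches GDS is visible at most within an agent, so any wider requested scope can safely be clamped down. The following is a minimal, self-contained C++ sketch of that rule; the Scope and AddrSpace enums and the limitScope helper are illustrative names invented for this example, not the SIAtomicScope/SIAtomicAddrSpace types used by SIMemoryLegalizer itself.

// Illustrative sketch only: simplified stand-ins for the pass's
// SIAtomicScope/SIAtomicAddrSpace machinery (requires C++14 or later).
#include <algorithm>
#include <cstdint>

// Scopes ordered from narrowest to widest, so std::min picks the narrower one.
enum class Scope { SingleThread, Wavefront, Workgroup, Agent, System };

// Bit mask of address spaces an instruction may access.
enum AddrSpace : uint32_t {
  Scratch = 1u << 0, // private, per work-item
  LDS     = 1u << 1, // local data share, per workgroup
  GDS     = 1u << 2, // global data share, per agent (device)
  Global  = 1u << 3, // visible system-wide
};

// Clamp the requested scope to the widest scope at which the accessed
// address spaces can actually be observed.
constexpr Scope limitScope(Scope Requested, uint32_t InstrAddrSpace) {
  if ((InstrAddrSpace & ~Scratch) == 0)
    return std::min(Requested, Scope::SingleThread);
  if ((InstrAddrSpace & ~(Scratch | LDS)) == 0)
    return std::min(Requested, Scope::Workgroup);
  if ((InstrAddrSpace & ~(Scratch | LDS | GDS)) == 0)
    return std::min(Requested, Scope::Agent);
  return Requested; // accesses reaching global memory keep the requested scope
}

// A system-scope atomic that only touches LDS is clamped to workgroup scope;
// one that also touches global memory keeps its requested scope.
static_assert(limitScope(Scope::System, LDS) == Scope::Workgroup, "");
static_assert(limitScope(Scope::System, Global | LDS) == Scope::System, "");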
@@ -129,12 +129,43 @@ private:
       IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
       IsVolatile(IsVolatile),
       IsNonTemporal(IsNonTemporal) {
+
+    if (Ordering == AtomicOrdering::NotAtomic) {
+      assert(Scope == SIAtomicScope::NONE &&
+             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
+             !IsCrossAddressSpaceOrdering &&
+             FailureOrdering == AtomicOrdering::NotAtomic);
+      return;
+    }
+
+    assert(Scope != SIAtomicScope::NONE &&
+           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
+               SIAtomicAddrSpace::NONE &&
+           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
+               SIAtomicAddrSpace::NONE &&
+           !isStrongerThan(FailureOrdering, Ordering));
+
     // There is also no cross address space ordering if the ordering
     // address space is the same as the instruction address space and
     // only contains a single address space.
     if ((OrderingAddrSpace == InstrAddrSpace) &&
         isPowerOf2_32(uint32_t(InstrAddrSpace)))
       this->IsCrossAddressSpaceOrdering = false;
+
+    // Limit the scope to the maximum supported by the instruction's address
+    // spaces.
+    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
+        SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
+    } else if ((InstrAddrSpace &
+                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
+               SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
+    } else if ((InstrAddrSpace &
+                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
+                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
+    }
   }
 
 public:
@@ -202,12 +233,12 @@ private:
   void reportUnsupported(const MachineBasicBlock::iterator &MI,
                          const char *Msg) const;
 
-  /// Inspects the target synchonization scope \p SSID and determines
+  /// Inspects the target synchronization scope \p SSID and determines
   /// the SI atomic scope it corresponds to, the address spaces it
   /// covers, and whether the memory ordering applies between address
   /// spaces.
   Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
-  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
+  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;
 
   /// \return Return a bit set of the address spaces accessed by \p AS.
   SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
@@ -476,7 +507,7 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
 
 Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
 SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
-                               SIAtomicAddrSpace InstrScope) const {
+                               SIAtomicAddrSpace InstrAddrSpace) const {
   if (SSID == SyncScope::System)
     return std::make_tuple(SIAtomicScope::SYSTEM,
                            SIAtomicAddrSpace::ATOMIC,
@@ -499,23 +530,23 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                            true);
   if (SSID == MMI->getSystemOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SYSTEM,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getAgentOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::AGENT,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WORKGROUP,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WAVEFRONT,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SINGLETHREAD,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   return None;
 }
@@ -591,7 +622,8 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
   std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
       ScopeOrNone.getValue();
   if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
-      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
+      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
+      ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
     reportUnsupported(MI, "Unsupported atomic address space");
     return None;
   }
@@ -659,7 +691,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
   }
 
   return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
-                     IsCrossAddressSpaceOrdering);
+                     IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
 }
 
 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
File diff suppressed because it is too large.
@@ -13,10 +13,9 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    v_not_b32_e32 v1, v2
 ; GCN-NEXT:    v_or_b32_e32 v1, -5, v1
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    ds_cmpst_rtn_b32 v1, v0, v2, v1
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    buffer_wbinvl1_vol
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
 ; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
@@ -34,7 +34,7 @@ define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
 ; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
 ; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
 ; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
-; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
+; HAS-ATOMICS: s_waitcnt lgkmcnt(0)
 ; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
 define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
   %idx.add = add nuw i32 %idx, 4
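The check-line updates above reflect the scope limiting: an atomic that only accesses LDS is now treated as workgroup scope at most, so the legalizer no longer waits for outstanding vector memory operations (s_waitcnt vmcnt(0)) or invalidates the vector L1 cache (buffer_wbinvl1_vol) around it; waiting for LDS traffic with s_waitcnt lgkmcnt(0) suffices.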
Two further file diffs suppressed because they are too large.