[AMDGPU] Limit memory scope for scratch, LDS and GDS

Changes for the AMDGPU SIMemoryLegalizer:

- Limit the memory scope to the maximum supported by the scratch, LDS and
  GDS address spaces (see the sketch before the diffs below).

- Improve assertion checking.

- Correct toSIAtomicScope argument name.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D96643
Tony Tye 2021-02-13 08:43:48 +00:00
parent b4c0d610a6
commit 8a91b68b95
6 changed files with 587 additions and 967 deletions
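
Before the diffs, a minimal self-contained sketch of the new scope clamp. It uses illustrative stand-in enums and bit masks, not the pass's real SIAtomicScope/SIAtomicAddrSpace definitions. The underlying visibility rules: scratch (private) memory is per-thread, LDS is visible only within a workgroup, and GDS is shared across a single agent (device), so an access limited to those spaces never needs a wider scope.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Illustrative stand-ins; the real definitions live in SIMemoryLegalizer.cpp.
enum Scope { SINGLETHREAD, WAVEFRONT, WORKGROUP, AGENT, SYSTEM };
constexpr uint32_t GLOBAL = 1, LDS = 2, SCRATCH = 4, GDS = 8;

// Clamp a requested scope to the widest one observable through the address
// spaces the instruction can access.
Scope clampScope(Scope Requested, uint32_t InstrAS) {
  if ((InstrAS & ~SCRATCH) == 0)
    return std::min(Requested, SINGLETHREAD); // scratch: thread-private
  if ((InstrAS & ~(SCRATCH | LDS)) == 0)
    return std::min(Requested, WORKGROUP);    // LDS: workgroup-visible
  if ((InstrAS & ~(SCRATCH | LDS | GDS)) == 0)
    return std::min(Requested, AGENT);        // GDS: visible device-wide only
  return Requested;                           // global memory: no clamp
}

int main() {
  // A system-scope atomic that can only touch LDS acts as workgroup scope.
  assert(clampScope(SYSTEM, LDS) == WORKGROUP);
  // Once global memory may be involved, the requested scope is kept.
  assert(clampScope(SYSTEM, LDS | GLOBAL) == SYSTEM);
}

Clamping is sound because no thread outside the clamped scope can observe those address spaces; keeping the wider scope would only add unnecessary waits and cache maintenance, as the test diffs below show.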


@@ -129,12 +129,43 @@ private:
         IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
         IsVolatile(IsVolatile),
         IsNonTemporal(IsNonTemporal) {
+    if (Ordering == AtomicOrdering::NotAtomic) {
+      assert(Scope == SIAtomicScope::NONE &&
+             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
+             !IsCrossAddressSpaceOrdering &&
+             FailureOrdering == AtomicOrdering::NotAtomic);
+      return;
+    }
+
+    assert(Scope != SIAtomicScope::NONE &&
+           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
+               SIAtomicAddrSpace::NONE &&
+           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
+               SIAtomicAddrSpace::NONE &&
+           !isStrongerThan(FailureOrdering, Ordering));
+
     // There is also no cross address space ordering if the ordering
     // address space is the same as the instruction address space and
     // only contains a single address space.
     if ((OrderingAddrSpace == InstrAddrSpace) &&
         isPowerOf2_32(uint32_t(InstrAddrSpace)))
       this->IsCrossAddressSpaceOrdering = false;
+
+    // Limit the scope to the maximum supported by the instruction's address
+    // spaces.
+    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
+        SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
+    } else if ((InstrAddrSpace &
+                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
+               SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
+    } else if ((InstrAddrSpace &
+                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
+                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
+    }
   }

 public:
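
The pre-existing isPowerOf2_32 test in the hunk above exploits the bit-mask representation of address-space sets: a mask with exactly one bit set is a power of two, so ordering confined to it cannot be cross-address-space. A hedged illustration (the bit positions are assumptions, not the real enumerator values):

#include <cassert>
#include <cstdint>

// Same definition as LLVM's llvm::isPowerOf2_32, restated to stay
// self-contained.
static bool isPowerOf2_32(uint32_t V) { return V && !(V & (V - 1)); }

int main() {
  const uint32_t GLOBAL = 1u << 0, LDS = 1u << 1; // illustrative bit positions
  assert(isPowerOf2_32(LDS));           // one space: no cross-AS ordering
  assert(!isPowerOf2_32(GLOBAL | LDS)); // two spaces: ordering may cross
}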
@@ -202,12 +233,12 @@ private:
   void reportUnsupported(const MachineBasicBlock::iterator &MI,
                          const char *Msg) const;

-  /// Inspects the target synchonization scope \p SSID and determines
+  /// Inspects the target synchronization scope \p SSID and determines
   /// the SI atomic scope it corresponds to, the address spaces it
   /// covers, and whether the memory ordering applies between address
   /// spaces.
   Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
-  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
+  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

   /// \return Return a bit set of the address spaces accessed by \p AS.
   SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
@ -476,7 +507,7 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>> Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID, SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
SIAtomicAddrSpace InstrScope) const { SIAtomicAddrSpace InstrAddrSpace) const {
if (SSID == SyncScope::System) if (SSID == SyncScope::System)
return std::make_tuple(SIAtomicScope::SYSTEM, return std::make_tuple(SIAtomicScope::SYSTEM,
SIAtomicAddrSpace::ATOMIC, SIAtomicAddrSpace::ATOMIC,
@@ -499,23 +530,23 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                            true);
   if (SSID == MMI->getSystemOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SYSTEM,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getAgentOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::AGENT,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WORKGROUP,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WAVEFRONT,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SINGLETHREAD,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   return None;
 }
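
Each one-address-space scope above covers only the address spaces the instruction itself can access, hence the intersection with SIAtomicAddrSpace::ATOMIC. A worked example using the same assumed masks as the earlier sketches:

#include <cstdint>

// Assumed masks; the real SIAtomicAddrSpace enumerators differ.
constexpr uint32_t GLOBAL = 1, LDS = 2, SCRATCH = 4, GDS = 8;
constexpr uint32_t ATOMIC = GLOBAL | LDS | SCRATCH | GDS;

// An LDS-only instruction under an agent-one-as scope covers exactly LDS,
// and the cross-address-space flag in the returned tuple is false.
static_assert((ATOMIC & LDS) == LDS, "intersection keeps just the LDS bit");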
@@ -591,7 +622,8 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
   std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
       ScopeOrNone.getValue();
   if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
-      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
+      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
+      ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
     reportUnsupported(MI, "Unsupported atomic address space");
     return None;
   }
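
The strengthened check above now also rejects instructions that touch no atomic address space at all. A compact restatement of the three rejection conditions, with the same assumed stand-in masks as before:

#include <cstdint>

constexpr uint32_t NONE = 0, GLOBAL = 1, LDS = 2, SCRATCH = 4, GDS = 8;
constexpr uint32_t ATOMIC = GLOBAL | LDS | SCRATCH | GDS;

// True when the memory operation must be reported as unsupported.
bool unsupportedAtomicAS(uint32_t OrderingAS, uint32_t InstrAS) {
  return OrderingAS == NONE ||                  // nothing to order
         (OrderingAS & ATOMIC) != OrderingAS || // orders a non-atomic space
         (InstrAS & ATOMIC) == NONE;            // new: no atomic space touched
}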
@@ -659,7 +691,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
   }

   return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
-                     IsCrossAddressSpaceOrdering);
+                     IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
 }

 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(


@@ -13,10 +13,9 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    v_not_b32_e32 v1, v2
 ; GCN-NEXT:    v_or_b32_e32 v1, -5, v1
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    ds_cmpst_rtn_b32 v1, v0, v2, v1
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    buffer_wbinvl1_vol
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
 ; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
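
The test update follows directly from the clamp: atomic_nand_i32_lds only touches LDS, so its scope is now at most workgroup. LDS traffic is tracked by the lgkmcnt counter alone, so no vector-memory wait (vmcnt) or L1 invalidate (buffer_wbinvl1_vol) is needed. A hedged sketch of that decision; the names and the counter model are simplifications, not the pass's API:

enum Scope { SINGLETHREAD, WAVEFRONT, WORKGROUP, AGENT, SYSTEM };

struct Inserted { bool WaitLgkmCnt; bool WaitVmCnt; bool InvalidateL1; };

// For an LDS-only access, only the LDS/GDS/constant counter (lgkmcnt) can
// have outstanding work; vmcnt and the L1 cache concern global and scratch
// memory, which such an access cannot touch.
Inserted forLDSOnlyAtomic(Scope S) {
  return {/*WaitLgkmCnt=*/S >= WORKGROUP,
          /*WaitVmCnt=*/false,
          /*InvalidateL1=*/false};
}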


@@ -34,7 +34,7 @@ define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
 ; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
 ; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
 ; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
-; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
+; HAS-ATOMICS: s_waitcnt lgkmcnt(0)
 ; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
 define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
   %idx.add = add nuw i32 %idx, 4

File diff suppressed because it is too large.

File diff suppressed because it is too large.