[AMDGPU] Limit memory scope for scratch, LDS and GDS

Changes for AMD GPU SIMemoryLegalizer:

- Limit the memory scope to maximum supported by the scratch, LDS and
  GDS address spaces.

- Improve assertion checking.

- Correct toSIAtomicScope argument name.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D96643
This commit is contained in:
Tony Tye 2021-02-13 08:43:48 +00:00
parent b4c0d610a6
commit 8a91b68b95
6 changed files with 587 additions and 967 deletions

View File

@ -129,12 +129,43 @@ private:
IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
IsVolatile(IsVolatile),
IsNonTemporal(IsNonTemporal) {
if (Ordering == AtomicOrdering::NotAtomic) {
assert(Scope == SIAtomicScope::NONE &&
OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
!IsCrossAddressSpaceOrdering &&
FailureOrdering == AtomicOrdering::NotAtomic);
return;
}
assert(Scope != SIAtomicScope::NONE &&
(OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
SIAtomicAddrSpace::NONE &&
(InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
SIAtomicAddrSpace::NONE &&
!isStrongerThan(FailureOrdering, Ordering));
// There is also no cross address space ordering if the ordering
// address space is the same as the instruction address space and
// only contains a single address space.
if ((OrderingAddrSpace == InstrAddrSpace) &&
isPowerOf2_32(uint32_t(InstrAddrSpace)))
this->IsCrossAddressSpaceOrdering = false;
// Limit the scope to the maximum supported by the instruction's address
// spaces.
if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
SIAtomicAddrSpace::NONE) {
this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
} else if ((InstrAddrSpace &
~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
SIAtomicAddrSpace::NONE) {
this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
} else if ((InstrAddrSpace &
~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
this->Scope = std::min(Scope, SIAtomicScope::AGENT);
}
}
public:
@ -202,12 +233,12 @@ private:
void reportUnsupported(const MachineBasicBlock::iterator &MI,
const char *Msg) const;
/// Inspects the target synchonization scope \p SSID and determines
/// Inspects the target synchronization scope \p SSID and determines
/// the SI atomic scope it corresponds to, the address spaces it
/// covers, and whether the memory ordering applies between address
/// spaces.
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;
/// \return Return a bit set of the address spaces accessed by \p AS.
SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
@ -476,7 +507,7 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
SIAtomicAddrSpace InstrScope) const {
SIAtomicAddrSpace InstrAddrSpace) const {
if (SSID == SyncScope::System)
return std::make_tuple(SIAtomicScope::SYSTEM,
SIAtomicAddrSpace::ATOMIC,
@ -499,23 +530,23 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
true);
if (SSID == MMI->getSystemOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SYSTEM,
SIAtomicAddrSpace::ATOMIC & InstrScope,
SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
if (SSID == MMI->getAgentOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::AGENT,
SIAtomicAddrSpace::ATOMIC & InstrScope,
SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WORKGROUP,
SIAtomicAddrSpace::ATOMIC & InstrScope,
SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WAVEFRONT,
SIAtomicAddrSpace::ATOMIC & InstrScope,
SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SINGLETHREAD,
SIAtomicAddrSpace::ATOMIC & InstrScope,
SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
return None;
}
@ -591,7 +622,8 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
ScopeOrNone.getValue();
if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
reportUnsupported(MI, "Unsupported atomic address space");
return None;
}
@ -659,7 +691,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
}
return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
IsCrossAddressSpaceOrdering);
IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
}
Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(

View File

@ -13,10 +13,9 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_not_b32_e32 v1, v2
; GCN-NEXT: v_or_b32_e32 v1, -5, v1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1_vol
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]

View File

@ -34,7 +34,7 @@ define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
; HAS-ATOMICS: s_waitcnt lgkmcnt(0)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
%idx.add = add nuw i32 %idx, 4

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff