[AMDGPU] Limit memory scope for scratch, LDS and GDS
Changes for the AMD GPU SIMemoryLegalizer:

- Limit the memory scope to the maximum supported by the scratch, LDS and
  GDS address spaces.
- Improve assertion checking.
- Correct the toSIAtomicScope argument name.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D96643
commit 8a91b68b95
parent b4c0d610a6
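In outline: an atomic that can only access scratch (private) memory is never visible beyond the issuing thread, one confined to scratch and LDS is visible at most within the workgroup, and one that additionally touches GDS is visible at most within the agent, so any wider requested scope can safely be narrowed. A minimal standalone sketch of that clamping rule, using simplified hypothetical enums rather than the actual LLVM types:

#include <algorithm>
#include <cstdint>

// Hypothetical, simplified stand-ins for the bitmask and scope enums that
// the diff below manipulates (not the actual LLVM types).
enum class AddrSpace : uint32_t { NONE = 0, SCRATCH = 1, LDS = 2, GDS = 4, GLOBAL = 8 };
enum class Scope { SINGLETHREAD, WAVEFRONT, WORKGROUP, AGENT, SYSTEM };

constexpr AddrSpace operator&(AddrSpace A, AddrSpace B) {
  return AddrSpace(uint32_t(A) & uint32_t(B));
}
constexpr AddrSpace operator|(AddrSpace A, AddrSpace B) {
  return AddrSpace(uint32_t(A) | uint32_t(B));
}
constexpr AddrSpace operator~(AddrSpace A) { return AddrSpace(~uint32_t(A)); }

// Clamp a requested synchronization scope to the widest scope at which the
// accessed address spaces are actually shared: scratch is per-thread, LDS is
// per-workgroup, GDS is per-agent. Mirrors the constructor logic in the diff.
constexpr Scope limitScope(Scope Requested, AddrSpace Accessed) {
  if ((Accessed & ~AddrSpace::SCRATCH) == AddrSpace::NONE)
    return std::min(Requested, Scope::SINGLETHREAD);
  if ((Accessed & ~(AddrSpace::SCRATCH | AddrSpace::LDS)) == AddrSpace::NONE)
    return std::min(Requested, Scope::WORKGROUP);
  if ((Accessed & ~(AddrSpace::SCRATCH | AddrSpace::LDS | AddrSpace::GDS)) ==
      AddrSpace::NONE)
    return std::min(Requested, Scope::AGENT);
  return Requested; // e.g. global memory: the requested scope stands.
}

// A system-scope atomic that can only touch LDS is really workgroup-scope.
static_assert(limitScope(Scope::SYSTEM, AddrSpace::LDS) == Scope::WORKGROUP, "");
static_assert(limitScope(Scope::SYSTEM, AddrSpace::GLOBAL) == Scope::SYSTEM, "");

The diff below implements the same rule directly in the SIMemOpInfo constructor.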
@@ -129,12 +129,43 @@ private:
         IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
         IsVolatile(IsVolatile),
         IsNonTemporal(IsNonTemporal) {
+
+    if (Ordering == AtomicOrdering::NotAtomic) {
+      assert(Scope == SIAtomicScope::NONE &&
+             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
+             !IsCrossAddressSpaceOrdering &&
+             FailureOrdering == AtomicOrdering::NotAtomic);
+      return;
+    }
+
+    assert(Scope != SIAtomicScope::NONE &&
+           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
+               SIAtomicAddrSpace::NONE &&
+           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
+               SIAtomicAddrSpace::NONE &&
+           !isStrongerThan(FailureOrdering, Ordering));
+
+    // There is also no cross address space ordering if the ordering
+    // address space is the same as the instruction address space and
+    // only contains a single address space.
+    if ((OrderingAddrSpace == InstrAddrSpace) &&
+        isPowerOf2_32(uint32_t(InstrAddrSpace)))
+      this->IsCrossAddressSpaceOrdering = false;
+
+    // Limit the scope to the maximum supported by the instruction's address
+    // spaces.
+    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
+        SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
+    } else if ((InstrAddrSpace &
+                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
+               SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
+    } else if ((InstrAddrSpace &
+                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
+                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
+      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
+    }
   }
 
 public:
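The isPowerOf2_32 check above relies on SIAtomicAddrSpace being a bitmask: a nonzero mask with exactly one bit set names exactly one address space, in which case the ordering cannot cross address spaces. A tiny self-contained illustration of the bit trick (the real function is llvm::isPowerOf2_32 from llvm/Support/MathExtras.h):

#include <cstdint>

// A bitmask denotes exactly one address space iff exactly one bit is set,
// i.e. iff it is a nonzero power of two.
constexpr bool isPowerOf2_32(uint32_t Value) {
  return Value && !(Value & (Value - 1));
}

static_assert(isPowerOf2_32(0x2), "one bit set: a single address space");
static_assert(!isPowerOf2_32(0x2 | 0x4), "two bits set: two address spaces");
static_assert(!isPowerOf2_32(0x0), "no bits set: no address space");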
@@ -202,12 +233,12 @@ private:
   void reportUnsupported(const MachineBasicBlock::iterator &MI,
                          const char *Msg) const;
 
-  /// Inspects the target synchonization scope \p SSID and determines
+  /// Inspects the target synchronization scope \p SSID and determines
   /// the SI atomic scope it corresponds to, the address spaces it
   /// covers, and whether the memory ordering applies between address
   /// spaces.
   Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
-  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
+  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;
 
   /// \return Return a bit set of the address spaces accessed by \p AS.
   SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
@@ -476,7 +507,7 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
 
 Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
 SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
-                               SIAtomicAddrSpace InstrScope) const {
+                               SIAtomicAddrSpace InstrAddrSpace) const {
   if (SSID == SyncScope::System)
     return std::make_tuple(SIAtomicScope::SYSTEM,
                            SIAtomicAddrSpace::ATOMIC,
@@ -499,23 +530,23 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                            true);
   if (SSID == MMI->getSystemOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SYSTEM,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getAgentOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::AGENT,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WORKGROUP,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WAVEFRONT,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SINGLETHREAD,
-                           SIAtomicAddrSpace::ATOMIC & InstrScope,
+                           SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
                            false);
   return None;
 }
@@ -591,7 +622,8 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
   std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
       ScopeOrNone.getValue();
   if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
-      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
+      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
+      ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
     reportUnsupported(MI, "Unsupported atomic address space");
     return None;
   }
@@ -659,7 +691,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
   }
 
   return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
-                     IsCrossAddressSpaceOrdering);
+                     IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
 }
 
 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
(File diff suppressed because it is too large.)
@@ -13,10 +13,9 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    v_not_b32_e32 v1, v2
 ; GCN-NEXT:    v_or_b32_e32 v1, -5, v1
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    ds_cmpst_rtn_b32 v1, v0, v2, v1
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    buffer_wbinvl1_vol
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
 ; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
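Both test updates show the same consequence of the clamp: an atomic that only touches LDS is now at most workgroup scope, so the legalizer keeps the LDS wait (s_waitcnt lgkmcnt(0)) but no longer drains vector memory with vmcnt(0) or invalidates the vector L1 cache with buffer_wbinvl1_vol, since no agent-level visibility is required. A rough, hypothetical sketch of that decision (the real logic lives in the SICacheControl subclasses of SIMemoryLegalizer.cpp):

#include <string>
#include <vector>

enum class Scope { SINGLETHREAD, WAVEFRONT, WORKGROUP, AGENT, SYSTEM };

// Hypothetical sketch: which synchronization an acquire on a DS (LDS)
// atomic needs, as a function of its clamped scope.
std::vector<std::string> ldsAcquireSync(Scope S) {
  std::vector<std::string> Insts;
  // LDS traffic is counted by lgkmcnt, so this wait always suffices to
  // order the LDS access itself.
  Insts.push_back("s_waitcnt lgkmcnt(0)");
  if (S > Scope::WORKGROUP) {
    // Only a scope wider than a workgroup would also have to drain vector
    // memory and invalidate the vector L1 cache -- and after this patch an
    // LDS-only atomic can never be wider than workgroup scope.
    Insts.push_back("s_waitcnt vmcnt(0)");
    Insts.push_back("buffer_wbinvl1_vol");
  }
  return Insts;
}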
@@ -34,7 +34,7 @@ define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
 ; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
 ; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
 ; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
-; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
+; HAS-ATOMICS: s_waitcnt lgkmcnt(0)
 ; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
 define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
   %idx.add = add nuw i32 %idx, 4
(Two more file diffs suppressed because they are too large.)