forked from OSchip/llvm-project
[AMDGPU] Cleanup memory legalizer interfaces
- Rename interfaces to be in terms of acquire and release. - Improve comments. Differential Revision: https://reviews.llvm.org/D89355
This commit is contained in:
parent
4b6e3ad9b4
commit
907d799070
|
@ -280,23 +280,23 @@ public:
|
|||
virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
|
||||
const = 0;
|
||||
|
||||
/// Inserts any necessary instructions at position \p Pos relative
|
||||
/// to instruction \p MI to ensure any caches associated with
|
||||
/// address spaces \p AddrSpace for memory scopes up to memory scope
|
||||
/// \p Scope are invalidated. Returns true iff any instructions
|
||||
/// inserted.
|
||||
virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const = 0;
|
||||
/// Inserts any necessary instructions at position \p Pos relative to
|
||||
/// instruction \p MI to ensure any subsequent memory instructions of this
|
||||
/// thread with address spaces \p AddrSpace will observe the previous memory
|
||||
/// operations by any thread for memory scopes up to memory scope \p Scope .
|
||||
/// Returns true iff any instructions inserted.
|
||||
virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const = 0;
|
||||
|
||||
/// Inserts any necessary instructions at position \p Pos relative
|
||||
/// to instruction \p MI to ensure memory instructions of kind \p Op
|
||||
/// associated with address spaces \p AddrSpace have completed as
|
||||
/// observed by other memory instructions executing in memory scope
|
||||
/// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
|
||||
/// ordering is between address spaces. Returns true iff any
|
||||
/// instructions inserted.
|
||||
/// to instruction \p MI to ensure memory instructions before \p Pos of kind
|
||||
/// \p Op associated with address spaces \p AddrSpace have completed. Used
|
||||
/// between memory instructions to enforce the order they become visible as
|
||||
/// observed by other memory instructions executing in memory scope \p Scope.
|
||||
/// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
|
||||
/// address spaces. Returns true iff any instructions inserted.
|
||||
virtual bool insertWait(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
|
@ -304,6 +304,18 @@ public:
|
|||
bool IsCrossAddrSpaceOrdering,
|
||||
Position Pos) const = 0;
|
||||
|
||||
/// Inserts any necessary instructions at position \p Pos relative to
|
||||
/// instruction \p MI to ensure previous memory instructions by this thread
|
||||
/// with address spaces \p AddrSpace have completed and can be observed by
|
||||
/// subsequent memory instructions by any thread executing in memory scope \p
|
||||
/// Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering is
|
||||
/// between address spaces. Returns true iff any instructions inserted.
|
||||
virtual bool insertRelease(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
bool IsCrossAddrSpaceOrdering,
|
||||
Position Pos) const = 0;
|
||||
|
||||
/// Virtual destructor to allow derivations to be deleted.
|
||||
virtual ~SICacheControl() = default;
|
||||
|
||||
|
@ -334,10 +346,16 @@ public:
|
|||
|
||||
bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
|
||||
|
||||
bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const override;
|
||||
bool insertAcquire(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const override;
|
||||
|
||||
bool insertRelease(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
bool IsCrossAddrSpaceOrdering,
|
||||
Position Pos) const override;
|
||||
|
||||
bool insertWait(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
|
@ -352,10 +370,10 @@ public:
|
|||
|
||||
SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
|
||||
|
||||
bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const override;
|
||||
bool insertAcquire(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const override;
|
||||
|
||||
};
|
||||
|
||||
|
@ -380,10 +398,10 @@ public:
|
|||
|
||||
bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
|
||||
|
||||
bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const override;
|
||||
bool insertAcquire(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const override;
|
||||
|
||||
bool insertWait(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
|
@ -678,9 +696,6 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
|
|||
bool Changed = false;
|
||||
|
||||
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
|
||||
/// TODO: Do not set glc for rmw atomic operations as they
|
||||
/// implicitly bypass the L1 cache.
|
||||
|
||||
switch (Scope) {
|
||||
case SIAtomicScope::SYSTEM:
|
||||
case SIAtomicScope::AGENT:
|
||||
|
@ -718,10 +733,10 @@ bool SIGfx6CacheControl::enableNonTemporal(
|
|||
return Changed;
|
||||
}
|
||||
|
||||
bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const {
|
||||
bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const {
|
||||
if (!InsertCacheInv)
|
||||
return false;
|
||||
|
||||
|
@ -802,12 +817,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
|||
case SIAtomicScope::SYSTEM:
|
||||
case SIAtomicScope::AGENT:
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
// If no cross address space ordering then an LDS waitcnt is not
|
||||
// needed as LDS operations for all waves are executed in a
|
||||
// total global ordering as observed by all waves. Required if
|
||||
// also synchronizing with global/GDS memory as LDS operations
|
||||
// could be reordered with respect to later global/GDS memory
|
||||
// operations of the same wave.
|
||||
// If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
|
||||
// not needed as LDS operations for all waves are executed in a total
|
||||
// global ordering as observed by all waves. Required if also
|
||||
// synchronizing with global/GDS memory as LDS operations could be
|
||||
// reordered with respect to later global/GDS memory operations of the
|
||||
// same wave.
|
||||
LGKMCnt |= IsCrossAddrSpaceOrdering;
|
||||
break;
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
|
@ -824,12 +839,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
|||
switch (Scope) {
|
||||
case SIAtomicScope::SYSTEM:
|
||||
case SIAtomicScope::AGENT:
|
||||
// If no cross address space ordering then an GDS waitcnt is not
|
||||
// needed as GDS operations for all waves are executed in a
|
||||
// total global ordering as observed by all waves. Required if
|
||||
// also synchronizing with global/LDS memory as GDS operations
|
||||
// could be reordered with respect to later global/LDS memory
|
||||
// operations of the same wave.
|
||||
// If no cross address space ordering then an GDS "S_WAITCNT lgkmcnt(0)"
|
||||
// is not needed as GDS operations for all waves are executed in a total
|
||||
// global ordering as observed by all waves. Required if also
|
||||
// synchronizing with global/LDS memory as GDS operations could be
|
||||
// reordered with respect to later global/LDS memory operations of the
|
||||
// same wave.
|
||||
LGKMCnt |= IsCrossAddrSpaceOrdering;
|
||||
break;
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
|
@ -859,10 +874,19 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
|||
return Changed;
|
||||
}
|
||||
|
||||
bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const {
|
||||
bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
bool IsCrossAddrSpaceOrdering,
|
||||
Position Pos) const {
|
||||
return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
|
||||
IsCrossAddrSpaceOrdering, Pos);
|
||||
}
|
||||
|
||||
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const {
|
||||
if (!InsertCacheInv)
|
||||
return false;
|
||||
|
||||
|
@ -873,9 +897,9 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
|||
|
||||
const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
|
||||
|
||||
const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
|
||||
? AMDGPU::BUFFER_WBINVL1
|
||||
: AMDGPU::BUFFER_WBINVL1_VOL;
|
||||
const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
|
||||
? AMDGPU::BUFFER_WBINVL1
|
||||
: AMDGPU::BUFFER_WBINVL1_VOL;
|
||||
|
||||
if (Pos == Position::AFTER)
|
||||
++MI;
|
||||
|
@ -884,7 +908,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
|||
switch (Scope) {
|
||||
case SIAtomicScope::SYSTEM:
|
||||
case SIAtomicScope::AGENT:
|
||||
BuildMI(MBB, MI, DL, TII->get(Flush));
|
||||
BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
|
||||
Changed = true;
|
||||
break;
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
|
@ -902,7 +926,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
|||
/// sequentially consistent, and no other thread can access scratch
|
||||
/// memory.
|
||||
|
||||
/// Other address spaces do not hava a cache.
|
||||
/// Other address spaces do not have a cache.
|
||||
|
||||
if (Pos == Position::AFTER)
|
||||
--MI;
|
||||
|
@ -930,8 +954,8 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
|
|||
case SIAtomicScope::WORKGROUP:
|
||||
// In WGP mode the waves of a work-group can be executing on either CU of
|
||||
// the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
|
||||
// CU mode and all waves of a work-group are on the same CU, and so the
|
||||
// L0 does not need to be bypassed.
|
||||
// CU mode all waves of a work-group are on the same CU, and so the L0
|
||||
// does not need to be bypassed.
|
||||
if (!CuMode) Changed |= enableGLCBit(MI);
|
||||
break;
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
|
@ -964,10 +988,10 @@ bool SIGfx10CacheControl::enableNonTemporal(
|
|||
return Changed;
|
||||
}
|
||||
|
||||
bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const {
|
||||
bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const {
|
||||
if (!InsertCacheInv)
|
||||
return false;
|
||||
|
||||
|
@ -1074,12 +1098,12 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
|||
case SIAtomicScope::SYSTEM:
|
||||
case SIAtomicScope::AGENT:
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
// If no cross address space ordering then an LDS waitcnt is not
|
||||
// needed as LDS operations for all waves are executed in a
|
||||
// total global ordering as observed by all waves. Required if
|
||||
// also synchronizing with global/GDS memory as LDS operations
|
||||
// could be reordered with respect to later global/GDS memory
|
||||
// operations of the same wave.
|
||||
// If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
|
||||
// not needed as LDS operations for all waves are executed in a total
|
||||
// global ordering as observed by all waves. Required if also
|
||||
// synchronizing with global/GDS memory as LDS operations could be
|
||||
// reordered with respect to later global/GDS memory operations of the
|
||||
// same wave.
|
||||
LGKMCnt |= IsCrossAddrSpaceOrdering;
|
||||
break;
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
|
@ -1096,12 +1120,12 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
|||
switch (Scope) {
|
||||
case SIAtomicScope::SYSTEM:
|
||||
case SIAtomicScope::AGENT:
|
||||
// If no cross address space ordering then an GDS waitcnt is not
|
||||
// needed as GDS operations for all waves are executed in a
|
||||
// total global ordering as observed by all waves. Required if
|
||||
// also synchronizing with global/LDS memory as GDS operations
|
||||
// could be reordered with respect to later global/LDS memory
|
||||
// operations of the same wave.
|
||||
// If no cross address space ordering then an GDS "S_WAITCNT lgkmcnt(0)"
|
||||
// is not needed as GDS operations for all waves are executed in a total
|
||||
// global ordering as observed by all waves. Required if also
|
||||
// synchronizing with global/LDS memory as GDS operations could be
|
||||
// reordered with respect to later global/LDS memory operations of the
|
||||
// same wave.
|
||||
LGKMCnt |= IsCrossAddrSpaceOrdering;
|
||||
break;
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
|
@ -1177,9 +1201,9 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
|
|||
SIMemOp::LOAD,
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::AFTER);
|
||||
Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
Position::AFTER);
|
||||
Changed |= CC->insertAcquire(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
Position::AFTER);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
|
@ -1203,11 +1227,10 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
|
|||
if (MOI.isAtomic()) {
|
||||
if (MOI.getOrdering() == AtomicOrdering::Release ||
|
||||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
|
||||
Changed |= CC->insertWait(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
SIMemOp::LOAD | SIMemOp::STORE,
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::BEFORE);
|
||||
Changed |= CC->insertRelease(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::BEFORE);
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
@ -1239,19 +1262,23 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
|
|||
/// ordering and memory scope, then library does not need to
|
||||
/// generate a fence. Could add support in this file for
|
||||
/// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
|
||||
/// adding waitcnt before a S_BARRIER.
|
||||
Changed |= CC->insertWait(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
SIMemOp::LOAD | SIMemOp::STORE,
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::BEFORE);
|
||||
/// adding S_WAITCNT before a S_BARRIER.
|
||||
Changed |= CC->insertRelease(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::BEFORE);
|
||||
|
||||
// TODO: If both release and invalidate are happening they could be combined
|
||||
// to use the single "BUFFER_WBL2" instruction. This could be done by
|
||||
// reorganizing this code or as part of optimizing SIInsertWaitcnt pass to
|
||||
// track cache invalidate and write back instructions.
|
||||
|
||||
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
|
||||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
|
||||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
|
||||
Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
Position::BEFORE);
|
||||
Changed |= CC->insertAcquire(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
Position::BEFORE);
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
@ -1270,11 +1297,10 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
|
|||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
|
||||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
|
||||
MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
|
||||
Changed |= CC->insertWait(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
SIMemOp::LOAD | SIMemOp::STORE,
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::BEFORE);
|
||||
Changed |= CC->insertRelease(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::BEFORE);
|
||||
|
||||
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
|
||||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
|
||||
|
@ -1287,9 +1313,9 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
|
|||
SIMemOp::STORE,
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::AFTER);
|
||||
Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
Position::AFTER);
|
||||
Changed |= CC->insertAcquire(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
Position::AFTER);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
|
|
Loading…
Reference in New Issue