forked from OSchip/llvm-project
[AMDGPU] Skip some work on subtargets without scalar stores. NFC.
This commit is contained in:
parent
b88f4f271b
commit
6a7db0dc8e
|
@ -1686,45 +1686,47 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
} while (Repeat);
|
||||
|
||||
SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
|
||||
if (ST->hasScalarStores()) {
|
||||
SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
|
||||
bool HaveScalarStores = false;
|
||||
|
||||
bool HaveScalarStores = false;
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
for (MachineInstr &MI : MBB) {
|
||||
if (!HaveScalarStores && TII->isScalarStore(MI))
|
||||
HaveScalarStores = true;
|
||||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
for (MachineInstr &MI : MBB) {
|
||||
if (!HaveScalarStores && TII->isScalarStore(MI))
|
||||
HaveScalarStores = true;
|
||||
|
||||
if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
|
||||
MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
|
||||
EndPgmBlocks.push_back(&MBB);
|
||||
if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
|
||||
MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
|
||||
EndPgmBlocks.push_back(&MBB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (HaveScalarStores) {
|
||||
// If scalar writes are used, the cache must be flushed or else the next
|
||||
// wave to reuse the same scratch memory can be clobbered.
|
||||
//
|
||||
// Insert s_dcache_wb at wave termination points if there were any scalar
|
||||
// stores, and only if the cache hasn't already been flushed. This could be
|
||||
// improved by looking across blocks for flushes in postdominating blocks
|
||||
// from the stores but an explicitly requested flush is probably very rare.
|
||||
for (MachineBasicBlock *MBB : EndPgmBlocks) {
|
||||
bool SeenDCacheWB = false;
|
||||
if (HaveScalarStores) {
|
||||
// If scalar writes are used, the cache must be flushed or else the next
|
||||
// wave to reuse the same scratch memory can be clobbered.
|
||||
//
|
||||
// Insert s_dcache_wb at wave termination points if there were any scalar
|
||||
// stores, and only if the cache hasn't already been flushed. This could
|
||||
// be improved by looking across blocks for flushes in postdominating
|
||||
// blocks from the stores but an explicitly requested flush is probably
|
||||
// very rare.
|
||||
for (MachineBasicBlock *MBB : EndPgmBlocks) {
|
||||
bool SeenDCacheWB = false;
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
|
||||
++I) {
|
||||
if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
|
||||
SeenDCacheWB = true;
|
||||
else if (TII->isScalarStore(*I))
|
||||
SeenDCacheWB = false;
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
|
||||
I != E; ++I) {
|
||||
if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
|
||||
SeenDCacheWB = true;
|
||||
else if (TII->isScalarStore(*I))
|
||||
SeenDCacheWB = false;
|
||||
|
||||
// FIXME: It would be better to insert this before a waitcnt if any.
|
||||
if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
|
||||
I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
|
||||
!SeenDCacheWB) {
|
||||
Modified = true;
|
||||
BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
|
||||
// FIXME: It would be better to insert this before a waitcnt if any.
|
||||
if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
|
||||
I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
|
||||
!SeenDCacheWB) {
|
||||
Modified = true;
|
||||
BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue