From 6a7db0dc8eefcfbf1cadde41cb40f6e16c0c242a Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 15 Dec 2021 12:28:28 +0000
Subject: [PATCH] [AMDGPU] Skip some work on subtargets without scalar stores. NFC.

---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 68 +++++++++++----------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c9d9dd1fb82c..70c5a52c6b28 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1686,45 +1686,47 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
     }
   } while (Repeat);
 
-  SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+  if (ST->hasScalarStores()) {
+    SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+    bool HaveScalarStores = false;
 
-  bool HaveScalarStores = false;
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (!HaveScalarStores && TII->isScalarStore(MI))
+          HaveScalarStores = true;
 
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      if (!HaveScalarStores && TII->isScalarStore(MI))
-        HaveScalarStores = true;
-
-      if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
-          MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
-        EndPgmBlocks.push_back(&MBB);
+        if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+            MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+          EndPgmBlocks.push_back(&MBB);
+      }
     }
-  }
 
-  if (HaveScalarStores) {
-    // If scalar writes are used, the cache must be flushed or else the next
-    // wave to reuse the same scratch memory can be clobbered.
-    //
-    // Insert s_dcache_wb at wave termination points if there were any scalar
-    // stores, and only if the cache hasn't already been flushed. This could be
-    // improved by looking across blocks for flushes in postdominating blocks
-    // from the stores but an explicitly requested flush is probably very rare.
-    for (MachineBasicBlock *MBB : EndPgmBlocks) {
-      bool SeenDCacheWB = false;
+    if (HaveScalarStores) {
+      // If scalar writes are used, the cache must be flushed or else the next
+      // wave to reuse the same scratch memory can be clobbered.
+      //
+      // Insert s_dcache_wb at wave termination points if there were any scalar
+      // stores, and only if the cache hasn't already been flushed. This could
+      // be improved by looking across blocks for flushes in postdominating
+      // blocks from the stores but an explicitly requested flush is probably
+      // very rare.
+      for (MachineBasicBlock *MBB : EndPgmBlocks) {
+        bool SeenDCacheWB = false;
 
-      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
-           ++I) {
-        if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
-          SeenDCacheWB = true;
-        else if (TII->isScalarStore(*I))
-          SeenDCacheWB = false;
+        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+             I != E; ++I) {
+          if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+            SeenDCacheWB = true;
+          else if (TII->isScalarStore(*I))
+            SeenDCacheWB = false;
 
-        // FIXME: It would be better to insert this before a waitcnt if any.
-        if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
-             I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
-            !SeenDCacheWB) {
-          Modified = true;
-          BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+          // FIXME: It would be better to insert this before a waitcnt if any.
+          if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+               I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
+              !SeenDCacheWB) {
+            Modified = true;
+            BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+          }
         }
       }
     }