[AMDGPU] Skip some work on subtargets without scalar stores. NFC.

Jay Foad 2021-12-15 12:28:28 +00:00
parent b88f4f271b
commit 6a7db0dc8e
1 changed file with 35 additions and 33 deletions
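
In effect, the patch wraps the existing scalar-store scan and s_dcache_wb insertion in a subtarget check, so targets without scalar stores never walk the function looking for them. Below is a minimal standalone sketch of that shape, using stubbed types and hypothetical names (GCNSubtargetStub, flushScalarStoresAtEndPgm); it illustrates the control flow only and is not the pass's actual code.

#include <vector>

// Stub stand-ins for the LLVM types involved; names here are illustrative.
struct GCNSubtargetStub {
  bool HasScalarStores = false;
  bool hasScalarStores() const { return HasScalarStores; }
};
struct InstrStub { bool IsScalarStore = false; bool IsEndPgm = false; };
struct BlockStub { std::vector<InstrStub> Instrs; };
struct FuncStub  { std::vector<BlockStub> Blocks; };

// Hypothetical helper mirroring the guarded tail of runOnMachineFunction():
// the subtarget query now comes first, so the per-instruction walk is skipped
// entirely when the target has no scalar stores.
bool flushScalarStoresAtEndPgm(const GCNSubtargetStub &ST, FuncStub &MF) {
  bool Modified = false;
  if (!ST.hasScalarStores()) // new early guard added by the patch
    return Modified;

  std::vector<BlockStub *> EndPgmBlocks;
  bool HaveScalarStores = false;
  for (BlockStub &MBB : MF.Blocks)
    for (InstrStub &MI : MBB.Instrs) {
      HaveScalarStores |= MI.IsScalarStore;
      if (MI.IsEndPgm)
        EndPgmBlocks.push_back(&MBB);
    }

  // If any scalar store was seen, the real pass inserts an s_dcache_wb before
  // each end-of-program instruction not already covered by one (elided here).
  (void)HaveScalarStores;
  (void)EndPgmBlocks;
  return Modified;
}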


@@ -1686,45 +1686,47 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
     }
   } while (Repeat);
 
-  SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
-  bool HaveScalarStores = false;
+  if (ST->hasScalarStores()) {
+    SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+    bool HaveScalarStores = false;
 
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      if (!HaveScalarStores && TII->isScalarStore(MI))
-        HaveScalarStores = true;
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (!HaveScalarStores && TII->isScalarStore(MI))
+          HaveScalarStores = true;
 
-      if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
-          MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
-        EndPgmBlocks.push_back(&MBB);
-    }
-  }
+        if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+            MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+          EndPgmBlocks.push_back(&MBB);
+      }
+    }
 
-  if (HaveScalarStores) {
-    // If scalar writes are used, the cache must be flushed or else the next
-    // wave to reuse the same scratch memory can be clobbered.
-    //
-    // Insert s_dcache_wb at wave termination points if there were any scalar
-    // stores, and only if the cache hasn't already been flushed. This could be
-    // improved by looking across blocks for flushes in postdominating blocks
-    // from the stores but an explicitly requested flush is probably very rare.
-    for (MachineBasicBlock *MBB : EndPgmBlocks) {
-      bool SeenDCacheWB = false;
+    if (HaveScalarStores) {
+      // If scalar writes are used, the cache must be flushed or else the next
+      // wave to reuse the same scratch memory can be clobbered.
+      //
+      // Insert s_dcache_wb at wave termination points if there were any scalar
+      // stores, and only if the cache hasn't already been flushed. This could
+      // be improved by looking across blocks for flushes in postdominating
+      // blocks from the stores but an explicitly requested flush is probably
+      // very rare.
+      for (MachineBasicBlock *MBB : EndPgmBlocks) {
+        bool SeenDCacheWB = false;
 
-      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
-           ++I) {
-        if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
-          SeenDCacheWB = true;
-        else if (TII->isScalarStore(*I))
-          SeenDCacheWB = false;
+        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+             I != E; ++I) {
+          if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+            SeenDCacheWB = true;
+          else if (TII->isScalarStore(*I))
+            SeenDCacheWB = false;
 
-        // FIXME: It would be better to insert this before a waitcnt if any.
-        if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
-             I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
-            !SeenDCacheWB) {
-          Modified = true;
-          BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+          // FIXME: It would be better to insert this before a waitcnt if any.
+          if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+               I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
+              !SeenDCacheWB) {
+            Modified = true;
+            BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+          }
         }
       }
     }
   }