[OpenMP][CUDA] Refine the logic to determine grid size

This patch refines the logic that determines the grid size, because the previous method could bypass the check of whether `CudaBlocksPerGrid` exceeds the actual hardware limit. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D119311
2022-02-10 14:13:18 -05:00 · 2022-02-10 14:13:18 -05:00 · f6685f7746
parent 547a667cee
commit f6685f7746
1 changed files with 6 additions and 4 deletions
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1170,15 +1170,17 @@ public:
        DP("Using default number of teams %d\n", DeviceData[DeviceId].NumTeams);
        CudaBlocksPerGrid = DeviceData[DeviceId].NumTeams;
      }
-    } else if (TeamNum > DeviceData[DeviceId].BlocksPerGrid) {
-      DP("Capping number of teams to team limit %d\n",
-         DeviceData[DeviceId].BlocksPerGrid);
-      CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
    } else {
      DP("Using requested number of teams %d\n", TeamNum);
      CudaBlocksPerGrid = TeamNum;
    }

+    if (CudaBlocksPerGrid > DeviceData[DeviceId].BlocksPerGrid) {
+      DP("Capping number of teams to team limit %d\n",
+         DeviceData[DeviceId].BlocksPerGrid);
+      CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
+    }
+
    INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
         "Launching kernel %s with %d blocks and %d threads in %s mode\n",
         (getOffloadEntry(DeviceId, TgtEntryPtr))