[OpenMP][CUDA] Refine the logic to determine grid size

This patch refines the logic that determines the grid size, because the
previous method could bypass the check of whether `CudaBlocksPerGrid` exceeds
the actual hardware limit.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D119311
This commit is contained in:
Shilei Tian 2022-02-10 14:13:18 -05:00
parent 547a667cee
commit f6685f7746
1 changed file with 6 additions and 4 deletions

View File

@@ -1170,15 +1170,17 @@ public:
DP("Using default number of teams %d\n", DeviceData[DeviceId].NumTeams);
CudaBlocksPerGrid = DeviceData[DeviceId].NumTeams;
}
} else if (TeamNum > DeviceData[DeviceId].BlocksPerGrid) {
DP("Capping number of teams to team limit %d\n",
DeviceData[DeviceId].BlocksPerGrid);
CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
} else {
DP("Using requested number of teams %d\n", TeamNum);
CudaBlocksPerGrid = TeamNum;
}
if (CudaBlocksPerGrid > DeviceData[DeviceId].BlocksPerGrid) {
DP("Capping number of teams to team limit %d\n",
DeviceData[DeviceId].BlocksPerGrid);
CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
}
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
"Launching kernel %s with %d blocks and %d threads in %s mode\n",
(getOffloadEntry(DeviceId, TgtEntryPtr))