forked from OSchip/llvm-project
[OpenMP][CUDA] Refine the logic to determine grid size
This patch refines the logic that determines the grid size, because the previous method could bypass the check of whether `CudaBlocksPerGrid` is greater than the actual hardware limit. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D119311
This commit is contained in:
parent
547a667cee
commit
f6685f7746
|
@ -1170,15 +1170,17 @@ public:
|
|||
DP("Using default number of teams %d\n", DeviceData[DeviceId].NumTeams);
|
||||
CudaBlocksPerGrid = DeviceData[DeviceId].NumTeams;
|
||||
}
|
||||
} else if (TeamNum > DeviceData[DeviceId].BlocksPerGrid) {
|
||||
DP("Capping number of teams to team limit %d\n",
|
||||
DeviceData[DeviceId].BlocksPerGrid);
|
||||
CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
|
||||
} else {
|
||||
DP("Using requested number of teams %d\n", TeamNum);
|
||||
CudaBlocksPerGrid = TeamNum;
|
||||
}
|
||||
|
||||
if (CudaBlocksPerGrid > DeviceData[DeviceId].BlocksPerGrid) {
|
||||
DP("Capping number of teams to team limit %d\n",
|
||||
DeviceData[DeviceId].BlocksPerGrid);
|
||||
CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
|
||||
}
|
||||
|
||||
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
|
||||
"Launching kernel %s with %d blocks and %d threads in %s mode\n",
|
||||
(getOffloadEntry(DeviceId, TgtEntryPtr))
|
||||
|
|
Loading…
Reference in New Issue