[OpenMP][AMDGPU] Use DS_Max_Warp_Number instead of WARPSIZE

The size of worker_rootS should have been DS_Max_Warp_Number.
This reduces memory usage by deviceRTL on AMDGPU from around 2.3GB
to around 770MB.

Reviewed By: JonChesterfield, jdoerfert

Differential Revision: https://reviews.llvm.org/D87084
This commit is contained in:
Pushpinder Singh 2020-09-03 07:57:46 -04:00
parent 05147d3309
commit 7634c64b61
2 changed files with 2 additions and 2 deletions

View File

@@ -252,7 +252,7 @@ private:
workDescrForActiveParallel; // one, ONLY for the active par
ALIGN(16)
-__kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE];
+__kmpc_data_sharing_worker_slot_static worker_rootS[DS_Max_Warp_Number];
ALIGN(16) __kmpc_data_sharing_master_slot_static master_rootS[1];
};

View File

@@ -26,7 +26,7 @@ INLINE static void data_sharing_init_stack_common() {
omptarget_nvptx_TeamDescr *teamDescr =
&omptarget_nvptx_threadPrivateContext->TeamContext();
-for (int WID = 0; WID < WARPSIZE; WID++) {
+for (int WID = 0; WID < DS_Max_Warp_Number; WID++) {
__kmpc_data_sharing_slot *RootS = teamDescr->GetPreallocatedSlotAddr(WID);
DataSharingState.SlotPtr[WID] = RootS;
DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0];