forked from OSchip/llvm-project
[OpenMP][AMDGPU] Use DS_Max_Warp_Number instead of WARPSIZE
The size of worker_rootS should have been DS_Max_Warp_Number rather than WARPSIZE. This reduces memory usage by the deviceRTL on AMDGPU from around 2.3GB to around 770MB.
Reviewed By: JonChesterfield, jdoerfert
Differential Revision: https://reviews.llvm.org/D87084
This commit is contained in:
parent
05147d3309
commit
7634c64b61
|
@@ -252,7 +252,7 @@ private:
|
|||
workDescrForActiveParallel; // one, ONLY for the active par
|
||||
|
||||
ALIGN(16)
|
||||
__kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE];
|
||||
__kmpc_data_sharing_worker_slot_static worker_rootS[DS_Max_Warp_Number];
|
||||
ALIGN(16) __kmpc_data_sharing_master_slot_static master_rootS[1];
|
||||
};
|
||||
|
||||
|
|
|
@@ -26,7 +26,7 @@ INLINE static void data_sharing_init_stack_common() {
|
|||
omptarget_nvptx_TeamDescr *teamDescr =
|
||||
&omptarget_nvptx_threadPrivateContext->TeamContext();
|
||||
|
||||
for (int WID = 0; WID < WARPSIZE; WID++) {
|
||||
for (int WID = 0; WID < DS_Max_Warp_Number; WID++) {
|
||||
__kmpc_data_sharing_slot *RootS = teamDescr->GetPreallocatedSlotAddr(WID);
|
||||
DataSharingState.SlotPtr[WID] = RootS;
|
||||
DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0];
|
||||
|
|
Loading…
Reference in New Issue