From 68d6278a6e9879794850e879a036774544b49e99 Mon Sep 17 00:00:00 2001 From: Jose M Monsalve Diaz Date: Thu, 22 Jul 2021 18:17:09 -0400 Subject: [PATCH] [OpenMP] Renaming RT functions `GetNumberOfBlocksInKernel` and `GetNumberOfThreadsInBlock` These functions should follow the camel case convention. These are really easy to change and are needed for D106033. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D106390 --- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip | 6 +++--- openmp/libomptarget/deviceRTLs/common/src/libcall.cu | 4 ++-- openmp/libomptarget/deviceRTLs/common/src/omptarget.cu | 4 +++- openmp/libomptarget/deviceRTLs/common/src/reduction.cu | 2 +- openmp/libomptarget/deviceRTLs/common/src/support.cu | 8 ++++---- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu | 6 ++++-- openmp/libomptarget/deviceRTLs/target_interface.h | 4 ++-- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip index 2bbeab73b136..2c6b8886b8ad 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -122,12 +122,12 @@ uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size, } } // namespace -EXTERN int GetNumberOfBlocksInKernel() { +EXTERN int __kmpc_get_hardware_num_blocks() { return get_grid_dim(__builtin_amdgcn_grid_size_x(), __builtin_amdgcn_workgroup_size_x()); } -EXTERN int GetNumberOfThreadsInBlock() { +EXTERN int __kmpc_get_hardware_num_threads_in_block() { return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(), __builtin_amdgcn_grid_size_x(), __builtin_amdgcn_workgroup_size_x()); @@ -140,7 +140,7 @@ EXTERN unsigned GetLaneId() { } EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() { - return GetNumberOfThreadsInBlock(); + return __kmpc_get_hardware_num_threads_in_block(); } // Atomics diff --git a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu index 9c62b5bb14c0..f1511298a99b 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu @@ -61,7 +61,7 @@ EXTERN int omp_get_max_threads(void) { EXTERN int omp_get_thread_limit(void) { if (__kmpc_is_spmd_exec_mode()) - return GetNumberOfThreadsInBlock(); + return __kmpc_get_hardware_num_threads_in_block(); int rc = threadLimit; PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc); return rc; @@ -196,7 +196,7 @@ EXTERN int omp_get_ancestor_thread_num(int level) { EXTERN int omp_get_team_size(int level) { if (__kmpc_is_spmd_exec_mode()) - return level == 1 ? GetNumberOfThreadsInBlock() : 1; + return level == 1 ? __kmpc_get_hardware_num_threads_in_block() : 1; int rc = -1; unsigned parLevel = parallelLevel[GetWarpId()]; // If level is 0 or all parallel regions are not active - return 1. diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu index ce016ceb8acd..7f7982cbc3b8 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu @@ -94,7 +94,9 @@ static void __kmpc_spmd_kernel_init(bool RequiresFullRuntime) { if (GetLaneId() == 0) { parallelLevel[GetWarpId()] = - 1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0); + 1 + (__kmpc_get_hardware_num_threads_in_block() > 1 + ? OMP_ACTIVE_PARALLEL_LEVEL + : 0); } __kmpc_data_sharing_init_stack(); diff --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu index 3a658f50a996..e975c8d14018 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu @@ -199,7 +199,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2( __kmpc_is_spmd_exec_mode() ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true) : /*Master thread only*/ 1; uint32_t TeamId = GetBlockIdInKernel(); - uint32_t NumTeams = GetNumberOfBlocksInKernel(); + uint32_t NumTeams = __kmpc_get_hardware_num_blocks(); static unsigned SHARED(Bound); static unsigned SHARED(ChunkTeamCount); diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu index 3d93ea00fbae..a0a8116bb825 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/support.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu @@ -53,7 +53,7 @@ bool isRuntimeInitialized() { // // Called in Generic Execution Mode only. int GetMasterThreadID() { - return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1); + return (__kmpc_get_hardware_num_threads_in_block() - 1) & ~(WARPSIZE - 1); } // The last warp is reserved for the master; other warps are workers. @@ -109,7 +109,7 @@ int GetNumberOfOmpThreads(bool isSPMDExecutionMode) { if (Level != OMP_ACTIVE_PARALLEL_LEVEL + 1) { rc = 1; } else if (isSPMDExecutionMode) { - rc = GetNumberOfThreadsInBlock(); + rc = __kmpc_get_hardware_num_threads_in_block(); } else { rc = threadsInTeam; } @@ -127,7 +127,7 @@ int GetOmpTeamId() { int GetNumberOfOmpTeams() { // omp_num_teams - return GetNumberOfBlocksInKernel(); // assume 1 block per team + return __kmpc_get_hardware_num_blocks(); // assume 1 block per team } //////////////////////////////////////////////////////////////////////////////// @@ -169,7 +169,7 @@ void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) { if (!isSPMDExecutionMode) return GetNumberOfWorkersInTeam(); - return GetNumberOfThreadsInBlock(); + return __kmpc_get_hardware_num_threads_in_block(); } int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) { diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu index c1b4007f1dcb..896da1e26075 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu @@ -96,10 +96,12 @@ EXTERN int __kmpc_get_hardware_thread_id_in_block() { return __nvvm_read_ptx_sreg_tid_x(); } EXTERN int GetBlockIdInKernel() { return __nvvm_read_ptx_sreg_ctaid_x(); } -EXTERN int GetNumberOfBlocksInKernel() { +EXTERN int __kmpc_get_hardware_num_blocks() { return __nvvm_read_ptx_sreg_nctaid_x(); } -EXTERN int GetNumberOfThreadsInBlock() { return __nvvm_read_ptx_sreg_ntid_x(); } +EXTERN int __kmpc_get_hardware_num_threads_in_block() { + return __nvvm_read_ptx_sreg_ntid_x(); +} EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; } diff --git a/openmp/libomptarget/deviceRTLs/target_interface.h b/openmp/libomptarget/deviceRTLs/target_interface.h index 7e6ae0dc16da..a4961c3b95bb 100644 --- a/openmp/libomptarget/deviceRTLs/target_interface.h +++ b/openmp/libomptarget/deviceRTLs/target_interface.h @@ -18,8 +18,8 @@ // Calls to the NVPTX layer (assuming 1D layout) EXTERN int __kmpc_get_hardware_thread_id_in_block(); EXTERN int GetBlockIdInKernel(); -EXTERN int GetNumberOfBlocksInKernel(); -EXTERN int GetNumberOfThreadsInBlock(); +EXTERN int __kmpc_get_hardware_num_blocks(); +EXTERN int __kmpc_get_hardware_num_threads_in_block(); EXTERN unsigned GetWarpId(); EXTERN unsigned GetWarpSize(); EXTERN unsigned GetLaneId();