[libomptarget] Move supporti.h to support.cu

Summary:
[libomptarget] Move supporti.h to support.cu
Reimplementation of D69652, without the unity build and refactors.
Will need a clean build of libomptarget as the cmakelists changed.

Reviewers: ABataev, jdoerfert

Reviewed By: jdoerfert

Subscribers: mgorny, jfb, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D70131
This commit is contained in:
JonChesterfield 2019-11-13 11:36:30 +00:00 committed by Jon Chesterfield
parent 6ea4775900
commit fd9fa9995c
5 changed files with 87 additions and 68 deletions

View File

@ -55,6 +55,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
src/omptarget-nvptx.cu src/omptarget-nvptx.cu
src/parallel.cu src/parallel.cu
src/reduction.cu src/reduction.cu
src/support.cu
src/sync.cu src/sync.cu
src/task.cu src/task.cu
) )

View File

@ -385,6 +385,5 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#include "omptarget-nvptxi.h" #include "omptarget-nvptxi.h"
#include "supporti.h"
#endif #endif

View File

@ -1,4 +1,4 @@
//===--------- supporti.h - NVPTX OpenMP support functions ------- CUDA -*-===// //===--------- support.cu - NVPTX OpenMP support functions ------- CUDA -*-===//
// //
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information. // See https://llvm.org/LICENSE.txt for license information.
@ -10,26 +10,28 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "support.h"
#include "debug.h"
#include "omptarget-nvptx.h"
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Execution Parameters // Execution Parameters
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#include "target_impl.h" DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
execution_param = EMode; execution_param = EMode;
execution_param |= RMode; execution_param |= RMode;
} }
INLINE bool isGenericMode() { return (execution_param & ModeMask) == Generic; } DEVICE bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
INLINE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; } DEVICE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; }
INLINE bool isRuntimeUninitialized() { DEVICE bool isRuntimeUninitialized() {
return (execution_param & RuntimeMask) == RuntimeUninitialized; return (execution_param & RuntimeMask) == RuntimeUninitialized;
} }
INLINE bool isRuntimeInitialized() { DEVICE bool isRuntimeInitialized() {
return (execution_param & RuntimeMask) == RuntimeInitialized; return (execution_param & RuntimeMask) == RuntimeInitialized;
} }
@ -37,7 +39,7 @@ INLINE bool isRuntimeInitialized() {
// Execution Modes based on location parameter fields // Execution Modes based on location parameter fields
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
INLINE bool checkSPMDMode(kmp_Ident *loc) { DEVICE bool checkSPMDMode(kmp_Ident *loc) {
if (!loc) if (!loc)
return isSPMDMode(); return isSPMDMode();
@ -55,11 +57,11 @@ INLINE bool checkSPMDMode(kmp_Ident *loc) {
return isSPMDMode(); return isSPMDMode();
} }
INLINE bool checkGenericMode(kmp_Ident *loc) { DEVICE bool checkGenericMode(kmp_Ident *loc) {
return !checkSPMDMode(loc); return !checkSPMDMode(loc);
} }
INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) { DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc) {
if (!loc) if (!loc)
return isRuntimeUninitialized(); return isRuntimeUninitialized();
@ -82,7 +84,7 @@ INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
return isRuntimeUninitialized(); return isRuntimeUninitialized();
} }
INLINE bool checkRuntimeInitialized(kmp_Ident *loc) { DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
return !checkRuntimeUninitialized(loc); return !checkRuntimeUninitialized(loc);
} }
@ -96,17 +98,17 @@ INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
// //
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
INLINE int GetThreadIdInBlock() { return threadIdx.x; } DEVICE int GetThreadIdInBlock() { return threadIdx.x; }
INLINE int GetBlockIdInKernel() { return blockIdx.x; } DEVICE int GetBlockIdInKernel() { return blockIdx.x; }
INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; } DEVICE int GetNumberOfBlocksInKernel() { return gridDim.x; }
INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; } DEVICE int GetNumberOfThreadsInBlock() { return blockDim.x; }
INLINE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; } DEVICE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); } DEVICE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// //
@ -122,11 +124,11 @@ INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
// If NumThreads is 1024, master id is 992. // If NumThreads is 1024, master id is 992.
// //
// Called in Generic Execution Mode only. // Called in Generic Execution Mode only.
INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); } DEVICE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
// The last warp is reserved for the master; other warps are workers. // The last warp is reserved for the master; other warps are workers.
// Called in Generic Execution Mode only. // Called in Generic Execution Mode only.
INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); } DEVICE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// get thread id in team // get thread id in team
@ -135,7 +137,7 @@ INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
// or a serial region by the master. If the master (whose CUDA thread // or a serial region by the master. If the master (whose CUDA thread
// id is GetMasterThreadID()) calls this routine, we return 0 because // id is GetMasterThreadID()) calls this routine, we return 0 because
// it is a shadow for the first worker. // it is a shadow for the first worker.
INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) { DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
// Implemented using control flow (predication) instead of with a modulo // Implemented using control flow (predication) instead of with a modulo
// operation. // operation.
int tid = GetThreadIdInBlock(); int tid = GetThreadIdInBlock();
@ -151,7 +153,7 @@ INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
// //
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) { DEVICE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
// omp_thread_num // omp_thread_num
int rc; int rc;
if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) { if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) {
@ -167,7 +169,7 @@ INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
return rc; return rc;
} }
INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) { DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
// omp_num_threads // omp_num_threads
int rc; int rc;
int Level = parallelLevel[GetWarpId()]; int Level = parallelLevel[GetWarpId()];
@ -185,12 +187,12 @@ INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Team id linked to OpenMP // Team id linked to OpenMP
INLINE int GetOmpTeamId() { DEVICE int GetOmpTeamId() {
// omp_team_num // omp_team_num
return GetBlockIdInKernel(); // assume 1 block per team return GetBlockIdInKernel(); // assume 1 block per team
} }
INLINE int GetNumberOfOmpTeams() { DEVICE int GetNumberOfOmpTeams() {
// omp_num_teams // omp_num_teams
return GetNumberOfBlocksInKernel(); // assume 1 block per team return GetNumberOfBlocksInKernel(); // assume 1 block per team
} }
@ -198,12 +200,12 @@ INLINE int GetNumberOfOmpTeams() {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Masters // Masters
INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); } DEVICE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Parallel level // Parallel level
INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask); __kmpc_impl_syncwarp(Mask);
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt(); __kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt); unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
@ -215,7 +217,7 @@ INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask); __kmpc_impl_syncwarp(Mask);
} }
INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask); __kmpc_impl_syncwarp(Mask);
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt(); __kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt); unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
@ -231,13 +233,13 @@ INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
// get OpenMP number of procs // get OpenMP number of procs
// Get the number of processors in the device. // Get the number of processors in the device.
INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) { DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
if (!isSPMDExecutionMode) if (!isSPMDExecutionMode)
return GetNumberOfWorkersInTeam(); return GetNumberOfWorkersInTeam();
return GetNumberOfThreadsInBlock(); return GetNumberOfThreadsInBlock();
} }
INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) { DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
return GetNumberOfProcsInDevice(isSPMDExecutionMode); return GetNumberOfProcsInDevice(isSPMDExecutionMode);
} }
@ -245,7 +247,7 @@ INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
// Memory // Memory
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
INLINE unsigned long PadBytes(unsigned long size, DEVICE unsigned long PadBytes(unsigned long size,
unsigned long alignment) // must be a power of 2 unsigned long alignment) // must be a power of 2
{ {
// compute the necessary padding to satisfy alignment constraint // compute the necessary padding to satisfy alignment constraint
@ -254,7 +256,7 @@ INLINE unsigned long PadBytes(unsigned long size,
return (~(unsigned long)size + 1) & (alignment - 1); return (~(unsigned long)size + 1) & (alignment - 1);
} }
INLINE void *SafeMalloc(size_t size, const char *msg) // check if success DEVICE void *SafeMalloc(size_t size, const char *msg) // check if success
{ {
void *ptr = malloc(size); void *ptr = malloc(size);
PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n", PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n",
@ -262,7 +264,7 @@ INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
return ptr; return ptr;
} }
INLINE void *SafeFree(void *ptr, const char *msg) { DEVICE void *SafeFree(void *ptr, const char *msg) {
PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg); PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
free(ptr); free(ptr);
return NULL; return NULL;
@ -272,14 +274,14 @@ INLINE void *SafeFree(void *ptr, const char *msg) {
// Teams Reduction Scratchpad Helpers // Teams Reduction Scratchpad Helpers
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
INLINE unsigned int *GetTeamsReductionTimestamp() { DEVICE unsigned int *GetTeamsReductionTimestamp() {
return static_cast<unsigned int *>(ReductionScratchpadPtr); return static_cast<unsigned int *>(ReductionScratchpadPtr);
} }
INLINE char *GetTeamsReductionScratchpad() { DEVICE char *GetTeamsReductionScratchpad() {
return static_cast<char *>(ReductionScratchpadPtr) + 256; return static_cast<char *>(ReductionScratchpadPtr) + 256;
} }
INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) { DEVICE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) {
ReductionScratchpadPtr = ScratchpadPtr; ReductionScratchpadPtr = ScratchpadPtr;
} }

View File

@ -10,7 +10,12 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#ifndef OMPTARGET_SUPPORT_H
#define OMPTARGET_SUPPORT_H
#include "interface.h"
#include "target_impl.h" #include "target_impl.h"
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Execution Parameters // Execution Parameters
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -26,58 +31,67 @@ enum RuntimeMode {
RuntimeMask = 0x02u, RuntimeMask = 0x02u,
}; };
INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode); DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
INLINE bool isGenericMode(); DEVICE bool isGenericMode();
INLINE bool isSPMDMode(); DEVICE bool isSPMDMode();
INLINE bool isRuntimeUninitialized(); DEVICE bool isRuntimeUninitialized();
INLINE bool isRuntimeInitialized(); DEVICE bool isRuntimeInitialized();
////////////////////////////////////////////////////////////////////////////////
// Execution Modes based on location parameter fields
////////////////////////////////////////////////////////////////////////////////
DEVICE bool checkSPMDMode(kmp_Ident *loc);
DEVICE bool checkGenericMode(kmp_Ident *loc);
DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc);
DEVICE bool checkRuntimeInitialized(kmp_Ident *loc);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// get info from machine // get info from machine
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// get low level ids of resources // get low level ids of resources
INLINE int GetThreadIdInBlock(); DEVICE int GetThreadIdInBlock();
INLINE int GetBlockIdInKernel(); DEVICE int GetBlockIdInKernel();
INLINE int GetNumberOfBlocksInKernel(); DEVICE int GetNumberOfBlocksInKernel();
INLINE int GetNumberOfThreadsInBlock(); DEVICE int GetNumberOfThreadsInBlock();
INLINE unsigned GetWarpId(); DEVICE unsigned GetWarpId();
INLINE unsigned GetLaneId(); DEVICE unsigned GetLaneId();
// get global ids to locate tread/team info (constant regardless of OMP) // get global ids to locate tread/team info (constant regardless of OMP)
INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode); DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
INLINE int GetMasterThreadID(); DEVICE int GetMasterThreadID();
INLINE int GetNumberOfWorkersInTeam(); DEVICE int GetNumberOfWorkersInTeam();
// get OpenMP thread and team ids // get OpenMP thread and team ids
INLINE int GetOmpThreadId(int threadId, DEVICE int GetOmpThreadId(int threadId,
bool isSPMDExecutionMode); // omp_thread_num bool isSPMDExecutionMode); // omp_thread_num
INLINE int GetOmpTeamId(); // omp_team_num DEVICE int GetOmpTeamId(); // omp_team_num
// get OpenMP number of threads and team // get OpenMP number of threads and team
INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads
INLINE int GetNumberOfOmpTeams(); // omp_num_teams DEVICE int GetNumberOfOmpTeams(); // omp_num_teams
// get OpenMP number of procs // get OpenMP number of procs
INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode); DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode);
INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode); DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode);
// masters // masters
INLINE int IsTeamMaster(int ompThreadId); DEVICE int IsTeamMaster(int ompThreadId);
// Parallel level // Parallel level
INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask); DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask); DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Memory // Memory
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// safe alloc and free // safe alloc and free
INLINE void *SafeMalloc(size_t size, const char *msg); // check if success DEVICE void *SafeMalloc(size_t size, const char *msg); // check if success
INLINE void *SafeFree(void *ptr, const char *msg); DEVICE void *SafeFree(void *ptr, const char *msg);
// pad to a alignment (power of 2 only) // pad to a alignment (power of 2 only)
INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment); DEVICE unsigned long PadBytes(unsigned long size, unsigned long alignment);
#define ADD_BYTES(_addr, _bytes) \ #define ADD_BYTES(_addr, _bytes) \
((void *)((char *)((void *)(_addr)) + (_bytes))) ((void *)((char *)((void *)(_addr)) + (_bytes)))
#define SUB_BYTES(_addr, _bytes) \ #define SUB_BYTES(_addr, _bytes) \
@ -86,6 +100,8 @@ INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Teams Reduction Scratchpad Helpers // Teams Reduction Scratchpad Helpers
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
INLINE unsigned int *GetTeamsReductionTimestamp(); DEVICE unsigned int *GetTeamsReductionTimestamp();
INLINE char *GetTeamsReductionScratchpad(); DEVICE char *GetTeamsReductionScratchpad();
INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr); DEVICE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr);
#endif

View File

@ -15,8 +15,9 @@
#include <cuda.h> #include <cuda.h>
#include "nvptx_interface.h" #include "nvptx_interface.h"
#define INLINE __forceinline__ __device__ #define DEVICE __device__
#define NOINLINE __noinline__ __device__ #define INLINE __forceinline__ DEVICE
#define NOINLINE __noinline__ DEVICE
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Kernel options // Kernel options