forked from OSchip/llvm-project
[libomptarget] Move supporti.h to support.cu
Summary: [libomptarget] Move supporti.h to support.cu Reimplementation of D69652, without the unity build and refactors. Will need a clean build of libomptarget as the cmakelists changed. Reviewers: ABataev, jdoerfert Reviewed By: jdoerfert Subscribers: mgorny, jfb, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D70131
This commit is contained in:
parent
6ea4775900
commit
fd9fa9995c
|
@ -55,6 +55,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
|
||||||
src/omptarget-nvptx.cu
|
src/omptarget-nvptx.cu
|
||||||
src/parallel.cu
|
src/parallel.cu
|
||||||
src/reduction.cu
|
src/reduction.cu
|
||||||
|
src/support.cu
|
||||||
src/sync.cu
|
src/sync.cu
|
||||||
src/task.cu
|
src/task.cu
|
||||||
)
|
)
|
||||||
|
|
|
@ -385,6 +385,5 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#include "omptarget-nvptxi.h"
|
#include "omptarget-nvptxi.h"
|
||||||
#include "supporti.h"
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
//===--------- supporti.h - NVPTX OpenMP support functions ------- CUDA -*-===//
|
//===--------- support.cu - NVPTX OpenMP support functions ------- CUDA -*-===//
|
||||||
//
|
//
|
||||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
// See https://llvm.org/LICENSE.txt for license information.
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
@ -10,26 +10,28 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "support.h"
|
||||||
|
#include "debug.h"
|
||||||
|
#include "omptarget-nvptx.h"
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Execution Parameters
|
// Execution Parameters
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#include "target_impl.h"
|
DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
|
||||||
|
|
||||||
INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
|
|
||||||
execution_param = EMode;
|
execution_param = EMode;
|
||||||
execution_param |= RMode;
|
execution_param |= RMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
|
DEVICE bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
|
||||||
|
|
||||||
INLINE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; }
|
DEVICE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; }
|
||||||
|
|
||||||
INLINE bool isRuntimeUninitialized() {
|
DEVICE bool isRuntimeUninitialized() {
|
||||||
return (execution_param & RuntimeMask) == RuntimeUninitialized;
|
return (execution_param & RuntimeMask) == RuntimeUninitialized;
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE bool isRuntimeInitialized() {
|
DEVICE bool isRuntimeInitialized() {
|
||||||
return (execution_param & RuntimeMask) == RuntimeInitialized;
|
return (execution_param & RuntimeMask) == RuntimeInitialized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,7 +39,7 @@ INLINE bool isRuntimeInitialized() {
|
||||||
// Execution Modes based on location parameter fields
|
// Execution Modes based on location parameter fields
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
INLINE bool checkSPMDMode(kmp_Ident *loc) {
|
DEVICE bool checkSPMDMode(kmp_Ident *loc) {
|
||||||
if (!loc)
|
if (!loc)
|
||||||
return isSPMDMode();
|
return isSPMDMode();
|
||||||
|
|
||||||
|
@ -55,11 +57,11 @@ INLINE bool checkSPMDMode(kmp_Ident *loc) {
|
||||||
return isSPMDMode();
|
return isSPMDMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE bool checkGenericMode(kmp_Ident *loc) {
|
DEVICE bool checkGenericMode(kmp_Ident *loc) {
|
||||||
return !checkSPMDMode(loc);
|
return !checkSPMDMode(loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
|
DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc) {
|
||||||
if (!loc)
|
if (!loc)
|
||||||
return isRuntimeUninitialized();
|
return isRuntimeUninitialized();
|
||||||
|
|
||||||
|
@ -82,7 +84,7 @@ INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
|
||||||
return isRuntimeUninitialized();
|
return isRuntimeUninitialized();
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
|
DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
|
||||||
return !checkRuntimeUninitialized(loc);
|
return !checkRuntimeUninitialized(loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -96,17 +98,17 @@ INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
|
||||||
//
|
//
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
INLINE int GetThreadIdInBlock() { return threadIdx.x; }
|
DEVICE int GetThreadIdInBlock() { return threadIdx.x; }
|
||||||
|
|
||||||
INLINE int GetBlockIdInKernel() { return blockIdx.x; }
|
DEVICE int GetBlockIdInKernel() { return blockIdx.x; }
|
||||||
|
|
||||||
INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
|
DEVICE int GetNumberOfBlocksInKernel() { return gridDim.x; }
|
||||||
|
|
||||||
INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
|
DEVICE int GetNumberOfThreadsInBlock() { return blockDim.x; }
|
||||||
|
|
||||||
INLINE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
|
DEVICE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
|
||||||
|
|
||||||
INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
|
DEVICE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
|
@ -122,11 +124,11 @@ INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
|
||||||
// If NumThreads is 1024, master id is 992.
|
// If NumThreads is 1024, master id is 992.
|
||||||
//
|
//
|
||||||
// Called in Generic Execution Mode only.
|
// Called in Generic Execution Mode only.
|
||||||
INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
|
DEVICE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
|
||||||
|
|
||||||
// The last warp is reserved for the master; other warps are workers.
|
// The last warp is reserved for the master; other warps are workers.
|
||||||
// Called in Generic Execution Mode only.
|
// Called in Generic Execution Mode only.
|
||||||
INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
|
DEVICE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// get thread id in team
|
// get thread id in team
|
||||||
|
@ -135,7 +137,7 @@ INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
|
||||||
// or a serial region by the master. If the master (whose CUDA thread
|
// or a serial region by the master. If the master (whose CUDA thread
|
||||||
// id is GetMasterThreadID()) calls this routine, we return 0 because
|
// id is GetMasterThreadID()) calls this routine, we return 0 because
|
||||||
// it is a shadow for the first worker.
|
// it is a shadow for the first worker.
|
||||||
INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
|
DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
|
||||||
// Implemented using control flow (predication) instead of with a modulo
|
// Implemented using control flow (predication) instead of with a modulo
|
||||||
// operation.
|
// operation.
|
||||||
int tid = GetThreadIdInBlock();
|
int tid = GetThreadIdInBlock();
|
||||||
|
@ -151,7 +153,7 @@ INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
|
||||||
//
|
//
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
|
DEVICE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
|
||||||
// omp_thread_num
|
// omp_thread_num
|
||||||
int rc;
|
int rc;
|
||||||
if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) {
|
if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) {
|
||||||
|
@ -167,7 +169,7 @@ INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
|
DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
|
||||||
// omp_num_threads
|
// omp_num_threads
|
||||||
int rc;
|
int rc;
|
||||||
int Level = parallelLevel[GetWarpId()];
|
int Level = parallelLevel[GetWarpId()];
|
||||||
|
@ -185,12 +187,12 @@ INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Team id linked to OpenMP
|
// Team id linked to OpenMP
|
||||||
|
|
||||||
INLINE int GetOmpTeamId() {
|
DEVICE int GetOmpTeamId() {
|
||||||
// omp_team_num
|
// omp_team_num
|
||||||
return GetBlockIdInKernel(); // assume 1 block per team
|
return GetBlockIdInKernel(); // assume 1 block per team
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE int GetNumberOfOmpTeams() {
|
DEVICE int GetNumberOfOmpTeams() {
|
||||||
// omp_num_teams
|
// omp_num_teams
|
||||||
return GetNumberOfBlocksInKernel(); // assume 1 block per team
|
return GetNumberOfBlocksInKernel(); // assume 1 block per team
|
||||||
}
|
}
|
||||||
|
@ -198,12 +200,12 @@ INLINE int GetNumberOfOmpTeams() {
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Masters
|
// Masters
|
||||||
|
|
||||||
INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
|
DEVICE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Parallel level
|
// Parallel level
|
||||||
|
|
||||||
INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
|
DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
|
||||||
__kmpc_impl_syncwarp(Mask);
|
__kmpc_impl_syncwarp(Mask);
|
||||||
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
|
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
|
||||||
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
|
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
|
||||||
|
@ -215,7 +217,7 @@ INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
|
||||||
__kmpc_impl_syncwarp(Mask);
|
__kmpc_impl_syncwarp(Mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
|
DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
|
||||||
__kmpc_impl_syncwarp(Mask);
|
__kmpc_impl_syncwarp(Mask);
|
||||||
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
|
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
|
||||||
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
|
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
|
||||||
|
@ -231,13 +233,13 @@ INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
|
||||||
// get OpenMP number of procs
|
// get OpenMP number of procs
|
||||||
|
|
||||||
// Get the number of processors in the device.
|
// Get the number of processors in the device.
|
||||||
INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
|
DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
|
||||||
if (!isSPMDExecutionMode)
|
if (!isSPMDExecutionMode)
|
||||||
return GetNumberOfWorkersInTeam();
|
return GetNumberOfWorkersInTeam();
|
||||||
return GetNumberOfThreadsInBlock();
|
return GetNumberOfThreadsInBlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
|
DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
|
||||||
return GetNumberOfProcsInDevice(isSPMDExecutionMode);
|
return GetNumberOfProcsInDevice(isSPMDExecutionMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -245,7 +247,7 @@ INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
|
||||||
// Memory
|
// Memory
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
INLINE unsigned long PadBytes(unsigned long size,
|
DEVICE unsigned long PadBytes(unsigned long size,
|
||||||
unsigned long alignment) // must be a power of 2
|
unsigned long alignment) // must be a power of 2
|
||||||
{
|
{
|
||||||
// compute the necessary padding to satisfy alignment constraint
|
// compute the necessary padding to satisfy alignment constraint
|
||||||
|
@ -254,7 +256,7 @@ INLINE unsigned long PadBytes(unsigned long size,
|
||||||
return (~(unsigned long)size + 1) & (alignment - 1);
|
return (~(unsigned long)size + 1) & (alignment - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
|
DEVICE void *SafeMalloc(size_t size, const char *msg) // check if success
|
||||||
{
|
{
|
||||||
void *ptr = malloc(size);
|
void *ptr = malloc(size);
|
||||||
PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n",
|
PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n",
|
||||||
|
@ -262,7 +264,7 @@ INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE void *SafeFree(void *ptr, const char *msg) {
|
DEVICE void *SafeFree(void *ptr, const char *msg) {
|
||||||
PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
|
PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
|
||||||
free(ptr);
|
free(ptr);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -272,14 +274,14 @@ INLINE void *SafeFree(void *ptr, const char *msg) {
|
||||||
// Teams Reduction Scratchpad Helpers
|
// Teams Reduction Scratchpad Helpers
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
INLINE unsigned int *GetTeamsReductionTimestamp() {
|
DEVICE unsigned int *GetTeamsReductionTimestamp() {
|
||||||
return static_cast<unsigned int *>(ReductionScratchpadPtr);
|
return static_cast<unsigned int *>(ReductionScratchpadPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE char *GetTeamsReductionScratchpad() {
|
DEVICE char *GetTeamsReductionScratchpad() {
|
||||||
return static_cast<char *>(ReductionScratchpadPtr) + 256;
|
return static_cast<char *>(ReductionScratchpadPtr) + 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) {
|
DEVICE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) {
|
||||||
ReductionScratchpadPtr = ScratchpadPtr;
|
ReductionScratchpadPtr = ScratchpadPtr;
|
||||||
}
|
}
|
|
@ -10,7 +10,12 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef OMPTARGET_SUPPORT_H
|
||||||
|
#define OMPTARGET_SUPPORT_H
|
||||||
|
|
||||||
|
#include "interface.h"
|
||||||
#include "target_impl.h"
|
#include "target_impl.h"
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Execution Parameters
|
// Execution Parameters
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -26,58 +31,67 @@ enum RuntimeMode {
|
||||||
RuntimeMask = 0x02u,
|
RuntimeMask = 0x02u,
|
||||||
};
|
};
|
||||||
|
|
||||||
INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
|
DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
|
||||||
INLINE bool isGenericMode();
|
DEVICE bool isGenericMode();
|
||||||
INLINE bool isSPMDMode();
|
DEVICE bool isSPMDMode();
|
||||||
INLINE bool isRuntimeUninitialized();
|
DEVICE bool isRuntimeUninitialized();
|
||||||
INLINE bool isRuntimeInitialized();
|
DEVICE bool isRuntimeInitialized();
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Execution Modes based on location parameter fields
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
DEVICE bool checkSPMDMode(kmp_Ident *loc);
|
||||||
|
DEVICE bool checkGenericMode(kmp_Ident *loc);
|
||||||
|
DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc);
|
||||||
|
DEVICE bool checkRuntimeInitialized(kmp_Ident *loc);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// get info from machine
|
// get info from machine
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// get low level ids of resources
|
// get low level ids of resources
|
||||||
INLINE int GetThreadIdInBlock();
|
DEVICE int GetThreadIdInBlock();
|
||||||
INLINE int GetBlockIdInKernel();
|
DEVICE int GetBlockIdInKernel();
|
||||||
INLINE int GetNumberOfBlocksInKernel();
|
DEVICE int GetNumberOfBlocksInKernel();
|
||||||
INLINE int GetNumberOfThreadsInBlock();
|
DEVICE int GetNumberOfThreadsInBlock();
|
||||||
INLINE unsigned GetWarpId();
|
DEVICE unsigned GetWarpId();
|
||||||
INLINE unsigned GetLaneId();
|
DEVICE unsigned GetLaneId();
|
||||||
|
|
||||||
// get global ids to locate tread/team info (constant regardless of OMP)
|
// get global ids to locate tread/team info (constant regardless of OMP)
|
||||||
INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
|
DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
|
||||||
INLINE int GetMasterThreadID();
|
DEVICE int GetMasterThreadID();
|
||||||
INLINE int GetNumberOfWorkersInTeam();
|
DEVICE int GetNumberOfWorkersInTeam();
|
||||||
|
|
||||||
// get OpenMP thread and team ids
|
// get OpenMP thread and team ids
|
||||||
INLINE int GetOmpThreadId(int threadId,
|
DEVICE int GetOmpThreadId(int threadId,
|
||||||
bool isSPMDExecutionMode); // omp_thread_num
|
bool isSPMDExecutionMode); // omp_thread_num
|
||||||
INLINE int GetOmpTeamId(); // omp_team_num
|
DEVICE int GetOmpTeamId(); // omp_team_num
|
||||||
|
|
||||||
// get OpenMP number of threads and team
|
// get OpenMP number of threads and team
|
||||||
INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads
|
DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads
|
||||||
INLINE int GetNumberOfOmpTeams(); // omp_num_teams
|
DEVICE int GetNumberOfOmpTeams(); // omp_num_teams
|
||||||
|
|
||||||
// get OpenMP number of procs
|
// get OpenMP number of procs
|
||||||
INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode);
|
DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode);
|
||||||
INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode);
|
DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode);
|
||||||
|
|
||||||
// masters
|
// masters
|
||||||
INLINE int IsTeamMaster(int ompThreadId);
|
DEVICE int IsTeamMaster(int ompThreadId);
|
||||||
|
|
||||||
// Parallel level
|
// Parallel level
|
||||||
INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
|
DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
|
||||||
INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
|
DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Memory
|
// Memory
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// safe alloc and free
|
// safe alloc and free
|
||||||
INLINE void *SafeMalloc(size_t size, const char *msg); // check if success
|
DEVICE void *SafeMalloc(size_t size, const char *msg); // check if success
|
||||||
INLINE void *SafeFree(void *ptr, const char *msg);
|
DEVICE void *SafeFree(void *ptr, const char *msg);
|
||||||
// pad to a alignment (power of 2 only)
|
// pad to a alignment (power of 2 only)
|
||||||
INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
|
DEVICE unsigned long PadBytes(unsigned long size, unsigned long alignment);
|
||||||
#define ADD_BYTES(_addr, _bytes) \
|
#define ADD_BYTES(_addr, _bytes) \
|
||||||
((void *)((char *)((void *)(_addr)) + (_bytes)))
|
((void *)((char *)((void *)(_addr)) + (_bytes)))
|
||||||
#define SUB_BYTES(_addr, _bytes) \
|
#define SUB_BYTES(_addr, _bytes) \
|
||||||
|
@ -86,6 +100,8 @@ INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Teams Reduction Scratchpad Helpers
|
// Teams Reduction Scratchpad Helpers
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
INLINE unsigned int *GetTeamsReductionTimestamp();
|
DEVICE unsigned int *GetTeamsReductionTimestamp();
|
||||||
INLINE char *GetTeamsReductionScratchpad();
|
DEVICE char *GetTeamsReductionScratchpad();
|
||||||
INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr);
|
DEVICE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
@ -15,8 +15,9 @@
|
||||||
#include <cuda.h>
|
#include <cuda.h>
|
||||||
#include "nvptx_interface.h"
|
#include "nvptx_interface.h"
|
||||||
|
|
||||||
#define INLINE __forceinline__ __device__
|
#define DEVICE __device__
|
||||||
#define NOINLINE __noinline__ __device__
|
#define INLINE __forceinline__ DEVICE
|
||||||
|
#define NOINLINE __noinline__ DEVICE
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Kernel options
|
// Kernel options
|
||||||
|
|
Loading…
Reference in New Issue