forked from OSchip/llvm-project
[libomptarget][nfc] Update remaining uint32 to use lanemask_t
Summary: [libomptarget][nfc] Update remaining uint32 to use lanemask_t Update a few functions in the API to use lanemask_t instead of i32. NFC for nvptx. Also update the ActiveThreads type in DataSharingStateTy. This removes a lot of #ifdef from the downstream amdgcn implementation. Reviewers: ABataev, jdoerfert, grokos, ronlieb, RaviNarayanaswamy Subscribers: openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D68513 llvm-svn: 373806
This commit is contained in:
parent
67cfa79c01
commit
58fd6b5b9c
|
@ -96,7 +96,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
|
|||
|
||||
EXTERN void *__kmpc_data_sharing_environment_begin(
|
||||
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
|
||||
void **SavedSharedFrame, int32_t *SavedActiveThreads,
|
||||
void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
|
||||
size_t SharingDataSize, size_t SharingDefaultDataSize,
|
||||
int16_t IsOMPRuntimeInitialized) {
|
||||
|
||||
|
@ -117,7 +117,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
|
|||
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
|
||||
void *&StackP = DataSharingState.StackPtr[WID];
|
||||
void * volatile &FrameP = DataSharingState.FramePtr[WID];
|
||||
int32_t &ActiveT = DataSharingState.ActiveThreads[WID];
|
||||
__kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
|
||||
|
||||
DSPRINT0(DSFLAG, "Save current slot/stack values.\n");
|
||||
// Save the current values.
|
||||
|
@ -225,7 +225,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
|
|||
|
||||
EXTERN void __kmpc_data_sharing_environment_end(
|
||||
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
|
||||
void **SavedSharedFrame, int32_t *SavedActiveThreads,
|
||||
void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
|
||||
int32_t IsEntryPoint) {
|
||||
|
||||
DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
|
||||
|
@ -260,7 +260,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
|
|||
// assume that threads will converge right after the call site that started
|
||||
// the environment.
|
||||
if (IsWarpMasterActiveThread()) {
|
||||
int32_t &ActiveT = DataSharingState.ActiveThreads[WID];
|
||||
__kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
|
||||
|
||||
DSPRINT0(DSFLAG, "Before restoring the stack\n");
|
||||
// Zero the bits in the mask. If it is still different from zero, then we
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#define _INTERFACES_H_
|
||||
|
||||
#include "option.h"
|
||||
#include "target_impl.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// OpenMP interface
|
||||
|
@ -422,9 +423,9 @@ EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
|
|||
EXTERN void __kmpc_flush(kmp_Ident *loc);
|
||||
|
||||
// vote
|
||||
EXTERN int32_t __kmpc_warp_active_thread_mask();
|
||||
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask();
|
||||
// syncwarp
|
||||
EXTERN void __kmpc_syncwarp(int32_t);
|
||||
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t);
|
||||
|
||||
// tasks
|
||||
EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
|
||||
|
@ -475,11 +476,13 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
|
|||
EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
|
||||
int16_t IsOMPRuntimeInitialized);
|
||||
EXTERN void __kmpc_kernel_end_parallel();
|
||||
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
|
||||
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
|
||||
__kmpc_impl_lanemask_t Mask,
|
||||
bool *IsFinal,
|
||||
int32_t *LaneSource);
|
||||
EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer);
|
||||
EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
|
||||
EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
|
||||
__kmpc_impl_lanemask_t Mask,
|
||||
bool *IsFinal, int32_t *LaneSource,
|
||||
int32_t *LaneId, int32_t *NumLanes);
|
||||
EXTERN void __kmpc_kernel_end_convergent_simd(void *buffer);
|
||||
|
@ -510,12 +513,13 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *RootS,
|
|||
size_t InitialDataSize);
|
||||
EXTERN void *__kmpc_data_sharing_environment_begin(
|
||||
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
|
||||
void **SavedSharedFrame, int32_t *SavedActiveThreads,
|
||||
void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
|
||||
size_t SharingDataSize, size_t SharingDefaultDataSize,
|
||||
int16_t IsOMPRuntimeInitialized);
|
||||
EXTERN void __kmpc_data_sharing_environment_end(
|
||||
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
|
||||
void **SavedSharedFrame, int32_t *SavedActiveThreads, int32_t IsEntryPoint);
|
||||
void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
|
||||
int32_t IsEntryPoint);
|
||||
|
||||
EXTERN void *
|
||||
__kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
|
||||
|
|
|
@ -380,7 +380,8 @@ public:
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Support for dispatch next
|
||||
|
||||
INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) {
|
||||
INLINE static uint64_t Shuffle(__kmpc_impl_lanemask_t active, int64_t val,
|
||||
int leader) {
|
||||
uint32_t lo, hi;
|
||||
__kmpc_impl_unpack(val, lo, hi);
|
||||
hi = __kmpc_impl_shfl_sync(active, hi, leader);
|
||||
|
|
|
@ -107,7 +107,7 @@ struct DataSharingStateTy {
|
|||
__kmpc_data_sharing_slot *SlotPtr[DS_Max_Warp_Number];
|
||||
void *StackPtr[DS_Max_Warp_Number];
|
||||
void * volatile FramePtr[DS_Max_Warp_Number];
|
||||
int32_t ActiveThreads[DS_Max_Warp_Number];
|
||||
__kmpc_impl_lanemask_t ActiveThreads[DS_Max_Warp_Number];
|
||||
};
|
||||
// Additional worker slot type which is initialized with the default worker slot
|
||||
// size of 4*32 bytes.
|
||||
|
|
|
@ -44,13 +44,14 @@ typedef struct ConvergentSimdJob {
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// support for convergent simd (team of threads in a warp only)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
|
||||
EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
|
||||
__kmpc_impl_lanemask_t Mask,
|
||||
bool *IsFinal, int32_t *LaneSource,
|
||||
int32_t *LaneId, int32_t *NumLanes) {
|
||||
PRINT0(LD_IO, "call to __kmpc_kernel_convergent_simd\n");
|
||||
uint32_t ConvergentMask = Mask;
|
||||
__kmpc_impl_lanemask_t ConvergentMask = Mask;
|
||||
int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask);
|
||||
uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
|
||||
__kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
|
||||
*LaneSource += __kmpc_impl_ffs(WorkRemaining);
|
||||
*IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
|
||||
__kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
|
||||
|
@ -117,13 +118,14 @@ typedef struct ConvergentParallelJob {
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// support for convergent parallelism (team of threads in a warp only)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
|
||||
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
|
||||
__kmpc_impl_lanemask_t Mask,
|
||||
bool *IsFinal,
|
||||
int32_t *LaneSource) {
|
||||
PRINT0(LD_IO, "call to __kmpc_kernel_convergent_parallel\n");
|
||||
uint32_t ConvergentMask = Mask;
|
||||
__kmpc_impl_lanemask_t ConvergentMask = Mask;
|
||||
int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask);
|
||||
uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
|
||||
__kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
|
||||
*LaneSource += __kmpc_impl_ffs(WorkRemaining);
|
||||
*IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
|
||||
__kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
|
||||
|
|
|
@ -140,7 +140,7 @@ EXTERN void __kmpc_flush(kmp_Ident *loc) {
|
|||
// Vote
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
EXTERN int32_t __kmpc_warp_active_thread_mask() {
|
||||
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
|
||||
PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
|
||||
return __kmpc_impl_activemask();
|
||||
}
|
||||
|
@ -149,7 +149,7 @@ EXTERN int32_t __kmpc_warp_active_thread_mask() {
|
|||
// Syncwarp
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
EXTERN void __kmpc_syncwarp(int32_t Mask) {
|
||||
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
|
||||
PRINT0(LD_IO, "call __kmpc_syncwarp\n");
|
||||
__kmpc_impl_syncwarp(Mask);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue