[libomptarget][nfc] Update remaining uint32 to use lanemask_t

Summary:
[libomptarget][nfc] Update remaining uint32 to use lanemask_t

Update a few functions in the API to use lanemask_t instead of i32. NFC for
nvptx. Also update the ActiveThreads type in DataSharingStateTy.
This removes a lot of #ifdef from the downstream amdgcn implementation.

Reviewers: ABataev, jdoerfert, grokos, ronlieb, RaviNarayanaswamy

Subscribers: openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D68513

llvm-svn: 373806
This commit is contained in:
Jon Chesterfield 2019-10-04 22:30:28 +00:00
parent 67cfa79c01
commit 58fd6b5b9c
6 changed files with 27 additions and 20 deletions

View File

@ -96,7 +96,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
EXTERN void *__kmpc_data_sharing_environment_begin(
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
void **SavedSharedFrame, int32_t *SavedActiveThreads,
void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
size_t SharingDataSize, size_t SharingDefaultDataSize,
int16_t IsOMPRuntimeInitialized) {
@ -117,7 +117,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
void *&StackP = DataSharingState.StackPtr[WID];
void * volatile &FrameP = DataSharingState.FramePtr[WID];
int32_t &ActiveT = DataSharingState.ActiveThreads[WID];
__kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
DSPRINT0(DSFLAG, "Save current slot/stack values.\n");
// Save the current values.
@ -225,7 +225,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
EXTERN void __kmpc_data_sharing_environment_end(
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
void **SavedSharedFrame, int32_t *SavedActiveThreads,
void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
int32_t IsEntryPoint) {
DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
@ -260,7 +260,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
// assume that threads will converge right after the call site that started
// the environment.
if (IsWarpMasterActiveThread()) {
int32_t &ActiveT = DataSharingState.ActiveThreads[WID];
__kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
DSPRINT0(DSFLAG, "Before restoring the stack\n");
// Zero the bits in the mask. If it is still different from zero, then we

View File

@ -19,6 +19,7 @@
#define _INTERFACES_H_
#include "option.h"
#include "target_impl.h"
////////////////////////////////////////////////////////////////////////////////
// OpenMP interface
@ -422,9 +423,9 @@ EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
EXTERN void __kmpc_flush(kmp_Ident *loc);
// vote
EXTERN int32_t __kmpc_warp_active_thread_mask();
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask();
// syncwarp
EXTERN void __kmpc_syncwarp(int32_t);
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t);
// tasks
EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
@ -475,11 +476,13 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
int16_t IsOMPRuntimeInitialized);
EXTERN void __kmpc_kernel_end_parallel();
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
__kmpc_impl_lanemask_t Mask,
bool *IsFinal,
int32_t *LaneSource);
EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer);
EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
__kmpc_impl_lanemask_t Mask,
bool *IsFinal, int32_t *LaneSource,
int32_t *LaneId, int32_t *NumLanes);
EXTERN void __kmpc_kernel_end_convergent_simd(void *buffer);
@ -510,12 +513,13 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *RootS,
size_t InitialDataSize);
EXTERN void *__kmpc_data_sharing_environment_begin(
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
void **SavedSharedFrame, int32_t *SavedActiveThreads,
void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
size_t SharingDataSize, size_t SharingDefaultDataSize,
int16_t IsOMPRuntimeInitialized);
EXTERN void __kmpc_data_sharing_environment_end(
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
void **SavedSharedFrame, int32_t *SavedActiveThreads, int32_t IsEntryPoint);
void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
int32_t IsEntryPoint);
EXTERN void *
__kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,

View File

@ -380,7 +380,8 @@ public:
////////////////////////////////////////////////////////////////////////////////
// Support for dispatch next
INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) {
INLINE static uint64_t Shuffle(__kmpc_impl_lanemask_t active, int64_t val,
int leader) {
uint32_t lo, hi;
__kmpc_impl_unpack(val, lo, hi);
hi = __kmpc_impl_shfl_sync(active, hi, leader);

View File

@ -107,7 +107,7 @@ struct DataSharingStateTy {
__kmpc_data_sharing_slot *SlotPtr[DS_Max_Warp_Number];
void *StackPtr[DS_Max_Warp_Number];
void * volatile FramePtr[DS_Max_Warp_Number];
int32_t ActiveThreads[DS_Max_Warp_Number];
__kmpc_impl_lanemask_t ActiveThreads[DS_Max_Warp_Number];
};
// Additional worker slot type which is initialized with the default worker slot
// size of 4*32 bytes.

View File

@ -44,13 +44,14 @@ typedef struct ConvergentSimdJob {
////////////////////////////////////////////////////////////////////////////////
// support for convergent simd (team of threads in a warp only)
////////////////////////////////////////////////////////////////////////////////
EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
__kmpc_impl_lanemask_t Mask,
bool *IsFinal, int32_t *LaneSource,
int32_t *LaneId, int32_t *NumLanes) {
PRINT0(LD_IO, "call to __kmpc_kernel_convergent_simd\n");
uint32_t ConvergentMask = Mask;
__kmpc_impl_lanemask_t ConvergentMask = Mask;
int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask);
uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
__kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
*LaneSource += __kmpc_impl_ffs(WorkRemaining);
*IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
__kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
@ -117,13 +118,14 @@ typedef struct ConvergentParallelJob {
////////////////////////////////////////////////////////////////////////////////
// support for convergent parallelism (team of threads in a warp only)
////////////////////////////////////////////////////////////////////////////////
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
__kmpc_impl_lanemask_t Mask,
bool *IsFinal,
int32_t *LaneSource) {
PRINT0(LD_IO, "call to __kmpc_kernel_convergent_parallel\n");
uint32_t ConvergentMask = Mask;
__kmpc_impl_lanemask_t ConvergentMask = Mask;
int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask);
uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
__kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
*LaneSource += __kmpc_impl_ffs(WorkRemaining);
*IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
__kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();

View File

@ -140,7 +140,7 @@ EXTERN void __kmpc_flush(kmp_Ident *loc) {
// Vote
////////////////////////////////////////////////////////////////////////////////
EXTERN int32_t __kmpc_warp_active_thread_mask() {
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
return __kmpc_impl_activemask();
}
@ -149,7 +149,7 @@ EXTERN int32_t __kmpc_warp_active_thread_mask() {
// Syncwarp
////////////////////////////////////////////////////////////////////////////////
EXTERN void __kmpc_syncwarp(int32_t Mask) {
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
PRINT0(LD_IO, "call __kmpc_syncwarp\n");
__kmpc_impl_syncwarp(Mask);
}