forked from OSchip/llvm-project
[libomptarget][nfc] Extract function from data_sharing, move to common
Summary:
[libomptarget][nfc] Extract function from data_sharing, move to common.

Finding the first active thread in the warp is different on nvptx and amdgcn, mostly due to warp size and the desire for efficiency.

Reviewers: ABataev, jdoerfert, grokos
Reviewed By: jdoerfert
Subscribers: jvesely, mgorny, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D71643
This commit is contained in:
parent
9d38fd8d0b
commit
8adae6027c
|
@ -57,12 +57,13 @@ get_filename_component(devicertl_base_directory
|
||||||
set(cuda_sources
|
set(cuda_sources
|
||||||
${devicertl_base_directory}/common/src/cancel.cu
|
${devicertl_base_directory}/common/src/cancel.cu
|
||||||
${devicertl_base_directory}/common/src/critical.cu
|
${devicertl_base_directory}/common/src/critical.cu
|
||||||
${devicertl_base_directory}/common/src/loop.cu
|
${devicertl_base_directory}/common/src/data_sharing.cu
|
||||||
${devicertl_base_directory}/common/src/libcall.cu
|
${devicertl_base_directory}/common/src/libcall.cu
|
||||||
${devicertl_base_directory}/common/src/reduction.cu
|
${devicertl_base_directory}/common/src/loop.cu
|
||||||
${devicertl_base_directory}/common/src/omp_data.cu
|
${devicertl_base_directory}/common/src/omp_data.cu
|
||||||
${devicertl_base_directory}/common/src/omptarget.cu
|
${devicertl_base_directory}/common/src/omptarget.cu
|
||||||
${devicertl_base_directory}/common/src/parallel.cu
|
${devicertl_base_directory}/common/src/parallel.cu
|
||||||
|
${devicertl_base_directory}/common/src/reduction.cu
|
||||||
${devicertl_base_directory}/common/src/sync.cu
|
${devicertl_base_directory}/common/src/sync.cu
|
||||||
${devicertl_base_directory}/common/src/task.cu)
|
${devicertl_base_directory}/common/src/task.cu)
|
||||||
|
|
||||||
|
|
|
@ -101,6 +101,8 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
|
||||||
return __lanemask_gt();
|
return __lanemask_gt();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXTERN bool __kmpc_impl_is_first_active_thread();
|
||||||
|
|
||||||
INLINE uint32_t __kmpc_impl_smid() {
|
INLINE uint32_t __kmpc_impl_smid() {
|
||||||
return __smid();
|
return __smid();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
//===----- data_sharing.cu - NVPTX OpenMP debug utilities -------- CUDA -*-===//
|
//===----- data_sharing.cu - OpenMP GPU data sharing ------------- CUDA -*-===//
|
||||||
//
|
//
|
||||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
// See https://llvm.org/LICENSE.txt for license information.
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
@ -6,21 +6,13 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
//
|
//
|
||||||
// This file contains the implementation of data sharing environments/
|
// This file contains the implementation of data sharing environments
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
#include "common/omptarget.h"
|
#include "common/omptarget.h"
|
||||||
#include "target_impl.h"
|
#include "target_impl.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
// Return true if this is the first active thread in the warp.
|
|
||||||
INLINE static bool IsWarpMasterActiveThread() {
|
|
||||||
unsigned long long Mask = __kmpc_impl_activemask();
|
|
||||||
unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
|
|
||||||
unsigned long long Sh = Mask << ShNum;
|
|
||||||
// Truncate Sh to the 32 lower bits
|
|
||||||
return (unsigned)Sh == 0;
|
|
||||||
}
|
|
||||||
// Return true if this is the master thread.
|
// Return true if this is the master thread.
|
||||||
INLINE static bool IsMasterThread(bool isSPMDExecutionMode) {
|
INLINE static bool IsMasterThread(bool isSPMDExecutionMode) {
|
||||||
return !isSPMDExecutionMode && GetMasterThreadID() == GetThreadIdInBlock();
|
return !isSPMDExecutionMode && GetMasterThreadID() == GetThreadIdInBlock();
|
||||||
|
@ -128,7 +120,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
|
||||||
DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
|
DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
|
||||||
|
|
||||||
// Only the warp active master needs to grow the stack.
|
// Only the warp active master needs to grow the stack.
|
||||||
if (IsWarpMasterActiveThread()) {
|
if (__kmpc_impl_is_first_active_thread()) {
|
||||||
// Save the current active threads.
|
// Save the current active threads.
|
||||||
ActiveT = CurActiveThreads;
|
ActiveT = CurActiveThreads;
|
||||||
|
|
||||||
|
@ -229,7 +221,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
|
||||||
unsigned WID = GetWarpId();
|
unsigned WID = GetWarpId();
|
||||||
|
|
||||||
if (IsEntryPoint) {
|
if (IsEntryPoint) {
|
||||||
if (IsWarpMasterActiveThread()) {
|
if (__kmpc_impl_is_first_active_thread()) {
|
||||||
DSPRINT0(DSFLAG, "Doing clean up\n");
|
DSPRINT0(DSFLAG, "Doing clean up\n");
|
||||||
|
|
||||||
// The master thread cleans the saved slot, because this is an environment
|
// The master thread cleans the saved slot, because this is an environment
|
||||||
|
@ -255,7 +247,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
|
||||||
// warp diverged and returns in different places). This only works if we
|
// warp diverged and returns in different places). This only works if we
|
||||||
// assume that threads will converge right after the call site that started
|
// assume that threads will converge right after the call site that started
|
||||||
// the environment.
|
// the environment.
|
||||||
if (IsWarpMasterActiveThread()) {
|
if (__kmpc_impl_is_first_active_thread()) {
|
||||||
__kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
|
__kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
|
||||||
|
|
||||||
DSPRINT0(DSFLAG, "Before restoring the stack\n");
|
DSPRINT0(DSFLAG, "Before restoring the stack\n");
|
|
@ -53,9 +53,8 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
|
||||||
set(cuda_src_files
|
set(cuda_src_files
|
||||||
${devicertl_common_directory}/src/cancel.cu
|
${devicertl_common_directory}/src/cancel.cu
|
||||||
${devicertl_common_directory}/src/critical.cu
|
${devicertl_common_directory}/src/critical.cu
|
||||||
src/data_sharing.cu
|
${devicertl_common_directory}/src/data_sharing.cu
|
||||||
${devicertl_common_directory}/src/libcall.cu
|
${devicertl_common_directory}/src/libcall.cu
|
||||||
src/target_impl.cu
|
|
||||||
${devicertl_common_directory}/src/loop.cu
|
${devicertl_common_directory}/src/loop.cu
|
||||||
${devicertl_common_directory}/src/omptarget.cu
|
${devicertl_common_directory}/src/omptarget.cu
|
||||||
${devicertl_common_directory}/src/parallel.cu
|
${devicertl_common_directory}/src/parallel.cu
|
||||||
|
@ -63,6 +62,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
|
||||||
${devicertl_common_directory}/src/support.cu
|
${devicertl_common_directory}/src/support.cu
|
||||||
${devicertl_common_directory}/src/sync.cu
|
${devicertl_common_directory}/src/sync.cu
|
||||||
${devicertl_common_directory}/src/task.cu
|
${devicertl_common_directory}/src/task.cu
|
||||||
|
src/target_impl.cu
|
||||||
)
|
)
|
||||||
|
|
||||||
set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
|
set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
|
||||||
|
|
|
@ -94,6 +94,15 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return true if this is the first active thread in the warp.
|
||||||
|
INLINE bool __kmpc_impl_is_first_active_thread() {
|
||||||
|
unsigned long long Mask = __kmpc_impl_activemask();
|
||||||
|
unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
|
||||||
|
unsigned long long Sh = Mask << ShNum;
|
||||||
|
// Truncate Sh to the 32 lower bits
|
||||||
|
return (unsigned)Sh == 0;
|
||||||
|
}
|
||||||
|
|
||||||
INLINE uint32_t __kmpc_impl_smid() {
|
INLINE uint32_t __kmpc_impl_smid() {
|
||||||
uint32_t id;
|
uint32_t id;
|
||||||
asm("mov.u32 %0, %%smid;" : "=r"(id));
|
asm("mov.u32 %0, %%smid;" : "=r"(id));
|
||||||
|
|
Loading…
Reference in New Issue