forked from OSchip/llvm-project
[OpenMP] Add flag for disabling thread state in runtime
The runtime uses thread state values to indicate when we use an ICV or are in nested parallelism. This is done for OpenMP correctness, but it not needed in the majority of cases. The new flag added is `-fopenmp-assume-no-thread-state`. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D120106
This commit is contained in:
parent
03ec026eac
commit
0870a4f59a
|
@ -246,6 +246,7 @@ LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading de
|
|||
LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
|
||||
LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
|
||||
LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
|
||||
LANGOPT(OpenMPNoThreadState , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
|
||||
LANGOPT(RenderScript , 1, 0, "RenderScript")
|
||||
|
||||
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
|
||||
|
|
|
@ -2473,6 +2473,10 @@ def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-te
|
|||
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
|
||||
def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
|
||||
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
|
||||
def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, Group<f_Group>,
|
||||
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>,
|
||||
HelpText<"Assert no thread in a parallel region modifies an ICV">,
|
||||
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
|
||||
defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
|
||||
LangOpts<"OpenMPTargetNewRuntime">, DefaultTrue,
|
||||
PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
|
||||
|
|
|
@ -1210,6 +1210,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
|
|||
"__omp_rtl_assume_teams_oversubscription");
|
||||
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,
|
||||
"__omp_rtl_assume_threads_oversubscription");
|
||||
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,
|
||||
"__omp_rtl_assume_no_thread_state");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5995,6 +5995,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
options::OPT_fno_openmp_assume_threads_oversubscription,
|
||||
/*Default=*/false))
|
||||
CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
|
||||
if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
|
||||
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
|
||||
break;
|
||||
default:
|
||||
// By default, if Clang doesn't know how to generate useful OpenMP code
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-no-thread-state -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-STATE
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -o - | FileCheck %s --check-prefix=CHECK-RUNTIME
|
||||
// expected-no-diagnostics
|
||||
|
||||
|
@ -16,26 +17,37 @@
|
|||
// CHECK: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1
|
||||
// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
|
||||
//.
|
||||
// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr hidden constant i32 111
|
||||
// CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-EQ: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
|
||||
//.
|
||||
// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
|
||||
// CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-DEFAULT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
|
||||
//.
|
||||
// CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
|
||||
// CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
|
||||
// CHECK-THREADS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
|
||||
//.
|
||||
// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
|
||||
// CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
|
||||
// CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-TEAMS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
|
||||
//.
|
||||
// CHECK-STATE: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
|
||||
// CHECK-STATE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-STATE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-STATE: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
|
||||
//.
|
||||
// CHECK-RUNTIME-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
|
||||
// CHECK-RUNTIME-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
|
||||
// CHECK-RUNTIME-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
|
||||
// CHECK-RUNTIME-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
|
||||
//.
|
||||
void foo() {
|
||||
#pragma omp target
|
||||
|
|
|
@ -38,8 +38,13 @@ uint32_t getDebugKind();
|
|||
/// Return the amount of dynamic shared memory that was allocated at launch.
|
||||
uint64_t getDynamicMemorySize();
|
||||
|
||||
/// Return if debugging is enabled for the given debug kind.
|
||||
bool isDebugMode(DebugKind Level);
|
||||
|
||||
/// Indicates if this kernel may require thread-specific states, or if it was
|
||||
/// explicitly disabled by the user.
|
||||
bool mayUseThreadStates();
|
||||
|
||||
} // namespace config
|
||||
} // namespace _OMP
|
||||
|
||||
|
|
|
@ -20,7 +20,9 @@ using namespace _OMP;
|
|||
|
||||
#pragma omp declare target
|
||||
|
||||
extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU
|
||||
// defined by CGOpenMPRuntimeGPU
|
||||
extern uint32_t __omp_rtl_debug_kind;
|
||||
extern uint32_t __omp_rtl_assume_no_thread_state;
|
||||
|
||||
// TODO: We want to change the name as soon as the old runtime is gone.
|
||||
// This variable should be visibile to the plugin so we override the default
|
||||
|
@ -48,4 +50,6 @@ bool config::isDebugMode(config::DebugKind Kind) {
|
|||
return config::getDebugKind() & Kind;
|
||||
}
|
||||
|
||||
bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; }
|
||||
|
||||
#pragma omp end declare target
|
||||
|
|
|
@ -285,7 +285,8 @@ ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
|
|||
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
|
||||
|
||||
uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
|
||||
if (OMP_LIKELY(TeamState.ICVState.LevelVar == 0))
|
||||
if (OMP_LIKELY(!config::mayUseThreadStates() ||
|
||||
TeamState.ICVState.LevelVar == 0))
|
||||
return TeamState.ICVState.*Var;
|
||||
uint32_t TId = mapping::getThreadIdInBlock();
|
||||
if (!ThreadStates[TId]) {
|
||||
|
@ -299,13 +300,13 @@ uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
|
|||
|
||||
uint32_t &lookup32Impl(uint32_t ICVStateTy::*Var) {
|
||||
uint32_t TId = mapping::getThreadIdInBlock();
|
||||
if (OMP_UNLIKELY(ThreadStates[TId]))
|
||||
if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
|
||||
return ThreadStates[TId]->ICVState.*Var;
|
||||
return TeamState.ICVState.*Var;
|
||||
}
|
||||
uint64_t &lookup64Impl(uint64_t ICVStateTy::*Var) {
|
||||
uint64_t TId = mapping::getThreadIdInBlock();
|
||||
if (OMP_UNLIKELY(ThreadStates[TId]))
|
||||
if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
|
||||
return ThreadStates[TId]->ICVState.*Var;
|
||||
return TeamState.ICVState.*Var;
|
||||
}
|
||||
|
@ -380,6 +381,9 @@ void state::init(bool IsSPMD) {
|
|||
}
|
||||
|
||||
void state::enterDataEnvironment(IdentTy *Ident) {
|
||||
ASSERT(config::mayUseThreadStates() &&
|
||||
"Thread state modified while explicitly disabled!");
|
||||
|
||||
unsigned TId = mapping::getThreadIdInBlock();
|
||||
ThreadStateTy *NewThreadState =
|
||||
static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
|
||||
|
@ -388,6 +392,9 @@ void state::enterDataEnvironment(IdentTy *Ident) {
|
|||
}
|
||||
|
||||
void state::exitDataEnvironment() {
|
||||
ASSERT(config::mayUseThreadStates() &&
|
||||
"Thread state modified while explicitly disabled!");
|
||||
|
||||
unsigned TId = mapping::getThreadIdInBlock();
|
||||
resetStateForThread(TId);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue