forked from OSchip/llvm-project
Create a runtime option to disable task throttling.
Patch by viroulep (Philippe Virouleau) Differential Revision: https://reviews.llvm.org/D63196 llvm-svn: 364934
This commit is contained in:
parent
50be3481d4
commit
a23806e67a
|
@ -2121,6 +2121,7 @@ typedef enum kmp_tasking_mode {
|
|||
extern kmp_tasking_mode_t
|
||||
__kmp_tasking_mode; /* determines how/when to execute tasks */
|
||||
extern int __kmp_task_stealing_constraint;
|
||||
extern int __kmp_enable_task_throttling;
|
||||
#if OMP_40_ENABLED
|
||||
extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
|
||||
// specified, defaults to 0 otherwise
|
||||
|
|
|
@ -341,6 +341,7 @@ omp_memspace_handle_t const omp_low_lat_mem_space =
|
|||
KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);
|
||||
|
||||
int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
|
||||
int __kmp_enable_task_throttling = 1;
|
||||
|
||||
#ifdef DEBUG_SUSPEND
|
||||
int __kmp_suspend_count = 0;
|
||||
|
|
|
@ -4682,6 +4682,20 @@ static void __kmp_stg_print_forkjoin_frames_mode(kmp_str_buf_t *buffer,
|
|||
} // __kmp_stg_print_forkjoin_frames
|
||||
#endif /* USE_ITT_BUILD */
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// KMP_ENABLE_TASK_THROTTLING
|
||||
|
||||
static void __kmp_stg_parse_task_throttling(char const *name,
|
||||
char const *value, void *data) {
|
||||
__kmp_stg_parse_bool(name, value, &__kmp_enable_task_throttling);
|
||||
} // __kmp_stg_parse_task_throttling
|
||||
|
||||
|
||||
static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer,
|
||||
char const *name, void *data) {
|
||||
__kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling);
|
||||
} // __kmp_stg_print_task_throttling
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// OMP_DISPLAY_ENV
|
||||
|
||||
|
@ -5003,6 +5017,8 @@ static kmp_setting_t __kmp_stg_table[] = {
|
|||
{"KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,
|
||||
__kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0},
|
||||
#endif
|
||||
{"KMP_ENABLE_TASK_THROTTLING", __kmp_stg_parse_task_throttling,
|
||||
__kmp_stg_print_task_throttling, NULL, 0, 0},
|
||||
|
||||
#if OMP_40_ENABLED
|
||||
{"OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env,
|
||||
|
|
|
@ -374,7 +374,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
|
|||
// Check if deque is full
|
||||
if (TCR_4(thread_data->td.td_deque_ntasks) >=
|
||||
TASK_DEQUE_SIZE(thread_data->td)) {
|
||||
if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
|
||||
if (__kmp_enable_task_throttling &&
|
||||
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
|
||||
thread->th.th_current_task)) {
|
||||
KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
|
||||
"TASK_NOT_PUSHED for task %p\n",
|
||||
|
@ -394,7 +395,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
|
|||
// Need to recheck as we can get a proxy task from thread outside of OpenMP
|
||||
if (TCR_4(thread_data->td.td_deque_ntasks) >=
|
||||
TASK_DEQUE_SIZE(thread_data->td)) {
|
||||
if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
|
||||
if (__kmp_enable_task_throttling &&
|
||||
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
|
||||
thread->th.th_current_task)) {
|
||||
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
|
||||
KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=0 %libomp-run
|
||||
// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=1 %libomp-run
|
||||
|
||||
#include<omp.h>
|
||||
#include<stdlib.h>
|
||||
#include<string.h>
|
||||
|
||||
/**
|
||||
* Test the task throttling behavior of the runtime.
|
||||
* Unless OMP_NUM_THREADS is 1, the master thread pushes tasks to its own tasks
|
||||
* queue until either of the following happens:
|
||||
* - the task queue is full, and it starts serializing tasks
|
||||
* - all tasks have been pushed, and it can begin execution
|
||||
* The idea is to create a huge number of tasks which execution are blocked
|
||||
* until the master thread comes to execute tasks (they need to be blocking,
|
||||
* otherwise the second thread will start emptying the queue).
|
||||
* At this point we can check the number of enqueued tasks: iff all tasks have
|
||||
* been enqueued, then there was no task throttling.
|
||||
* Otherwise there has been some sort of task throttling.
|
||||
* If what we detect doesn't match the value of the environment variable, the
|
||||
* test is failed.
|
||||
*/
|
||||
|
||||
|
||||
#define NUM_TASKS 2000
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
int i;
|
||||
int block = 1;
|
||||
int tid;
|
||||
int throttling = strcmp(getenv("KMP_ENABLE_TASK_THROTTLING"), "1") == 0;
|
||||
int enqueued = 0;
|
||||
int failed = -1;
|
||||
|
||||
#pragma omp parallel num_threads(2)
|
||||
#pragma omp master
|
||||
{
|
||||
for (i = 0; i < NUM_TASKS; i++) {
|
||||
enqueued++;
|
||||
#pragma omp task
|
||||
{
|
||||
tid = omp_get_thread_num();
|
||||
if (tid == 0) {
|
||||
// As soon as the master thread starts executing task we should unlock
|
||||
// all tasks, and detect the test failure if it has not been done yet.
|
||||
if (failed < 0)
|
||||
failed = throttling ? enqueued == NUM_TASKS : enqueued < NUM_TASKS;
|
||||
block = 0;
|
||||
}
|
||||
while (block)
|
||||
;
|
||||
}
|
||||
}
|
||||
block = 0;
|
||||
}
|
||||
|
||||
return failed;
|
||||
}
|
Loading…
Reference in New Issue