forked from OSchip/llvm-project
[OpenMP][OMPT] Fix and add event callbacks for detached tasks
The OpenMP spec has the task-fulfill event for a call to omp_fulfill_event. If the task did not yet finish execution, ompt_task_early_fulfill is used, otherwise ompt_task_late_fulfill. If a task does not complete, when the execution finishes (i.e., the task goes in detached mode), ompt_task_detach instead of ompt_task_complete must be used, when the next task is scheduled. A test for both cases is included, which only work with clang-11+ Reviewed By: hbae Differential revision: https://reviews.llvm.org/D80843
This commit is contained in:
parent
e0bca46b08
commit
10995c77b4
|
@ -577,24 +577,20 @@ static inline void __ompt_task_start(kmp_task_t *task,
|
|||
|
||||
// __ompt_task_finish:
|
||||
// Build and trigger final task-schedule event
|
||||
static inline void
|
||||
__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task,
|
||||
ompt_task_status_t status = ompt_task_complete) {
|
||||
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
|
||||
if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
|
||||
taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
|
||||
status = ompt_task_cancel;
|
||||
}
|
||||
|
||||
/* let OMPT know that we're returning to the callee task */
|
||||
static inline void __ompt_task_finish(kmp_task_t *task,
|
||||
kmp_taskdata_t *resumed_task,
|
||||
ompt_task_status_t status) {
|
||||
if (ompt_enabled.ompt_callback_task_schedule) {
|
||||
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
|
||||
if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
|
||||
taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
|
||||
status = ompt_task_cancel;
|
||||
}
|
||||
|
||||
/* let OMPT know that we're returning to the callee task */
|
||||
ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
|
||||
&(taskdata->ompt_task_info.task_data), status,
|
||||
&((resumed_task ? resumed_task
|
||||
: (taskdata->ompt_task_info.scheduling_parent
|
||||
? taskdata->ompt_task_info.scheduling_parent
|
||||
: taskdata->td_parent))
|
||||
->ompt_task_info.task_data));
|
||||
(resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -803,6 +799,10 @@ static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
|
|||
// gtid: global thread ID for calling thread
|
||||
// task: task to be finished
|
||||
// resumed_task: task to be resumed. (may be NULL if task is serialized)
|
||||
//
|
||||
// template<ompt>: effectively ompt_enabled.enabled!=0
|
||||
// the version with ompt=false is inlined, allowing to optimize away all ompt
|
||||
// code in this case
|
||||
template <bool ompt>
|
||||
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
|
||||
kmp_taskdata_t *resumed_task) {
|
||||
|
@ -849,10 +849,6 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
|
|||
return;
|
||||
}
|
||||
}
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt)
|
||||
__ompt_task_finish(task, resumed_task);
|
||||
#endif
|
||||
|
||||
// Check mutexinoutset dependencies, release locks
|
||||
kmp_depnode_t *node = taskdata->td_depnode;
|
||||
|
@ -907,8 +903,18 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
|
|||
// task finished execution
|
||||
KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
|
||||
taskdata->td_flags.executing = 0; // suspend the finishing task
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
// For a detached task, which is not completed, we switch back
|
||||
// the omp_fulfill_event signals completion
|
||||
// locking is necessary to avoid a race with ompt_task_late_fulfill
|
||||
if (ompt)
|
||||
__ompt_task_finish(task, resumed_task, ompt_task_detach);
|
||||
#endif
|
||||
|
||||
// no access to taskdata after this point!
|
||||
// __kmp_fulfill_event might free taskdata at any time from now
|
||||
|
||||
taskdata->td_flags.proxy = TASK_PROXY; // proxify!
|
||||
detach = true;
|
||||
}
|
||||
|
@ -919,6 +925,12 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
|
|||
if (!detach) {
|
||||
taskdata->td_flags.complete = 1; // mark the task as completed
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
// This is not a detached task, we are done here
|
||||
if (ompt)
|
||||
__ompt_task_finish(task, resumed_task, ompt_task_complete);
|
||||
#endif
|
||||
|
||||
// Only need to keep track of count if team parallel and tasking not
|
||||
// serialized, or task is detachable and event has already been fulfilled
|
||||
if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
|
||||
|
@ -3867,12 +3879,26 @@ void __kmp_fulfill_event(kmp_event_t *event) {
|
|||
// point.
|
||||
// We need to take the lock to avoid races
|
||||
__kmp_acquire_tas_lock(&event->lock, gtid);
|
||||
if (taskdata->td_flags.proxy == TASK_PROXY)
|
||||
if (taskdata->td_flags.proxy == TASK_PROXY) {
|
||||
detached = true;
|
||||
} else {
|
||||
#if OMPT_SUPPORT
|
||||
// The OMPT event must occur under mutual exclusion,
|
||||
// otherwise the tool might access ptask after free
|
||||
if (UNLIKELY(ompt_enabled.enabled))
|
||||
__ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
|
||||
#endif
|
||||
}
|
||||
event->type = KMP_EVENT_UNINITIALIZED;
|
||||
__kmp_release_tas_lock(&event->lock, gtid);
|
||||
|
||||
if (detached) {
|
||||
#if OMPT_SUPPORT
|
||||
// We free ptask afterwards and know the task is finished,
|
||||
// so locking is not necessary
|
||||
if (UNLIKELY(ompt_enabled.enabled))
|
||||
__ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
|
||||
#endif
|
||||
// If the task detached complete the proxy task
|
||||
if (gtid >= 0) {
|
||||
kmp_team_t *team = taskdata->td_team;
|
||||
|
|
|
@ -734,9 +734,13 @@ on_ompt_callback_task_schedule(
|
|||
ompt_task_status_t prior_task_status,
|
||||
ompt_data_t *second_task_data)
|
||||
{
|
||||
printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, ompt_task_status_t_values[prior_task_status], prior_task_status);
|
||||
if(prior_task_status == ompt_task_complete)
|
||||
{
|
||||
printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64
|
||||
", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
|
||||
ompt_get_thread_data()->value, first_task_data->value,
|
||||
(second_task_data ? second_task_data->value : -1),
|
||||
ompt_task_status_t_values[prior_task_status], prior_task_status);
|
||||
if (prior_task_status == ompt_task_complete ||
|
||||
prior_task_status == ompt_task_late_fulfill) {
|
||||
printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
// RUN: %libomp-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' \
|
||||
// RUN: %libomp-run | %sort-threads | FileCheck %s
|
||||
|
||||
// Checked gcc 9.2 still does not support detach clause on task construct.
|
||||
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9
|
||||
// clang supports detach clause since version 11.
|
||||
// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
|
||||
// icc compiler does not support detach clause.
|
||||
// UNSUPPORTED: icc
|
||||
|
||||
#include "callback.h"
|
||||
#include <omp.h>
|
||||
|
||||
int main() {
|
||||
#pragma omp parallel
|
||||
#pragma omp master
|
||||
{
|
||||
omp_event_handle_t event;
|
||||
#pragma omp task detach(event) if (0)
|
||||
{ omp_fulfill_event(event); }
|
||||
#pragma omp taskwait
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check if libomp supports the callbacks for this test.
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
|
||||
|
||||
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
|
||||
// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
|
||||
// CHECK-SAME: parent_task_frame.exit=[[NULL]],
|
||||
// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
|
||||
// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
|
||||
// CHECK-SAME: requested_team_size=3,
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
|
||||
// CHECK-SAME: parallel_id=[[PARALLEL_ID]],
|
||||
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
|
||||
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
|
||||
// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}},
|
||||
// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
|
||||
// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]],
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
|
||||
// CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]],
|
||||
// CHECK-SAME: second_task_id=[[TASK_ID]],
|
||||
// CHECK-SAME: prior_task_status=ompt_task_switch=7
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
|
||||
// CHECK-SAME: first_task_id=[[TASK_ID]],
|
||||
// CHECK-SAME: second_task_id=18446744073709551615,
|
||||
// CHECK-SAME: prior_task_status=ompt_task_early_fulfill=5
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
|
||||
// CHECK-SAME: first_task_id=[[TASK_ID]],
|
||||
// CHECK-SAME: second_task_id=[[IMPLICIT_TASK_ID]],
|
||||
// CHECK-SAME: prior_task_status=ompt_task_complete=1
|
|
@ -0,0 +1,76 @@
|
|||
// RUN: %libomp-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' \
|
||||
// RUN: %libomp-run | %sort-threads | FileCheck %s
|
||||
|
||||
// Checked gcc 9.2 still does not support detach clause on task construct.
|
||||
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9
|
||||
// clang supports detach clause since version 11.
|
||||
// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
|
||||
// icc compiler does not support detach clause.
|
||||
// UNSUPPORTED: icc
|
||||
|
||||
#include "callback.h"
|
||||
#include <omp.h>
|
||||
|
||||
int main() {
|
||||
#pragma omp parallel
|
||||
#pragma omp master
|
||||
{
|
||||
omp_event_handle_t event;
|
||||
omp_event_handle_t *f_event;
|
||||
#pragma omp task detach(event) depend(out : f_event) shared(f_event) if (0)
|
||||
{
|
||||
printf("task 1\n");
|
||||
f_event = &event;
|
||||
}
|
||||
#pragma omp task depend(in : f_event)
|
||||
{ printf("task 2\n"); }
|
||||
printf("calling omp_fulfill_event\n");
|
||||
omp_fulfill_event(*f_event);
|
||||
#pragma omp taskwait
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check if libomp supports the callbacks for this test.
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
|
||||
|
||||
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
|
||||
// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
|
||||
// CHECK-SAME: parent_task_frame.exit=[[NULL]],
|
||||
// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
|
||||
// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
|
||||
// CHECK-SAME: requested_team_size=3,
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
|
||||
// CHECK-SAME: parallel_id=[[PARALLEL_ID]],
|
||||
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
|
||||
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
|
||||
// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}},
|
||||
// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
|
||||
// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]],
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
|
||||
// CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]],
|
||||
// CHECK-SAME: second_task_id=[[TASK_ID]],
|
||||
// CHECK-SAME: prior_task_status=ompt_task_switch=7
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
|
||||
// CHECK-SAME: first_task_id=[[TASK_ID]],
|
||||
// CHECK-SAME: second_task_id=[[IMPLICIT_TASK_ID]],
|
||||
// CHECK-SAME: prior_task_status=ompt_task_detach=4
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
|
||||
// CHECK-SAME: first_task_id=[[TASK_ID]],
|
||||
// CHECK-SAME: second_task_id=18446744073709551615,
|
||||
// CHECK-SAME: prior_task_status=ompt_task_late_fulfill=6
|
Loading…
Reference in New Issue