[OpenMP][OMPT] Fix and add event callbacks for detached tasks

The OpenMP spec has the task-fulfill event for a call to omp_fulfill_event.
If the task did not yet finish execution, ompt_task_early_fulfill is used,
otherwise ompt_task_late_fulfill.
If a task does not complete, when the execution finishes (i.e., the task goes
in detached mode), ompt_task_detach instead of ompt_task_complete must be
used, when the next task is scheduled.

A test for both cases is included, which only work with clang-11+

Reviewed By: hbae

Differential revision: https://reviews.llvm.org/D80843
This commit is contained in:
Joachim Protze 2020-06-01 14:49:45 +02:00
parent e0bca46b08
commit 10995c77b4
4 changed files with 197 additions and 23 deletions

View File

@ -577,24 +577,20 @@ static inline void __ompt_task_start(kmp_task_t *task,
// __ompt_task_finish:
// Build and trigger final task-schedule event
static inline void
__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task,
ompt_task_status_t status = ompt_task_complete) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
status = ompt_task_cancel;
}
/* let OMPT know that we're returning to the callee task */
static inline void __ompt_task_finish(kmp_task_t *task,
kmp_taskdata_t *resumed_task,
ompt_task_status_t status) {
if (ompt_enabled.ompt_callback_task_schedule) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
status = ompt_task_cancel;
}
/* let OMPT know that we're returning to the callee task */
ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
&(taskdata->ompt_task_info.task_data), status,
&((resumed_task ? resumed_task
: (taskdata->ompt_task_info.scheduling_parent
? taskdata->ompt_task_info.scheduling_parent
: taskdata->td_parent))
->ompt_task_info.task_data));
(resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
}
}
#endif
@ -803,6 +799,10 @@ static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed. (may be NULL if task is serialized)
//
// template<ompt>: effectively ompt_enabled.enabled!=0
// the version with ompt=false is inlined, allowing to optimize away all ompt
// code in this case
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
kmp_taskdata_t *resumed_task) {
@ -849,10 +849,6 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
return;
}
}
#if OMPT_SUPPORT
if (ompt)
__ompt_task_finish(task, resumed_task);
#endif
// Check mutexinoutset dependencies, release locks
kmp_depnode_t *node = taskdata->td_depnode;
@ -907,8 +903,18 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
// task finished execution
KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
taskdata->td_flags.executing = 0; // suspend the finishing task
#if OMPT_SUPPORT
// For a detached task, which is not completed, we switch back
// the omp_fulfill_event signals completion
// locking is necessary to avoid a race with ompt_task_late_fulfill
if (ompt)
__ompt_task_finish(task, resumed_task, ompt_task_detach);
#endif
// no access to taskdata after this point!
// __kmp_fulfill_event might free taskdata at any time from now
taskdata->td_flags.proxy = TASK_PROXY; // proxify!
detach = true;
}
@ -919,6 +925,12 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
if (!detach) {
taskdata->td_flags.complete = 1; // mark the task as completed
#if OMPT_SUPPORT
// This is not a detached task, we are done here
if (ompt)
__ompt_task_finish(task, resumed_task, ompt_task_complete);
#endif
// Only need to keep track of count if team parallel and tasking not
// serialized, or task is detachable and event has already been fulfilled
if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
@ -3867,12 +3879,26 @@ void __kmp_fulfill_event(kmp_event_t *event) {
// point.
// We need to take the lock to avoid races
__kmp_acquire_tas_lock(&event->lock, gtid);
if (taskdata->td_flags.proxy == TASK_PROXY)
if (taskdata->td_flags.proxy == TASK_PROXY) {
detached = true;
} else {
#if OMPT_SUPPORT
// The OMPT event must occur under mutual exclusion,
// otherwise the tool might access ptask after free
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
#endif
}
event->type = KMP_EVENT_UNINITIALIZED;
__kmp_release_tas_lock(&event->lock, gtid);
if (detached) {
#if OMPT_SUPPORT
// We free ptask afterwards and know the task is finished,
// so locking is not necessary
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
#endif
// If the task detached complete the proxy task
if (gtid >= 0) {
kmp_team_t *team = taskdata->td_team;

View File

@ -734,9 +734,13 @@ on_ompt_callback_task_schedule(
ompt_task_status_t prior_task_status,
ompt_data_t *second_task_data)
{
printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, ompt_task_status_t_values[prior_task_status], prior_task_status);
if(prior_task_status == ompt_task_complete)
{
printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64
", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
ompt_get_thread_data()->value, first_task_data->value,
(second_task_data ? second_task_data->value : -1),
ompt_task_status_t_values[prior_task_status], prior_task_status);
if (prior_task_status == ompt_task_complete ||
prior_task_status == ompt_task_late_fulfill) {
printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value);
}
}

View File

@ -0,0 +1,68 @@
// RUN: %libomp-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' \
// RUN: %libomp-run | %sort-threads | FileCheck %s
// Checked gcc 9.2 still does not support detach clause on task construct.
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9
// clang supports detach clause since version 11.
// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
// icc compiler does not support detach clause.
// UNSUPPORTED: icc
#include "callback.h"
#include <omp.h>
int main() {
#pragma omp parallel
#pragma omp master
{
omp_event_handle_t event;
#pragma omp task detach(event) if (0)
{ omp_fulfill_event(event); }
#pragma omp taskwait
}
return 0;
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
// CHECK-SAME: parent_task_frame.exit=[[NULL]],
// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
// CHECK-SAME: requested_team_size=3,
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}},
// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]],
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
// CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]],
// CHECK-SAME: second_task_id=[[TASK_ID]],
// CHECK-SAME: prior_task_status=ompt_task_switch=7
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
// CHECK-SAME: first_task_id=[[TASK_ID]],
// CHECK-SAME: second_task_id=18446744073709551615,
// CHECK-SAME: prior_task_status=ompt_task_early_fulfill=5
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
// CHECK-SAME: first_task_id=[[TASK_ID]],
// CHECK-SAME: second_task_id=[[IMPLICIT_TASK_ID]],
// CHECK-SAME: prior_task_status=ompt_task_complete=1

View File

@ -0,0 +1,76 @@
// RUN: %libomp-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' \
// RUN: %libomp-run | %sort-threads | FileCheck %s
// Checked gcc 9.2 still does not support detach clause on task construct.
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9
// clang supports detach clause since version 11.
// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
// icc compiler does not support detach clause.
// UNSUPPORTED: icc
#include "callback.h"
#include <omp.h>
int main() {
#pragma omp parallel
#pragma omp master
{
omp_event_handle_t event;
omp_event_handle_t *f_event;
#pragma omp task detach(event) depend(out : f_event) shared(f_event) if (0)
{
printf("task 1\n");
f_event = &event;
}
#pragma omp task depend(in : f_event)
{ printf("task 2\n"); }
printf("calling omp_fulfill_event\n");
omp_fulfill_event(*f_event);
#pragma omp taskwait
}
return 0;
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
// CHECK-SAME: parent_task_frame.exit=[[NULL]],
// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
// CHECK-SAME: requested_team_size=3,
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}},
// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]],
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
// CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]],
// CHECK-SAME: second_task_id=[[TASK_ID]],
// CHECK-SAME: prior_task_status=ompt_task_switch=7
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
// CHECK-SAME: first_task_id=[[TASK_ID]],
// CHECK-SAME: second_task_id=[[IMPLICIT_TASK_ID]],
// CHECK-SAME: prior_task_status=ompt_task_detach=4
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
// CHECK-SAME: first_task_id=[[TASK_ID]],
// CHECK-SAME: second_task_id=18446744073709551615,
// CHECK-SAME: prior_task_status=ompt_task_late_fulfill=6