[OpenMP][OMPT]Add event callbacks for taskwait with depend

This adds the missing event callbacks to express dependencies on included tasks
and taskwait with depend clause.

The test fails for GCC, see bug report:
https://bugs.llvm.org/show_bug.cgi?id=46573

Reviewed by: hbae

Differential Revision: https://reviews.llvm.org/D81891
This commit is contained in:
Joachim Protze 2020-06-16 00:31:14 +02:00
parent 01c4574a12
commit 47cb8a0f0b
3 changed files with 184 additions and 6 deletions

View File

@ -205,7 +205,7 @@ static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
return new_head;
}
static inline void __kmp_track_dependence(kmp_depnode_t *source,
static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
kmp_depnode_t *sink,
kmp_task_t *sink_task) {
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
@ -224,11 +224,14 @@ static inline void __kmp_track_dependence(kmp_depnode_t *source,
*/
if (ompt_enabled.ompt_callback_task_dependence) {
kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
ompt_data_t *sink_data;
if (sink_task)
sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
else
sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;
ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
&(task_source->ompt_task_info.task_data),
&(task_sink->ompt_task_info.task_data));
&(task_source->ompt_task_info.task_data), sink_data);
}
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
@ -246,7 +249,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
if (dep->dn.task) {
KMP_ACQUIRE_DEPNODE(gtid, dep);
if (dep->dn.task) {
__kmp_track_dependence(dep, node, task);
__kmp_track_dependence(gtid, dep, node, task);
dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
@ -272,7 +275,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
// synchronously add source to sink' list of successors
KMP_ACQUIRE_DEPNODE(gtid, sink);
if (sink->dn.task) {
__kmp_track_dependence(sink, source, task);
__kmp_track_dependence(gtid, sink, source, task);
sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
@ -635,6 +638,23 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
return ret;
}
void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
ompt_data_t *taskwait_task_data) {
#if OMPT_SUPPORT
if (ompt_enabled.ompt_callback_task_schedule) {
ompt_data_t task_data = ompt_data_none;
ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
current_task ? &(current_task->ompt_task_info.task_data) : &task_data,
ompt_task_switch, taskwait_task_data);
ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
taskwait_task_data, ompt_task_complete,
current_task ? &(current_task->ompt_task_info.task_data) : &task_data);
}
current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
*taskwait_task_data = ompt_data_none;
#endif /* OMPT_SUPPORT */
}
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@ -661,6 +681,74 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *current_task = thread->th.th_current_task;
#if OMPT_SUPPORT
// this function represents a taskwait construct with depend clause
// We signal 4 events:
// - creation of the taskwait task
// - dependences of the taskwait task
// - schedule and finish of the taskwait task
ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
KMP_ASSERT(taskwait_task_data->ptr == NULL);
if (ompt_enabled.enabled) {
if (!current_task->ompt_task_info.frame.enter_frame.ptr)
current_task->ompt_task_info.frame.enter_frame.ptr =
OMPT_GET_FRAME_ADDRESS(0);
if (ompt_enabled.ompt_callback_task_create) {
ompt_data_t task_data = ompt_data_none;
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
current_task ? &(current_task->ompt_task_info.task_data) : &task_data,
current_task ? &(current_task->ompt_task_info.frame) : NULL,
taskwait_task_data,
ompt_task_explicit | ompt_task_undeferred | ompt_task_mergeable, 1,
OMPT_GET_RETURN_ADDRESS(0));
}
}
#if OMPT_OPTIONAL
/* OMPT grab all dependences if requested by the tool */
if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
kmp_int32 i;
int ompt_ndeps = ndeps + ndeps_noalias;
ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
KMP_ASSERT(ompt_deps != NULL);
for (i = 0; i < ndeps; i++) {
ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
if (dep_list[i].flags.in && dep_list[i].flags.out)
ompt_deps[i].dependence_type = ompt_dependence_type_inout;
else if (dep_list[i].flags.out)
ompt_deps[i].dependence_type = ompt_dependence_type_out;
else if (dep_list[i].flags.in)
ompt_deps[i].dependence_type = ompt_dependence_type_in;
else if (dep_list[i].flags.mtx)
ompt_deps[ndeps + i].dependence_type =
ompt_dependence_type_mutexinoutset;
}
for (i = 0; i < ndeps_noalias; i++) {
ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
else if (noalias_dep_list[i].flags.out)
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
else if (noalias_dep_list[i].flags.in)
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
else if (noalias_dep_list[i].flags.mtx)
ompt_deps[ndeps + i].dependence_type =
ompt_dependence_type_mutexinoutset;
}
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
taskwait_task_data, ompt_deps, ompt_ndeps);
/* We can now free the allocated memory for the dependencies */
/* For OMPD we might want to delay the free until end of this function */
KMP_OMPT_DEPS_FREE(thread, ompt_deps);
ompt_deps = NULL;
}
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */
// We can return immediately as:
// - dependences are not computed in serial teams (except with proxy tasks)
// - if the dephash is not yet created it means we have nothing to wait for
@ -675,6 +763,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
"dependencies : loc=%p\n",
gtid, loc_ref));
__ompt_taskwait_dep_finish(current_task, taskwait_task_data);
return;
}
@ -687,6 +776,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
"dependencies : loc=%p\n",
gtid, loc_ref));
__ompt_taskwait_dep_finish(current_task, taskwait_task_data);
return;
}
@ -698,6 +788,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
__kmp_task_stealing_constraint);
}
__ompt_taskwait_dep_finish(current_task, taskwait_task_data);
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
gtid, loc_ref));
}

View File

@ -54,6 +54,13 @@ int main() {
// CHECK-SAME: parallel_id=[[PARALLEL_ID]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// The following is to match the taskwait task created in __kmpc_omp_wait_deps
// this should go away, once codegen for "detached if(0)" is fixed
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
// CHECK-SAME: has_dependences=yes
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}},

View File

@ -0,0 +1,80 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// The GOMP wrapper does not handle `task if(0) depend()` and drops the
// dependency. Once this is fixed, reevaluate the GCC status:
// XFAIL: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9, gcc-10
#include "callback.h"
#include <omp.h>
int main() {
int x = 0;
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
print_ids(0);
printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value,
&x);
#pragma omp task depend(out : x)
{ x++; }
print_fuzzy_address(1);
//#pragma omp taskwait depend(in: x) <-- currently not supported in clang
#pragma omp task if (0) depend(in : x)
{}
print_fuzzy_address(2);
}
}
return 0;
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dependences'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_depende
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]],
// CHECK-SAME: reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: address of x: [[ADDRX:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]],
// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
// CHECK-SAME: new_task_id=[[FIRST_TASK:[0-f]+]],
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences:
// CHECK-SAME: task_id=[[FIRST_TASK]], deps=[([[ADDRX]],
// CHECK-SAME: ompt_dependence_type_inout)], ndeps=1
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]],
// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
// CHECK-SAME: new_task_id=[[SECOND_TASK:[0-f]+]],
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
// CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred|
// CHECK-SAME: ompt_task_mergeable=1207959556, has_dependences=yes
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences:
// CHECK-SAME: task_id=[[SECOND_TASK]], deps=[([[ADDRX]],
// CHECK-SAME: ompt_dependence_type_in)], ndeps=1
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[SECOND_TASK]]
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]