From cbea36903e88b04221b2c4ef95712c907f474b6b Mon Sep 17 00:00:00 2001 From: Joachim Protze Date: Mon, 15 Jun 2020 18:39:47 +0200 Subject: [PATCH] [OpenMP][OMPT] Add callbacks for doacross loops Adds the callbacks for ordered with source/sink dependencies. The test for task dependencies changed, because callbach.h now actually prints the passed dependencies and the test also checks for the address. Reviewed by: hbae Differential Revision: https://reviews.llvm.org/D81807 --- openmp/runtime/src/kmp_csupport.cpp | 34 +++++++ openmp/runtime/test/ompt/callback.h | 33 ++++++- .../synchronization/ordered_dependences.c | 63 ++++++++++++ openmp/runtime/test/ompt/tasks/dependences.c | 99 ++++++++++++------- 4 files changed, 189 insertions(+), 40 deletions(-) create mode 100644 openmp/runtime/test/ompt/synchronization/ordered_dependences.c diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 579c4d566d78..9cfa64d6ff9e 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -4023,6 +4023,9 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { lo = pr_buf->th_doacross_info[2]; up = pr_buf->th_doacross_info[3]; st = pr_buf->th_doacross_info[4]; +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_dependence_t deps[num_dims]; +#endif if (st == 1) { // most common case if (vec[0] < lo || vec[0] > up) { KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " @@ -4048,6 +4051,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { } iter_number = (kmp_uint64)(lo - vec[0]) / (-st); } +#if OMPT_SUPPORT && OMPT_OPTIONAL + deps[0].variable.value = iter_number; + deps[0].dependence_type = ompt_dependence_type_sink; +#endif for (i = 1; i < num_dims; ++i) { kmp_int64 iter, ln; kmp_int32 j = i * 4; @@ -4081,6 +4088,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { iter = (kmp_uint64)(lo - vec[i]) / (-st); } iter_number = iter + ln * iter_number; +#if OMPT_SUPPORT && OMPT_OPTIONAL + deps[i].variable.value = iter; + deps[i].dependence_type = ompt_dependence_type_sink; +#endif } shft = iter_number % 32; // use 32-bit granularity iter_number >>= 5; // divided by 32 @@ -4089,6 +4100,12 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { KMP_YIELD(TRUE); } KMP_MB(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_dependences) { + ompt_callbacks.ompt_callback(ompt_callback_dependences)( + &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); + } +#endif KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", gtid, (iter_number << 5) + shft)); @@ -4116,6 +4133,9 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { num_dims = pr_buf->th_doacross_info[0]; lo = pr_buf->th_doacross_info[2]; st = pr_buf->th_doacross_info[4]; +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_dependence_t deps[num_dims]; +#endif if (st == 1) { // most common case iter_number = vec[0] - lo; } else if (st > 0) { @@ -4123,6 +4143,10 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { } else { // negative increment iter_number = (kmp_uint64)(lo - vec[0]) / (-st); } +#if OMPT_SUPPORT && OMPT_OPTIONAL + deps[0].variable.value = iter_number; + deps[0].dependence_type = ompt_dependence_type_source; +#endif for (i = 1; i < num_dims; ++i) { kmp_int64 iter, ln; kmp_int32 j = i * 4; @@ -4137,7 +4161,17 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { iter = (kmp_uint64)(lo - vec[i]) / (-st); } iter_number = iter + ln * iter_number; +#if OMPT_SUPPORT && OMPT_OPTIONAL + deps[i].variable.value = iter; + deps[i].dependence_type = ompt_dependence_type_source; +#endif } +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_dependences) { + ompt_callbacks.ompt_callback(ompt_callback_dependences)( + &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); + } +#endif shft = iter_number % 32; // use 32-bit granularity iter_number >>= 5; // divided by 32 flag = 1 << shft; diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h index f91729456739..c49d43e44207 100644 --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -47,6 +47,17 @@ static const char* ompt_cancel_flag_t_values[] = { "ompt_cancel_discarded_task" }; +static const char *ompt_dependence_type_t_values[] = { + NULL, + "ompt_dependence_type_in", // 1 + "ompt_dependence_type_out", // 2 + "ompt_dependence_type_inout", // 3 + "ompt_dependence_type_mutexinoutset", // 4 + "ompt_dependence_type_source", // 5 + "ompt_dependence_type_sink", // 6 + "ompt_dependence_type_inoutset" // 7 +}; + static void format_task_type(int type, char *buffer) { char *progress = buffer; if (type & ompt_task_initial) @@ -971,10 +982,24 @@ on_ompt_callback_dependences( const ompt_dependence_t *deps, int ndeps) { - printf("%" PRIu64 ":" _TOOL_PREFIX - " ompt_event_task_dependences: task_id=%" PRIu64 - ", deps=%p, ndeps=%d\n", - ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps); + char buffer[2048]; + char *progress = buffer; + for (int i = 0; i < ndeps && progress < buffer + 2000; i++) { + if (deps[i].dependence_type == ompt_dependence_type_source || + deps[i].dependence_type == ompt_dependence_type_sink) + progress += + sprintf(progress, "(%ld, %s), ", deps[i].variable.value, + ompt_dependence_type_t_values[deps[i].dependence_type]); + else + progress += + sprintf(progress, "(%p, %s), ", deps[i].variable.ptr, + ompt_dependence_type_t_values[deps[i].dependence_type]); + } + if (ndeps > 0) + progress[-2] = 0; + printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64 + ", deps=[%s], ndeps=%d\n", + ompt_get_thread_data()->value, task_data->value, buffer, ndeps); } static void diff --git a/openmp/runtime/test/ompt/synchronization/ordered_dependences.c b/openmp/runtime/test/ompt/synchronization/ordered_dependences.c new file mode 100644 index 000000000000..9da86e299af6 --- /dev/null +++ b/openmp/runtime/test/ompt/synchronization/ordered_dependences.c @@ -0,0 +1,63 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include + +int main() { + int a[10][10]; + int i, j; +#pragma omp parallel num_threads(2) +#pragma omp for ordered(2) + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) { + a[i][j] = i + j + 1; + printf("%d, %d\n", i, j); +#pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1) + if (i > 0 && j > 0) + a[i][j] = a[i - 1][j] + a[i][j - 1] + 1; + printf("%d, %d\n", i, j); +#pragma omp ordered depend(source) + } + + return 0; +} +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[IMPL_TASK:[0-9]+]], + +// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (0, +// CHECK-SAME: ompt_dependence_type_source)], ndeps=2 + +// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0, +// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2 + +// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (1, +// CHECK-SAME: ompt_dependence_type_source)], ndeps=2 + +// CHECK: {{^}}[[WORKER:[0-9]+]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[IMPL_TASK:[0-9]+]], + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0, +// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2 + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(1, ompt_dependence_type_source), (0, +// CHECK-SAME: ompt_dependence_type_source)], ndeps=2 + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (1, +// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2 + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(1, ompt_dependence_type_sink), (0, +// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2 + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(1, ompt_dependence_type_source), (1, +// CHECK-SAME: ompt_dependence_type_source)], ndeps=2 diff --git a/openmp/runtime/test/ompt/tasks/dependences.c b/openmp/runtime/test/ompt/tasks/dependences.c index 57b61f9b5d86..9e9349f95610 100644 --- a/openmp/runtime/test/ompt/tasks/dependences.c +++ b/openmp/runtime/test/ompt/tasks/dependences.c @@ -3,59 +3,86 @@ // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 #include "callback.h" -#include +#include #include #include -int main() -{ +int main() { int x = 0; - #pragma omp parallel num_threads(2) +#pragma omp parallel num_threads(2) { - #pragma omp master - { +#pragma omp master + { print_ids(0); - #pragma omp task depend(out:x) + printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value, + &x); +#pragma omp task depend(out : x) { x++; delay(100); } print_fuzzy_address(1); print_ids(0); - - #pragma omp task depend(in:x) - { - x = -1; - } + +#pragma omp task depend(in : x) + { x = -1; } print_ids(0); } } x++; - - // Check if libomp supports the callbacks for this test. - // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' - // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences' - // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence' - - // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] - - // make sure initial data pointers are null - // CHECK-NOT: 0: new_task_data initially not null - - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1 - // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] - // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1 - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]] - // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - - return 0; } + +// Check if libomp supports the callbacks for this test. +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dependences' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_depende + +// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + +// make sure initial data pointers are null +// CHECK-NOT: 0: new_task_data initially not null + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + +// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], +// CHECK-SAME: reenter_frame=[[NULL]] + +// CHECK: {{^}}[[MASTER_ID]]: address of x: [[ADDRX:0x[0-f]+]] +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[FIRST_TASK:[0-f]+]], +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences: +// CHECK-SAME: task_id=[[FIRST_TASK]], deps=[([[ADDRX]], +// CHECK-SAME: ompt_dependence_type_inout)], ndeps=1 + +// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] +// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], +// CHECK-SAME: reenter_frame=[[NULL]] + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, +// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences: +// CHECK-SAME: task_id=[[SECOND_TASK]], deps=[([[ADDRX]], +// CHECK-SAME: ompt_dependence_type_in)], ndeps=1 + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair: +// CHECK-SAME: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]] + +// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], +// CHECK-SAME: reenter_frame=[[NULL]]