[OpenMP][OMPT] Add callbacks for doacross loops

Adds the callbacks for ordered with source/sink dependencies.

The test for task dependencies changed, because callbach.h now actually prints
the passed dependencies and the test also checks for the address.

Reviewed by: hbae

Differential Revision: https://reviews.llvm.org/D81807
This commit is contained in:
Joachim Protze 2020-06-15 18:39:47 +02:00
parent eae76faeea
commit cbea36903e
4 changed files with 189 additions and 40 deletions

View File

@ -4023,6 +4023,9 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
lo = pr_buf->th_doacross_info[2];
up = pr_buf->th_doacross_info[3];
st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_dependence_t deps[num_dims];
#endif
if (st == 1) { // most common case
if (vec[0] < lo || vec[0] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
@ -4048,6 +4051,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
}
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
deps[0].variable.value = iter_number;
deps[0].dependence_type = ompt_dependence_type_sink;
#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
kmp_int32 j = i * 4;
@ -4081,6 +4088,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
deps[i].variable.value = iter;
deps[i].dependence_type = ompt_dependence_type_sink;
#endif
}
shft = iter_number % 32; // use 32-bit granularity
iter_number >>= 5; // divided by 32
@ -4089,6 +4100,12 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
KMP_YIELD(TRUE);
}
KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_dependences) {
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
&(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
}
#endif
KA_TRACE(20,
("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
gtid, (iter_number << 5) + shft));
@ -4116,6 +4133,9 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
num_dims = pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_dependence_t deps[num_dims];
#endif
if (st == 1) { // most common case
iter_number = vec[0] - lo;
} else if (st > 0) {
@ -4123,6 +4143,10 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
} else { // negative increment
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
deps[0].variable.value = iter_number;
deps[0].dependence_type = ompt_dependence_type_source;
#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
kmp_int32 j = i * 4;
@ -4137,7 +4161,17 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
deps[i].variable.value = iter;
deps[i].dependence_type = ompt_dependence_type_source;
#endif
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_dependences) {
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
&(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
}
#endif
shft = iter_number % 32; // use 32-bit granularity
iter_number >>= 5; // divided by 32
flag = 1 << shft;

View File

@ -47,6 +47,17 @@ static const char* ompt_cancel_flag_t_values[] = {
"ompt_cancel_discarded_task"
};
static const char *ompt_dependence_type_t_values[] = {
NULL,
"ompt_dependence_type_in", // 1
"ompt_dependence_type_out", // 2
"ompt_dependence_type_inout", // 3
"ompt_dependence_type_mutexinoutset", // 4
"ompt_dependence_type_source", // 5
"ompt_dependence_type_sink", // 6
"ompt_dependence_type_inoutset" // 7
};
static void format_task_type(int type, char *buffer) {
char *progress = buffer;
if (type & ompt_task_initial)
@ -971,10 +982,24 @@ on_ompt_callback_dependences(
const ompt_dependence_t *deps,
int ndeps)
{
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_task_dependences: task_id=%" PRIu64
", deps=%p, ndeps=%d\n",
ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps);
char buffer[2048];
char *progress = buffer;
for (int i = 0; i < ndeps && progress < buffer + 2000; i++) {
if (deps[i].dependence_type == ompt_dependence_type_source ||
deps[i].dependence_type == ompt_dependence_type_sink)
progress +=
sprintf(progress, "(%ld, %s), ", deps[i].variable.value,
ompt_dependence_type_t_values[deps[i].dependence_type]);
else
progress +=
sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
ompt_dependence_type_t_values[deps[i].dependence_type]);
}
if (ndeps > 0)
progress[-2] = 0;
printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
", deps=[%s], ndeps=%d\n",
ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
}
static void

View File

@ -0,0 +1,63 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
#include "callback.h"
#include <omp.h>
int main() {
int a[10][10];
int i, j;
#pragma omp parallel num_threads(2)
#pragma omp for ordered(2)
for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++) {
a[i][j] = i + j + 1;
printf("%d, %d\n", i, j);
#pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
if (i > 0 && j > 0)
a[i][j] = a[i - 1][j] + a[i][j - 1] + 1;
printf("%d, %d\n", i, j);
#pragma omp ordered depend(source)
}
return 0;
}
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_loop_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[IMPL_TASK:[0-9]+]],
// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (0,
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2
// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0,
// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2
// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (1,
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2
// CHECK: {{^}}[[WORKER:[0-9]+]]: ompt_event_loop_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[IMPL_TASK:[0-9]+]],
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0,
// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
// CHECK-SAME: deps=[(1, ompt_dependence_type_source), (0,
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (1,
// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
// CHECK-SAME: deps=[(1, ompt_dependence_type_sink), (0,
// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
// CHECK-SAME: deps=[(1, ompt_dependence_type_source), (1,
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2

View File

@ -3,59 +3,86 @@
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
#include "callback.h"
#include <omp.h>
#include <omp.h>
#include <math.h>
#include <unistd.h>
int main()
{
int main() {
int x = 0;
#pragma omp parallel num_threads(2)
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
#pragma omp master
{
print_ids(0);
#pragma omp task depend(out:x)
printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value,
&x);
#pragma omp task depend(out : x)
{
x++;
delay(100);
}
print_fuzzy_address(1);
print_ids(0);
#pragma omp task depend(in:x)
{
x = -1;
}
#pragma omp task depend(in : x)
{ x = -1; }
print_ids(0);
}
}
x++;
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
return 0;
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dependences'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_depende
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]],
// CHECK-SAME: reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: address of x: [[ADDRX:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]],
// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
// CHECK-SAME: new_task_id=[[FIRST_TASK:[0-f]+]],
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences:
// CHECK-SAME: task_id=[[FIRST_TASK]], deps=[([[ADDRX]],
// CHECK-SAME: ompt_dependence_type_inout)], ndeps=1
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
// CHECK-SAME: reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]],
// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
// CHECK-SAME: new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}},
// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences:
// CHECK-SAME: task_id=[[SECOND_TASK]], deps=[([[ADDRX]],
// CHECK-SAME: ompt_dependence_type_in)], ndeps=1
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair:
// CHECK-SAME: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
// CHECK-SAME: reenter_frame=[[NULL]]