forked from OSchip/llvm-project
[OpenMP][OMPT] Add callbacks for doacross loops
Adds the callbacks for ordered with source/sink dependencies. The test for task dependencies changed, because callbach.h now actually prints the passed dependencies and the test also checks for the address. Reviewed by: hbae Differential Revision: https://reviews.llvm.org/D81807
This commit is contained in:
parent
eae76faeea
commit
cbea36903e
|
@ -4023,6 +4023,9 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
|
|||
lo = pr_buf->th_doacross_info[2];
|
||||
up = pr_buf->th_doacross_info[3];
|
||||
st = pr_buf->th_doacross_info[4];
|
||||
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
ompt_dependence_t deps[num_dims];
|
||||
#endif
|
||||
if (st == 1) { // most common case
|
||||
if (vec[0] < lo || vec[0] > up) {
|
||||
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
|
||||
|
@ -4048,6 +4051,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
|
|||
}
|
||||
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
|
||||
}
|
||||
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
deps[0].variable.value = iter_number;
|
||||
deps[0].dependence_type = ompt_dependence_type_sink;
|
||||
#endif
|
||||
for (i = 1; i < num_dims; ++i) {
|
||||
kmp_int64 iter, ln;
|
||||
kmp_int32 j = i * 4;
|
||||
|
@ -4081,6 +4088,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
|
|||
iter = (kmp_uint64)(lo - vec[i]) / (-st);
|
||||
}
|
||||
iter_number = iter + ln * iter_number;
|
||||
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
deps[i].variable.value = iter;
|
||||
deps[i].dependence_type = ompt_dependence_type_sink;
|
||||
#endif
|
||||
}
|
||||
shft = iter_number % 32; // use 32-bit granularity
|
||||
iter_number >>= 5; // divided by 32
|
||||
|
@ -4089,6 +4100,12 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
|
|||
KMP_YIELD(TRUE);
|
||||
}
|
||||
KMP_MB();
|
||||
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
if (ompt_enabled.ompt_callback_dependences) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
|
||||
&(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
|
||||
}
|
||||
#endif
|
||||
KA_TRACE(20,
|
||||
("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
|
||||
gtid, (iter_number << 5) + shft));
|
||||
|
@ -4116,6 +4133,9 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
|
|||
num_dims = pr_buf->th_doacross_info[0];
|
||||
lo = pr_buf->th_doacross_info[2];
|
||||
st = pr_buf->th_doacross_info[4];
|
||||
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
ompt_dependence_t deps[num_dims];
|
||||
#endif
|
||||
if (st == 1) { // most common case
|
||||
iter_number = vec[0] - lo;
|
||||
} else if (st > 0) {
|
||||
|
@ -4123,6 +4143,10 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
|
|||
} else { // negative increment
|
||||
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
|
||||
}
|
||||
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
deps[0].variable.value = iter_number;
|
||||
deps[0].dependence_type = ompt_dependence_type_source;
|
||||
#endif
|
||||
for (i = 1; i < num_dims; ++i) {
|
||||
kmp_int64 iter, ln;
|
||||
kmp_int32 j = i * 4;
|
||||
|
@ -4137,7 +4161,17 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
|
|||
iter = (kmp_uint64)(lo - vec[i]) / (-st);
|
||||
}
|
||||
iter_number = iter + ln * iter_number;
|
||||
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
deps[i].variable.value = iter;
|
||||
deps[i].dependence_type = ompt_dependence_type_source;
|
||||
#endif
|
||||
}
|
||||
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
if (ompt_enabled.ompt_callback_dependences) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
|
||||
&(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
|
||||
}
|
||||
#endif
|
||||
shft = iter_number % 32; // use 32-bit granularity
|
||||
iter_number >>= 5; // divided by 32
|
||||
flag = 1 << shft;
|
||||
|
|
|
@ -47,6 +47,17 @@ static const char* ompt_cancel_flag_t_values[] = {
|
|||
"ompt_cancel_discarded_task"
|
||||
};
|
||||
|
||||
static const char *ompt_dependence_type_t_values[] = {
|
||||
NULL,
|
||||
"ompt_dependence_type_in", // 1
|
||||
"ompt_dependence_type_out", // 2
|
||||
"ompt_dependence_type_inout", // 3
|
||||
"ompt_dependence_type_mutexinoutset", // 4
|
||||
"ompt_dependence_type_source", // 5
|
||||
"ompt_dependence_type_sink", // 6
|
||||
"ompt_dependence_type_inoutset" // 7
|
||||
};
|
||||
|
||||
static void format_task_type(int type, char *buffer) {
|
||||
char *progress = buffer;
|
||||
if (type & ompt_task_initial)
|
||||
|
@ -971,10 +982,24 @@ on_ompt_callback_dependences(
|
|||
const ompt_dependence_t *deps,
|
||||
int ndeps)
|
||||
{
|
||||
printf("%" PRIu64 ":" _TOOL_PREFIX
|
||||
" ompt_event_task_dependences: task_id=%" PRIu64
|
||||
", deps=%p, ndeps=%d\n",
|
||||
ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps);
|
||||
char buffer[2048];
|
||||
char *progress = buffer;
|
||||
for (int i = 0; i < ndeps && progress < buffer + 2000; i++) {
|
||||
if (deps[i].dependence_type == ompt_dependence_type_source ||
|
||||
deps[i].dependence_type == ompt_dependence_type_sink)
|
||||
progress +=
|
||||
sprintf(progress, "(%ld, %s), ", deps[i].variable.value,
|
||||
ompt_dependence_type_t_values[deps[i].dependence_type]);
|
||||
else
|
||||
progress +=
|
||||
sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
|
||||
ompt_dependence_type_t_values[deps[i].dependence_type]);
|
||||
}
|
||||
if (ndeps > 0)
|
||||
progress[-2] = 0;
|
||||
printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
|
||||
", deps=[%s], ndeps=%d\n",
|
||||
ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
|
||||
// REQUIRES: ompt
|
||||
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
|
||||
#include "callback.h"
|
||||
#include <omp.h>
|
||||
|
||||
int main() {
|
||||
int a[10][10];
|
||||
int i, j;
|
||||
#pragma omp parallel num_threads(2)
|
||||
#pragma omp for ordered(2)
|
||||
for (i = 0; i < 2; i++)
|
||||
for (j = 0; j < 2; j++) {
|
||||
a[i][j] = i + j + 1;
|
||||
printf("%d, %d\n", i, j);
|
||||
#pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
|
||||
if (i > 0 && j > 0)
|
||||
a[i][j] = a[i - 1][j] + a[i][j - 1] + 1;
|
||||
printf("%d, %d\n", i, j);
|
||||
#pragma omp ordered depend(source)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_loop_begin:
|
||||
// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[IMPL_TASK:[0-9]+]],
|
||||
|
||||
// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
|
||||
// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (0,
|
||||
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2
|
||||
|
||||
// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
|
||||
// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0,
|
||||
// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2
|
||||
|
||||
// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
|
||||
// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (1,
|
||||
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2
|
||||
|
||||
// CHECK: {{^}}[[WORKER:[0-9]+]]: ompt_event_loop_begin:
|
||||
// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[IMPL_TASK:[0-9]+]],
|
||||
|
||||
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
|
||||
// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0,
|
||||
// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2
|
||||
|
||||
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
|
||||
// CHECK-SAME: deps=[(1, ompt_dependence_type_source), (0,
|
||||
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2
|
||||
|
||||
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
|
||||
// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (1,
|
||||
// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2
|
||||
|
||||
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
|
||||
// CHECK-SAME: deps=[(1, ompt_dependence_type_sink), (0,
|
||||
// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2
|
||||
|
||||
// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]],
|
||||
// CHECK-SAME: deps=[(1, ompt_dependence_type_source), (1,
|
||||
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2
|
|
@ -7,15 +7,16 @@
|
|||
#include <math.h>
|
||||
#include <unistd.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
int x = 0;
|
||||
#pragma omp parallel num_threads(2)
|
||||
#pragma omp parallel num_threads(2)
|
||||
{
|
||||
#pragma omp master
|
||||
#pragma omp master
|
||||
{
|
||||
print_ids(0);
|
||||
#pragma omp task depend(out:x)
|
||||
printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value,
|
||||
&x);
|
||||
#pragma omp task depend(out : x)
|
||||
{
|
||||
x++;
|
||||
delay(100);
|
||||
|
@ -23,39 +24,65 @@ int main()
|
|||
print_fuzzy_address(1);
|
||||
print_ids(0);
|
||||
|
||||
#pragma omp task depend(in:x)
|
||||
{
|
||||
x = -1;
|
||||
}
|
||||
#pragma omp task depend(in : x)
|
||||
{ x = -1; }
|
||||
print_ids(0);
|
||||
}
|
||||
}
|
||||
|
||||
x++;
|
||||
|
||||
|
||||
// Check if libomp supports the callbacks for this test.
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence'
|
||||
|
||||
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
|
||||
|
||||
// make sure initial data pointers are null
|
||||
// CHECK-NOT: 0: new_task_data initially not null
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
|
||||
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1
|
||||
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
|
||||
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]]
|
||||
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check if libomp supports the callbacks for this test.
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dependences'
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_depende
|
||||
|
||||
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
|
||||
|
||||
// make sure initial data pointers are null
|
||||
// CHECK-NOT: 0: new_task_data initially not null
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin:
|
||||
// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
|
||||
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
|
||||
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]],
|
||||
// CHECK-SAME: reenter_frame=[[NULL]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: address of x: [[ADDRX:0x[0-f]+]]
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
|
||||
// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]],
|
||||
// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
|
||||
// CHECK-SAME: new_task_id=[[FIRST_TASK:[0-f]+]],
|
||||
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
|
||||
// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences:
|
||||
// CHECK-SAME: task_id=[[FIRST_TASK]], deps=[([[ADDRX]],
|
||||
// CHECK-SAME: ompt_dependence_type_inout)], ndeps=1
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
|
||||
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
|
||||
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
|
||||
// CHECK-SAME: reenter_frame=[[NULL]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
|
||||
// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]],
|
||||
// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
|
||||
// CHECK-SAME: new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}},
|
||||
// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences:
|
||||
// CHECK-SAME: task_id=[[SECOND_TASK]], deps=[([[ADDRX]],
|
||||
// CHECK-SAME: ompt_dependence_type_in)], ndeps=1
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair:
|
||||
// CHECK-SAME: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]]
|
||||
|
||||
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]],
|
||||
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
|
||||
// CHECK-SAME: reenter_frame=[[NULL]]
|
||||
|
|
Loading…
Reference in New Issue