forked from OSchip/llvm-project
765 lines
32 KiB
C
Executable File
765 lines
32 KiB
C
Executable File
#ifndef _BSD_SOURCE
|
|
#define _BSD_SOURCE
|
|
#endif
|
|
#define _DEFAULT_SOURCE
|
|
#include <stdio.h>
|
|
#ifndef __STDC_FORMAT_MACROS
|
|
#define __STDC_FORMAT_MACROS
|
|
#endif
|
|
#include <inttypes.h>
|
|
#include <omp.h>
|
|
#include <ompt.h>
|
|
#include "ompt-signal.h"
|
|
|
|
// Used to detect architecture
|
|
#include "../../src/kmp_platform.h"
|
|
|
|
static const char* ompt_thread_type_t_values[] = {
|
|
NULL,
|
|
"ompt_thread_initial",
|
|
"ompt_thread_worker",
|
|
"ompt_thread_other"
|
|
};
|
|
|
|
static const char* ompt_task_status_t_values[] = {
|
|
NULL,
|
|
"ompt_task_complete",
|
|
"ompt_task_yield",
|
|
"ompt_task_cancel",
|
|
"ompt_task_others"
|
|
};
|
|
static const char* ompt_cancel_flag_t_values[] = {
|
|
"ompt_cancel_parallel",
|
|
"ompt_cancel_sections",
|
|
"ompt_cancel_do",
|
|
"ompt_cancel_taskgroup",
|
|
"ompt_cancel_activated",
|
|
"ompt_cancel_detected",
|
|
"ompt_cancel_discarded_task"
|
|
};
|
|
|
|
static void format_task_type(int type, char *buffer) {
|
|
char *progress = buffer;
|
|
if (type & ompt_task_initial)
|
|
progress += sprintf(progress, "ompt_task_initial");
|
|
if (type & ompt_task_implicit)
|
|
progress += sprintf(progress, "ompt_task_implicit");
|
|
if (type & ompt_task_explicit)
|
|
progress += sprintf(progress, "ompt_task_explicit");
|
|
if (type & ompt_task_target)
|
|
progress += sprintf(progress, "ompt_task_target");
|
|
if (type & ompt_task_undeferred)
|
|
progress += sprintf(progress, "|ompt_task_undeferred");
|
|
if (type & ompt_task_untied)
|
|
progress += sprintf(progress, "|ompt_task_untied");
|
|
if (type & ompt_task_final)
|
|
progress += sprintf(progress, "|ompt_task_final");
|
|
if (type & ompt_task_mergeable)
|
|
progress += sprintf(progress, "|ompt_task_mergeable");
|
|
if (type & ompt_task_merged)
|
|
progress += sprintf(progress, "|ompt_task_merged");
|
|
}
|
|
|
|
static ompt_set_callback_t ompt_set_callback;
|
|
static ompt_get_callback_t ompt_get_callback;
|
|
static ompt_get_state_t ompt_get_state;
|
|
static ompt_get_task_info_t ompt_get_task_info;
|
|
static ompt_get_thread_data_t ompt_get_thread_data;
|
|
static ompt_get_parallel_info_t ompt_get_parallel_info;
|
|
static ompt_get_unique_id_t ompt_get_unique_id;
|
|
static ompt_get_num_procs_t ompt_get_num_procs;
|
|
static ompt_get_num_places_t ompt_get_num_places;
|
|
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
|
|
static ompt_get_place_num_t ompt_get_place_num;
|
|
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
|
|
static ompt_get_proc_id_t ompt_get_proc_id;
|
|
static ompt_enumerate_states_t ompt_enumerate_states;
|
|
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
|
|
|
|
static void print_ids(int level)
|
|
{
|
|
int task_type, thread_num;
|
|
omp_frame_t *frame;
|
|
ompt_data_t *task_parallel_data;
|
|
ompt_data_t *task_data;
|
|
int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
|
|
&task_parallel_data, &thread_num);
|
|
char buffer[2048];
|
|
format_task_type(task_type, buffer);
|
|
if (frame)
|
|
printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
|
|
", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
|
|
"task_type=%s=%d, thread_num=%d\n",
|
|
ompt_get_thread_data()->value, level,
|
|
exists_task ? task_parallel_data->value : 0,
|
|
exists_task ? task_data->value : 0, frame->exit_frame,
|
|
frame->enter_frame, buffer, task_type, thread_num);
|
|
}
|
|
|
|
#define get_frame_address(level) __builtin_frame_address(level)
|
|
|
|
#define print_frame(level) \
|
|
printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", \
|
|
ompt_get_thread_data()->value, level, get_frame_address(level))
|
|
|
|
// clang (version 5.0 and above) adds an intermediate function call with debug flag (-g)
|
|
#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
|
|
#if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
|
|
#define print_frame_from_outlined_fn(level) print_frame(level+1)
|
|
#else
|
|
#define print_frame_from_outlined_fn(level) print_frame(level)
|
|
#endif
|
|
|
|
#if defined(__clang__) && __clang_major__ >= 5
|
|
#warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
|
|
#warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
|
|
#endif
|
|
#endif
|
|
|
|
// This macro helps to define a label at the current position that can be used
|
|
// to get the current address in the code.
|
|
//
|
|
// For print_current_address():
|
|
// To reliably determine the offset between the address of the label and the
|
|
// actual return address, we insert a NOP instruction as a jump target as the
|
|
// compiler would otherwise insert an instruction that we can't control. The
|
|
// instruction length is target dependent and is explained below.
|
|
//
|
|
// (The empty block between "#pragma omp ..." and the __asm__ statement is a
|
|
// workaround for a bug in the Intel Compiler.)
|
|
#define define_ompt_label(id) \
|
|
{} \
|
|
__asm__("nop"); \
|
|
ompt_label_##id:
|
|
|
|
// This macro helps to get the address of a label that is inserted by the above
|
|
// macro define_ompt_label(). The address is obtained with a GNU extension
|
|
// (&&label) that has been tested with gcc, clang and icc.
|
|
#define get_ompt_label_address(id) (&& ompt_label_##id)
|
|
|
|
// This macro prints the exact address that a previously called runtime function
|
|
// returns to.
|
|
#define print_current_address(id) \
|
|
define_ompt_label(id) \
|
|
print_possible_return_addresses(get_ompt_label_address(id))
|
|
|
|
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
|
// On X86 the NOP instruction is 1 byte long. In addition, the comiler inserts
|
|
// a MOV instruction for non-void runtime functions which is 3 bytes long.
|
|
#define print_possible_return_addresses(addr) \
|
|
printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
|
|
ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
|
|
#elif KMP_ARCH_PPC64
|
|
// On Power the NOP instruction is 4 bytes long. In addition, the compiler
|
|
// inserts an LD instruction which accounts for another 4 bytes. In contrast to
|
|
// X86 this instruction is always there, even for void runtime functions.
|
|
#define print_possible_return_addresses(addr) \
|
|
printf("%" PRIu64 ": current_address=%p\n", ompt_get_thread_data()->value, \
|
|
((char *)addr) - 8)
|
|
#elif KMP_ARCH_AARCH64
|
|
// On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
|
|
// store instruction (another 4 bytes long).
|
|
#define print_possible_return_addresses(addr) \
|
|
printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
|
|
((char *)addr) - 4, ((char *)addr) - 8)
|
|
#else
|
|
#error Unsupported target architecture, cannot determine address offset!
|
|
#endif
|
|
|
|
|
|
// This macro performs a somewhat similar job to print_current_address(), except
|
|
// that it discards a certain number of nibbles from the address and only prints
|
|
// the most significant bits / nibbles. This can be used for cases where the
|
|
// return address can only be approximated.
|
|
//
|
|
// To account for overflows (ie the most significant bits / nibbles have just
|
|
// changed as we are a few bytes above the relevant power of two) the addresses
|
|
// of the "current" and of the "previous block" are printed.
|
|
#define print_fuzzy_address(id) \
|
|
define_ompt_label(id) \
|
|
print_fuzzy_address_blocks(get_ompt_label_address(id))
|
|
|
|
// If you change this define you need to adapt all capture patterns in the tests
|
|
// to include or discard the new number of nibbles!
|
|
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
|
|
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))
|
|
#define print_fuzzy_address_blocks(addr) \
|
|
printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 \
|
|
" or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n", \
|
|
ompt_get_thread_data()->value, \
|
|
((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES - 1, \
|
|
((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES, \
|
|
((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 1, \
|
|
((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 2, addr)
|
|
|
|
static void
|
|
on_ompt_callback_mutex_acquire(
|
|
ompt_mutex_kind_t kind,
|
|
unsigned int hint,
|
|
unsigned int impl,
|
|
omp_wait_id_t wait_id,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(kind)
|
|
{
|
|
case ompt_mutex_lock:
|
|
printf("%" PRIu64 ": ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_nest_lock:
|
|
printf("%" PRIu64 ": ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_critical:
|
|
printf("%" PRIu64 ": ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_atomic:
|
|
printf("%" PRIu64 ": ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_ordered:
|
|
printf("%" PRIu64 ": ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_mutex_acquired(
|
|
ompt_mutex_kind_t kind,
|
|
omp_wait_id_t wait_id,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(kind)
|
|
{
|
|
case ompt_mutex_lock:
|
|
printf("%" PRIu64 ": ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_nest_lock:
|
|
printf("%" PRIu64 ": ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_critical:
|
|
printf("%" PRIu64 ": ompt_event_acquired_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_atomic:
|
|
printf("%" PRIu64 ": ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_ordered:
|
|
printf("%" PRIu64 ": ompt_event_acquired_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_mutex_released(
|
|
ompt_mutex_kind_t kind,
|
|
omp_wait_id_t wait_id,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(kind)
|
|
{
|
|
case ompt_mutex_lock:
|
|
printf("%" PRIu64 ": ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_nest_lock:
|
|
printf("%" PRIu64 ": ompt_event_release_nest_lock_last: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_critical:
|
|
printf("%" PRIu64 ": ompt_event_release_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_atomic:
|
|
printf("%" PRIu64 ": ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_ordered:
|
|
printf("%" PRIu64 ": ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_nest_lock(
|
|
ompt_scope_endpoint_t endpoint,
|
|
omp_wait_id_t wait_id,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(endpoint)
|
|
{
|
|
case ompt_scope_begin:
|
|
printf("%" PRIu64 ": ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_scope_end:
|
|
printf("%" PRIu64 ": ompt_event_release_nest_lock_prev: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_sync_region(
|
|
ompt_sync_region_kind_t kind,
|
|
ompt_scope_endpoint_t endpoint,
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(endpoint)
|
|
{
|
|
case ompt_scope_begin:
|
|
switch(kind)
|
|
{
|
|
case ompt_sync_region_barrier:
|
|
printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
|
|
print_ids(0);
|
|
break;
|
|
case ompt_sync_region_taskwait:
|
|
printf("%" PRIu64 ": ompt_event_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
|
|
break;
|
|
case ompt_sync_region_taskgroup:
|
|
printf("%" PRIu64 ": ompt_event_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
|
|
break;
|
|
}
|
|
break;
|
|
case ompt_scope_end:
|
|
switch(kind)
|
|
{
|
|
case ompt_sync_region_barrier:
|
|
printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
|
|
break;
|
|
case ompt_sync_region_taskwait:
|
|
printf("%" PRIu64 ": ompt_event_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
|
|
break;
|
|
case ompt_sync_region_taskgroup:
|
|
printf("%" PRIu64 ": ompt_event_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_sync_region_wait(
|
|
ompt_sync_region_kind_t kind,
|
|
ompt_scope_endpoint_t endpoint,
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(endpoint)
|
|
{
|
|
case ompt_scope_begin:
|
|
switch(kind)
|
|
{
|
|
case ompt_sync_region_barrier:
|
|
printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
|
|
break;
|
|
case ompt_sync_region_taskwait:
|
|
printf("%" PRIu64 ": ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
|
|
break;
|
|
case ompt_sync_region_taskgroup:
|
|
printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
|
|
break;
|
|
}
|
|
break;
|
|
case ompt_scope_end:
|
|
switch(kind)
|
|
{
|
|
case ompt_sync_region_barrier:
|
|
printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
|
|
break;
|
|
case ompt_sync_region_taskwait:
|
|
printf("%" PRIu64 ": ompt_event_wait_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
|
|
break;
|
|
case ompt_sync_region_taskgroup:
|
|
printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_flush(
|
|
ompt_data_t *thread_data,
|
|
const void *codeptr_ra)
|
|
{
|
|
printf("%" PRIu64 ": ompt_event_flush: codeptr_ra=%p\n", thread_data->value, codeptr_ra);
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_cancel(
|
|
ompt_data_t *task_data,
|
|
int flags,
|
|
const void *codeptr_ra)
|
|
{
|
|
const char* first_flag_value;
|
|
const char* second_flag_value;
|
|
if(flags & ompt_cancel_parallel)
|
|
first_flag_value = ompt_cancel_flag_t_values[0];
|
|
else if(flags & ompt_cancel_sections)
|
|
first_flag_value = ompt_cancel_flag_t_values[1];
|
|
else if(flags & ompt_cancel_do)
|
|
first_flag_value = ompt_cancel_flag_t_values[2];
|
|
else if(flags & ompt_cancel_taskgroup)
|
|
first_flag_value = ompt_cancel_flag_t_values[3];
|
|
|
|
if(flags & ompt_cancel_activated)
|
|
second_flag_value = ompt_cancel_flag_t_values[4];
|
|
else if(flags & ompt_cancel_detected)
|
|
second_flag_value = ompt_cancel_flag_t_values[5];
|
|
else if(flags & ompt_cancel_discarded_task)
|
|
second_flag_value = ompt_cancel_flag_t_values[6];
|
|
|
|
printf("%" PRIu64 ": ompt_event_cancel: task_data=%" PRIu64 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, task_data->value, first_flag_value, second_flag_value, flags, codeptr_ra);
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_idle(
|
|
ompt_scope_endpoint_t endpoint)
|
|
{
|
|
switch(endpoint)
|
|
{
|
|
case ompt_scope_begin:
|
|
printf("%" PRIu64 ": ompt_event_idle_begin:\n", ompt_get_thread_data()->value);
|
|
break;
|
|
case ompt_scope_end:
|
|
printf("%" PRIu64 ": ompt_event_idle_end:\n", ompt_get_thread_data()->value);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_implicit_task(
|
|
ompt_scope_endpoint_t endpoint,
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data,
|
|
unsigned int team_size,
|
|
unsigned int thread_num)
|
|
{
|
|
switch(endpoint)
|
|
{
|
|
case ompt_scope_begin:
|
|
if(task_data->ptr)
|
|
printf("%s\n", "0: task_data initially not null");
|
|
task_data->value = ompt_get_unique_id();
|
|
printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num);
|
|
break;
|
|
case ompt_scope_end:
|
|
printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_lock_init(
|
|
ompt_mutex_kind_t kind,
|
|
unsigned int hint,
|
|
unsigned int impl,
|
|
omp_wait_id_t wait_id,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(kind)
|
|
{
|
|
case ompt_mutex_lock:
|
|
printf("%" PRIu64 ": ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_nest_lock:
|
|
printf("%" PRIu64 ": ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_lock_destroy(
|
|
ompt_mutex_kind_t kind,
|
|
omp_wait_id_t wait_id,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(kind)
|
|
{
|
|
case ompt_mutex_lock:
|
|
printf("%" PRIu64 ": ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
case ompt_mutex_nest_lock:
|
|
printf("%" PRIu64 ": ompt_event_destroy_nest_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_work(
|
|
ompt_work_type_t wstype,
|
|
ompt_scope_endpoint_t endpoint,
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data,
|
|
uint64_t count,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(endpoint)
|
|
{
|
|
case ompt_scope_begin:
|
|
switch(wstype)
|
|
{
|
|
case ompt_work_loop:
|
|
printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_sections:
|
|
printf("%" PRIu64 ": ompt_event_sections_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_single_executor:
|
|
printf("%" PRIu64 ": ompt_event_single_in_block_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_single_other:
|
|
printf("%" PRIu64 ": ompt_event_single_others_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_workshare:
|
|
//impl
|
|
break;
|
|
case ompt_work_distribute:
|
|
printf("%" PRIu64 ": ompt_event_distribute_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_taskloop:
|
|
//impl
|
|
printf("%" PRIu64 ": ompt_event_taskloop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
}
|
|
break;
|
|
case ompt_scope_end:
|
|
switch(wstype)
|
|
{
|
|
case ompt_work_loop:
|
|
printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_sections:
|
|
printf("%" PRIu64 ": ompt_event_sections_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_single_executor:
|
|
printf("%" PRIu64 ": ompt_event_single_in_block_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_single_other:
|
|
printf("%" PRIu64 ": ompt_event_single_others_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_workshare:
|
|
//impl
|
|
break;
|
|
case ompt_work_distribute:
|
|
printf("%" PRIu64 ": ompt_event_distribute_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
case ompt_work_taskloop:
|
|
//impl
|
|
printf("%" PRIu64 ": ompt_event_taskloop_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_master(
|
|
ompt_scope_endpoint_t endpoint,
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data,
|
|
const void *codeptr_ra)
|
|
{
|
|
switch(endpoint)
|
|
{
|
|
case ompt_scope_begin:
|
|
printf("%" PRIu64 ": ompt_event_master_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
|
|
break;
|
|
case ompt_scope_end:
|
|
printf("%" PRIu64 ": ompt_event_master_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_parallel_begin(
|
|
ompt_data_t *encountering_task_data,
|
|
const omp_frame_t *encountering_task_frame,
|
|
ompt_data_t* parallel_data,
|
|
uint32_t requested_team_size,
|
|
ompt_invoker_t invoker,
|
|
const void *codeptr_ra)
|
|
{
|
|
if(parallel_data->ptr)
|
|
printf("0: parallel_data initially not null\n");
|
|
parallel_data->value = ompt_get_unique_id();
|
|
printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, encountering_task_data->value, encountering_task_frame->exit_frame, encountering_task_frame->enter_frame, parallel_data->value, requested_team_size, codeptr_ra, invoker);
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_parallel_end(
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *encountering_task_data,
|
|
ompt_invoker_t invoker,
|
|
const void *codeptr_ra)
|
|
{
|
|
printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, encountering_task_data->value, invoker, codeptr_ra);
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_task_create(
|
|
ompt_data_t *encountering_task_data,
|
|
const omp_frame_t *encountering_task_frame,
|
|
ompt_data_t* new_task_data,
|
|
int type,
|
|
int has_dependences,
|
|
const void *codeptr_ra)
|
|
{
|
|
if(new_task_data->ptr)
|
|
printf("0: new_task_data initially not null\n");
|
|
new_task_data->value = ompt_get_unique_id();
|
|
char buffer[2048];
|
|
|
|
format_task_type(type, buffer);
|
|
|
|
//there is no parallel_begin callback for implicit parallel region
|
|
//thus it is initialized in initial task
|
|
if(type & ompt_task_initial)
|
|
{
|
|
ompt_data_t *parallel_data;
|
|
ompt_get_parallel_info(0, ¶llel_data, NULL);
|
|
if(parallel_data->ptr)
|
|
printf("%s\n", "0: parallel_data initially not null");
|
|
parallel_data->value = ompt_get_unique_id();
|
|
}
|
|
|
|
printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame : NULL, encountering_task_frame ? encountering_task_frame->enter_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_task_schedule(
|
|
ompt_data_t *first_task_data,
|
|
ompt_task_status_t prior_task_status,
|
|
ompt_data_t *second_task_data)
|
|
{
|
|
printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, ompt_task_status_t_values[prior_task_status], prior_task_status);
|
|
if(prior_task_status == ompt_task_complete)
|
|
{
|
|
printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value);
|
|
}
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_task_dependences(
|
|
ompt_data_t *task_data,
|
|
const ompt_task_dependence_t *deps,
|
|
int ndeps)
|
|
{
|
|
printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n", ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps);
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_task_dependence(
|
|
ompt_data_t *first_task_data,
|
|
ompt_data_t *second_task_data)
|
|
{
|
|
printf("%" PRIu64 ": ompt_event_task_dependence_pair: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value);
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_thread_begin(
|
|
ompt_thread_type_t thread_type,
|
|
ompt_data_t *thread_data)
|
|
{
|
|
if(thread_data->ptr)
|
|
printf("%s\n", "0: thread_data initially not null");
|
|
thread_data->value = ompt_get_unique_id();
|
|
printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ompt_thread_type_t_values[thread_type], thread_type, thread_data->value);
|
|
}
|
|
|
|
static void
|
|
on_ompt_callback_thread_end(
|
|
ompt_data_t *thread_data)
|
|
{
|
|
printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, thread_data->value);
|
|
}
|
|
|
|
static int
|
|
on_ompt_callback_control_tool(
|
|
uint64_t command,
|
|
uint64_t modifier,
|
|
void *arg,
|
|
const void *codeptr_ra)
|
|
{
|
|
omp_frame_t* omptTaskFrame;
|
|
ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
|
|
printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_frame, omptTaskFrame->enter_frame);
|
|
return 0; //success
|
|
}
|
|
|
|
#define register_callback_t(name, type) \
|
|
do{ \
|
|
type f_##name = &on_##name; \
|
|
if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \
|
|
ompt_set_never) \
|
|
printf("0: Could not register callback '" #name "'\n"); \
|
|
}while(0)
|
|
|
|
#define register_callback(name) register_callback_t(name, name##_t)
|
|
|
|
int ompt_initialize(
|
|
ompt_function_lookup_t lookup,
|
|
ompt_data_t *tool_data)
|
|
{
|
|
ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
|
|
ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback");
|
|
ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state");
|
|
ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
|
|
ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
|
|
ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
|
|
ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
|
|
|
|
ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs");
|
|
ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
|
|
ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
|
|
ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
|
|
ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
|
|
ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
|
|
ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
|
|
ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");
|
|
|
|
register_callback(ompt_callback_mutex_acquire);
|
|
register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
|
|
register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
|
|
register_callback(ompt_callback_nest_lock);
|
|
register_callback(ompt_callback_sync_region);
|
|
register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
|
|
register_callback(ompt_callback_control_tool);
|
|
register_callback(ompt_callback_flush);
|
|
register_callback(ompt_callback_cancel);
|
|
register_callback(ompt_callback_idle);
|
|
register_callback(ompt_callback_implicit_task);
|
|
register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
|
|
register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
|
|
register_callback(ompt_callback_work);
|
|
register_callback(ompt_callback_master);
|
|
register_callback(ompt_callback_parallel_begin);
|
|
register_callback(ompt_callback_parallel_end);
|
|
register_callback(ompt_callback_task_create);
|
|
register_callback(ompt_callback_task_schedule);
|
|
register_callback(ompt_callback_task_dependences);
|
|
register_callback(ompt_callback_task_dependence);
|
|
register_callback(ompt_callback_thread_begin);
|
|
register_callback(ompt_callback_thread_end);
|
|
printf("0: NULL_POINTER=%p\n", (void*)NULL);
|
|
return 1; //success
|
|
}
|
|
|
|
void ompt_finalize(ompt_data_t *tool_data)
|
|
{
|
|
printf("0: ompt_event_runtime_shutdown\n");
|
|
}
|
|
|
|
ompt_start_tool_result_t* ompt_start_tool(
|
|
unsigned int omp_version,
|
|
const char *runtime_version)
|
|
{
|
|
static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
|
|
return &ompt_start_tool_result;
|
|
}
|