Update implementation of OMPT to the specification OpenMP 5.0 Preview 1 (TR4).

The code is tested to work with the latest Clang, GNU, and Intel compilers. The implementation
is optimized for low overhead when no tool is attached, shifting the cost to executions with
a tool attached.

This patch does not implement OMPT for libomptarget.

Patch by Simon Convent and Joachim Protze

Differential Revision: https://reviews.llvm.org/D38185

llvm-svn: 317085
This commit is contained in:
Joachim Protze 2017-11-01 10:08:30 +00:00
parent b93c06331e
commit 82e94a5934
91 changed files with 7249 additions and 1834 deletions

View File

@ -321,12 +321,11 @@ endif()
# OMPT-support
set(LIBOMP_OMPT_DEBUG FALSE CACHE BOOL
"Trace OMPT initialization?")
#after testing: turn on ompt support by default for OpenMP 5.0 and higher
set(LIBOMP_OMPT_SUPPORT FALSE CACHE BOOL
"OMPT-support?")
set(LIBOMP_OMPT_BLAME TRUE CACHE BOOL
"OMPT-blame?")
set(LIBOMP_OMPT_TRACE TRUE CACHE BOOL
"OMPT-trace?")
set(LIBOMP_OMPT_OPTIONAL TRUE CACHE BOOL
"OMPT-optional?")
if(LIBOMP_OMPT_SUPPORT AND (NOT LIBOMP_HAVE_OMPT_SUPPORT))
libomp_error_say("OpenMP Tools Interface requested but not available in this implementation")
endif()
@ -396,8 +395,7 @@ if(${LIBOMP_STANDALONE_BUILD})
libomp_say("Use ITT notify -- ${LIBOMP_USE_ITT_NOTIFY}")
libomp_say("Use OMPT-support -- ${LIBOMP_OMPT_SUPPORT}")
if(${LIBOMP_OMPT_SUPPORT})
libomp_say("Use OMPT-blame -- ${LIBOMP_OMPT_BLAME}")
libomp_say("Use OMPT-trace -- ${LIBOMP_OMPT_TRACE}")
libomp_say("Use OMPT-optional -- ${LIBOMP_OMPT_OPTIONAL}")
endif()
libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}")
libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}")

View File

@ -25,8 +25,7 @@ VERSION {
#
# OMPT API
#
ompt_tool; # OMPT initialization interface
ompt_control; # OMPT control interface
ompt_start_tool; # OMPT start interface
# icc drops weak attribute at linking step without the following line:
Annotate*; # TSAN annotation

View File

@ -182,6 +182,23 @@
extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
/* OpenMP 5.0 Tool Control */
/* Status codes associated with omp_control_tool. Presumably these are the
   values carried by its int return -- TODO confirm against the OpenMP 5.0
   Preview 1 (TR4) text. */
typedef enum omp_control_tool_result_t {
omp_control_tool_notool = -2,
omp_control_tool_nocallback = -1,
omp_control_tool_success = 0,
omp_control_tool_ignored = 1
} omp_control_tool_result_t;
/* Command codes for omp_control_tool. Presumably passed as its first int
   argument -- TODO confirm; the prototype below does not name its parameters. */
typedef enum omp_control_tool_t {
omp_control_tool_start = 1,
omp_control_tool_pause = 2,
omp_control_tool_flush = 3,
omp_control_tool_end = 4
} omp_control_tool_t;
/* Entry point taking two ints and an opaque pointer. The Fortran interfaces
   in this change name the first two arguments "command" and "modifier"; the
   meaning of the void* is not visible here. */
extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*);
# undef __KAI_KMPC_CONVENTION
/* Warning:

View File

@ -32,6 +32,8 @@
integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
integer, parameter :: kmp_cancel_kind = omp_integer_kind
integer, parameter :: omp_lock_hint_kind = omp_integer_kind
integer, parameter :: omp_control_tool_kind = omp_integer_kind
integer, parameter :: omp_control_tool_result_kind = omp_integer_kind
end module omp_lib_kinds
@ -518,6 +520,13 @@
integer (kind=omp_lock_hint_kind) hint
end subroutine omp_init_nest_lock_with_hint
function omp_control_tool(command, modifier)
use omp_lib_kinds
integer (kind=omp_integer_kind) omp_control_tool
integer (kind=omp_control_tool_kind) command
integer (kind=omp_control_tool_kind) modifier
end function omp_control_tool
end interface
!dec$ if defined(_WIN32)
@ -563,6 +572,7 @@
!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation
!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
!dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority
!dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool
!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
!dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint
@ -643,6 +653,7 @@
!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
!dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority
!dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool
!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
!dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint
@ -739,6 +750,7 @@
!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
!dec$ attributes alias:'omp_control_tool_'::omp_control_tool
!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
@ -818,6 +830,7 @@
!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
!dec$ attributes alias:'_omp_control_tool_'::omp_control_tool
!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s

View File

@ -28,6 +28,8 @@
integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
integer, parameter :: kmp_cancel_kind = omp_integer_kind
integer, parameter :: omp_lock_hint_kind = omp_integer_kind
integer, parameter :: omp_control_tool_kind = omp_integer_kind
integer, parameter :: omp_control_tool_result_kind = omp_integer_kind
end module omp_lib_kinds
@ -68,6 +70,16 @@
integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072
integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144
integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1
integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2
integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3
integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4
integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2
integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1
integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0
integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1
interface
! ***
@ -519,6 +531,13 @@
integer (kind=omp_lock_hint_kind), value :: hint
end subroutine omp_init_nest_lock_with_hint
function omp_control_tool(command, modifier) bind(c)
use omp_lib_kinds
integer (kind=omp_integer_kind) omp_control_tool
integer (kind=omp_control_tool_kind), value :: command
integer (kind=omp_control_tool_kind), value :: modifier
end function omp_control_tool
end interface
end module omp_lib

View File

@ -29,6 +29,8 @@
integer, parameter :: kmp_size_t_kind = int_ptr_kind()
integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
integer, parameter :: omp_lock_hint_kind = omp_integer_kind
integer, parameter :: omp_control_tool_kind = omp_integer_kind
integer, parameter :: omp_control_tool_result_kind = omp_integer_kind
integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
@ -57,6 +59,16 @@
integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072
integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144
integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1
integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2
integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3
integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4
integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2
integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1
integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0
integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1
interface
! ***
@ -494,6 +506,13 @@
integer (kind=omp_lock_hint_kind), value :: hint
end subroutine omp_init_nest_lock_with_hint
function omp_control_tool(command, modifier) bind(c)
import
integer (kind=omp_integer_kind) omp_control_tool
integer (kind=omp_control_tool_kind), value :: command
integer (kind=omp_control_tool_kind), value :: modifier
end function omp_control_tool
end interface
!DIR$ IF DEFINED (__INTEL_OFFLOAD)

View File

@ -10,6 +10,7 @@
*****************************************************************************/
#include <stdint.h>
#include <stddef.h>
@ -17,21 +18,28 @@
* iteration macros
*****************************************************************************/
#define FOREACH_OMPT_INQUIRY_FN(macro) \
macro (ompt_enumerate_state) \
\
macro (ompt_set_callback) \
macro (ompt_get_callback) \
\
macro (ompt_get_idle_frame) \
macro (ompt_get_task_frame) \
\
macro (ompt_get_state) \
\
macro (ompt_get_parallel_id) \
macro (ompt_get_parallel_team_size) \
macro (ompt_get_task_id) \
macro (ompt_get_thread_id)
#define FOREACH_OMPT_INQUIRY_FN(macro) \
macro (ompt_enumerate_states) \
macro (ompt_enumerate_mutex_impls) \
\
macro (ompt_set_callback) \
macro (ompt_get_callback) \
\
macro (ompt_get_state) \
\
macro (ompt_get_parallel_info) \
macro (ompt_get_task_info) \
macro (ompt_get_thread_data) \
macro (ompt_get_unique_id) \
\
macro(ompt_get_num_places) \
macro(ompt_get_place_proc_ids) \
macro(ompt_get_place_num) \
macro(ompt_get_partition_place_nums) \
macro(ompt_get_proc_id) \
\
macro(ompt_get_target_info) \
macro(ompt_get_num_devices)
#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \
macro (ompt_idle) \
@ -40,141 +48,107 @@
macro (ompt_task_wait) \
macro (ompt_mutex_wait)
#define FOREACH_OMPT_STATE(macro) \
#define FOREACH_OMP_STATE(macro) \
\
/* first */ \
macro (ompt_state_first, 0x71) /* initial enumeration state */ \
/* first available state */ \
macro (omp_state_undefined, 0x102) /* undefined thread state */ \
\
/* work states (0..15) */ \
macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \
macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \
macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \
macro (omp_state_work_serial, 0x000) /* working outside parallel */ \
macro (omp_state_work_parallel, 0x001) /* working within parallel */ \
macro (omp_state_work_reduction, 0x002) /* performing a reduction */ \
\
/* idle (16..31) */ \
macro (ompt_state_idle, 0x10) /* waiting for work */ \
/* barrier wait states (16..31) */ \
macro (omp_state_wait_barrier, 0x010) /* waiting at a barrier */ \
macro (omp_state_wait_barrier_implicit_parallel, 0x011) \
/* implicit barrier at the end of parallel region */\
macro (omp_state_wait_barrier_implicit_workshare, 0x012) \
/* implicit barrier at the end of worksharing */ \
macro (omp_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
macro (omp_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
\
/* overhead states (32..63) */ \
macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \
/* task wait states (32..63) */ \
macro (omp_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \
\
/* barrier wait states (64..79) */ \
macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \
macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \
macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \
/* mutex wait states (64..127) */ \
macro (omp_state_wait_mutex, 0x040) \
macro (omp_state_wait_lock, 0x041) /* waiting for lock */ \
macro (omp_state_wait_critical, 0x042) /* waiting for critical */ \
macro (omp_state_wait_atomic, 0x043) /* waiting for atomic */ \
macro (omp_state_wait_ordered, 0x044) /* waiting for ordered */ \
\
/* task wait states (80..95) */ \
macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \
macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \
/* target wait states (128..255) */ \
macro (omp_state_wait_target, 0x080) /* waiting for target region */ \
macro (omp_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \
macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */ \
\
/* mutex wait states (96..111) */ \
macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \
macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \
macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \
macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \
macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \
macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \
/* misc (256..511) */ \
macro (omp_state_idle, 0x100) /* waiting for work */ \
macro (omp_state_overhead, 0x101) /* overhead excluding wait states */ \
\
/* misc (112..127) */ \
macro (ompt_state_undefined, 0x70) /* undefined thread state */
/* implementation-specific states (512..) */
/* X-macro list of mutex implementation kinds: invokes `macro(name, code)` once
   per entry, pairing each enumerator name with its numeric code. */
#define FOREACH_OMPT_MUTEX_IMPL(macro) \
macro (ompt_mutex_impl_unknown, 0) /* unknown implementation */ \
macro (ompt_mutex_impl_spin, 1) /* based on spin */ \
macro (ompt_mutex_impl_queuing, 2) /* based on some fair policy */ \
macro (ompt_mutex_impl_speculative, 3) /* based on HW-supported speculation */
#define FOREACH_OMPT_EVENT(macro) \
\
/*--- Mandatory Events ---*/ \
macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \
macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \
\
macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \
macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \
\
macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \
macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \
macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \
macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \
macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \
\
macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \
macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \
macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op*/ \
macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit*/ \
\
macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \
macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \
\
macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \
macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \
\
/*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \
macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \
macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \
\
macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \
macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \
macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 14) /* sync region wait begin or end*/ \
\
macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \
macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \
\
macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\
macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \
\
macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \
macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \
macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \
\
macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \
\
macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \
macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 15) /* mutex released */ \
\
/*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \
macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \
macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \
\
macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \
macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \
macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 16) /* report task dependences */\
macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 17) /* report task dependence */\
\
macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \
macro (ompt_callback_work, ompt_callback_work_t, 18) /* task at work begin or end*/\
\
macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \
macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \
macro (ompt_callback_master, ompt_callback_master_t, 19) /* task at master begin or end */\
\
macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\
macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \
macro (ompt_callback_target_map, ompt_callback_target_map_t, 20) /* target map */ \
\
macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \
macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \
macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 21) /* sync region begin or end */ \
\
macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \
macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \
macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 22) /* lock init */ \
macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 23) /* lock destroy */ \
\
macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\
macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \
macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 24) /* mutex acquire */ \
macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 25) /* mutex acquired */ \
\
macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \
macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \
macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 26) /* nest lock */ \
\
macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \
macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \
macro (ompt_callback_flush, ompt_callback_flush_t, 27) /* after executing flush */ \
\
macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \
macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \
\
macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\
macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \
\
macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \
\
macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \
macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \
macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \
macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \
macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \
\
macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \
macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \
macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \
macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \
macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \
macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \
\
macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \
macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \
\
macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \
macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \
\
macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ \
\
macro (ompt_event_task_dependences, ompt_task_dependences_callback_t, 69) /* report task dependences */\
macro (ompt_event_task_dependence_pair, ompt_task_pair_callback_t, 70) /* report task dependence pair */
macro (ompt_callback_cancel, ompt_callback_cancel_t, 28) /*cancel innermost binding region*/\
macro (ompt_callback_idle, ompt_callback_idle_t, 29) /* begin or end idle state */\
@ -186,18 +160,20 @@
* identifiers
*---------------------*/
typedef uint64_t ompt_thread_id_t;
#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */
typedef uint64_t ompt_id_t;
#define ompt_id_none 0
typedef uint64_t ompt_task_id_t;
#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */
typedef union ompt_data_u {
uint64_t value; /* data initialized by runtime to unique id */
void *ptr; /* pointer under tool control */
} ompt_data_t;
typedef uint64_t ompt_parallel_id_t;
#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */
static const ompt_data_t ompt_data_none = {0};
typedef uint64_t ompt_wait_id_t;
#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */
static const ompt_wait_id_t ompt_wait_id_none = 0;
typedef void ompt_device_t;
/*---------------------
* ompt_frame_t
@ -235,35 +211,44 @@ typedef struct ompt_task_dependence_s {
*---------------------*/
typedef enum {
#define ompt_state_macro(state, code) state = code,
FOREACH_OMPT_STATE(ompt_state_macro)
#undef ompt_state_macro
} ompt_state_t;
#define omp_state_macro(state, code) state = code,
FOREACH_OMP_STATE(omp_state_macro)
#undef omp_state_macro
} omp_state_t;
/*---------------------
* runtime events
*---------------------*/
typedef enum {
typedef enum ompt_callbacks_e{
#define ompt_event_macro(event, callback, eventid) event = eventid,
FOREACH_OMPT_EVENT(ompt_event_macro)
#undef ompt_event_macro
} ompt_event_t;
} ompt_callbacks_t;
/*---------------------
* set callback results
*---------------------*/
typedef enum {
ompt_set_result_registration_error = 0,
ompt_set_result_event_may_occur_no_callback = 1,
ompt_set_result_event_never_occurs = 2,
ompt_set_result_event_may_occur_callback_some = 3,
ompt_set_result_event_may_occur_callback_always = 4,
typedef enum ompt_set_result_e {
ompt_set_error = 0,
ompt_set_never = 1,
ompt_set_sometimes = 2,
ompt_set_sometimes_paired = 3,
ompt_set_always = 4
} ompt_set_result_t;
/*----------------------
* mutex implementations
*----------------------*/
typedef enum ompt_mutex_impl_e {
#define ompt_mutex_impl_macro(impl, code) impl = code,
FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro)
#undef ompt_mutex_impl_macro
} ompt_mutex_impl_t;
/*****************************************************************************
* callback signatures
@ -273,14 +258,10 @@ typedef enum {
typedef void (*ompt_interface_fn_t)(void);
typedef ompt_interface_fn_t (*ompt_function_lookup_t)(
const char * /* entry point to look up */
const char * /* entry point to look up */
);
/* threads */
typedef void (*ompt_thread_callback_t) (
ompt_thread_id_t thread_id /* ID of thread */
);
typedef enum {
ompt_thread_initial = 1, // start the enumeration at 1
ompt_thread_worker = 2,
@ -288,78 +269,262 @@ typedef enum {
} ompt_thread_type_t;
typedef enum {
ompt_invoker_program = 0, /* program invokes master task */
ompt_invoker_runtime = 1 /* runtime invokes master task */
ompt_invoker_program = 1, /* program invokes master task */
ompt_invoker_runtime = 2 /* runtime invokes master task */
} ompt_invoker_t;
typedef void (*ompt_thread_type_callback_t) (
ompt_thread_type_t thread_type, /* type of thread */
ompt_thread_id_t thread_id /* ID of thread */
typedef void (*ompt_callback_thread_begin_t) (
ompt_thread_type_t thread_type, /* type of thread */
ompt_data_t *thread_data /* data of thread */
);
typedef void (*ompt_callback_thread_end_t) (
ompt_data_t *thread_data /* data of thread */
);
typedef void (*ompt_wait_callback_t) (
ompt_wait_id_t wait_id /* wait id */
ompt_wait_id_t wait_id /* wait data */
);
/* parallel and workshares */
typedef void (*ompt_parallel_callback_t) (
ompt_parallel_id_t parallel_id, /* id of parallel region */
ompt_task_id_t task_id /* id of task */
typedef enum ompt_scope_endpoint_e {
ompt_scope_begin = 1,
ompt_scope_end = 2
} ompt_scope_endpoint_t;
/* implicit task */
typedef void (*ompt_callback_implicit_task_t) (
ompt_scope_endpoint_t endpoint, /* endpoint of implicit task */
ompt_data_t *parallel_data, /* data of parallel region */
ompt_data_t *task_data, /* data of implicit task */
unsigned int team_size, /* team size */
unsigned int thread_num /* thread number of calling thread */
);
typedef void (*ompt_new_workshare_callback_t) (
ompt_parallel_id_t parallel_id, /* id of parallel region */
ompt_task_id_t parent_task_id, /* id of parent task */
void *workshare_function /* pointer to outlined function */
typedef void (*ompt_callback_parallel_begin_t) (
ompt_data_t *parent_task_data, /* data of parent task */
const ompt_frame_t *parent_frame, /* frame data of parent task */
ompt_data_t *parallel_data, /* data of parallel region */
unsigned int requested_team_size, /* requested number of threads in team */
ompt_invoker_t invoker, /* invoker of master task */
const void *codeptr_ra /* return address of runtime call */
);
typedef void (*ompt_new_parallel_callback_t) (
ompt_task_id_t parent_task_id, /* id of parent task */
ompt_frame_t *parent_task_frame, /* frame data of parent task */
ompt_parallel_id_t parallel_id, /* id of parallel region */
uint32_t requested_team_size, /* number of threads in team */
void *parallel_function, /* pointer to outlined function */
ompt_invoker_t invoker /* who invokes master task? */
);
typedef void (*ompt_end_parallel_callback_t) (
ompt_parallel_id_t parallel_id, /* id of parallel region */
ompt_task_id_t task_id, /* id of task */
ompt_invoker_t invoker /* who invokes master task? */
typedef void (*ompt_callback_parallel_end_t) (
ompt_data_t *parallel_data, /* data of parallel region */
ompt_data_t *task_data, /* data of task */
ompt_invoker_t invoker, /* invoker of master task */
const void *codeptr_ra /* return address of runtime call */
);
/* tasks */
typedef void (*ompt_task_callback_t) (
ompt_task_id_t task_id /* id of task */
/* Task type values. The low values (0x1..0x8) and the high values
   (0x8000000..0x80000000) are each distinct single bits, so they look intended
   to be OR-ed together into one int (the task-create callback takes
   `int type`) -- TODO confirm against the OMPT specification. */
typedef enum ompt_task_type_e {
ompt_task_initial = 0x1,
ompt_task_implicit = 0x2,
ompt_task_explicit = 0x4,
ompt_task_target = 0x8,
ompt_task_undeferred = 0x8000000,
ompt_task_untied = 0x10000000,
ompt_task_final = 0x20000000,
ompt_task_mergeable = 0x40000000,
/* NOTE(review): 0x80000000 does not fit in a 32-bit signed int. ISO C before
   C23 requires enumerator values to be representable as int, so this relies
   on a compiler extension -- confirm that all supported compilers accept it. */
ompt_task_merged = 0x80000000
} ompt_task_type_t;
typedef enum ompt_task_status_e {
ompt_task_complete = 1,
ompt_task_yield = 2,
ompt_task_cancel = 3,
ompt_task_others = 4
} ompt_task_status_t;
typedef void (*ompt_callback_task_schedule_t) (
ompt_data_t *prior_task_data, /* data of prior task */
ompt_task_status_t prior_task_status, /* status of prior task */
ompt_data_t *next_task_data /* data of next task */
);
typedef void (*ompt_task_pair_callback_t) (
ompt_task_id_t first_task_id,
ompt_task_id_t second_task_id
);
typedef void (*ompt_new_task_callback_t) (
ompt_task_id_t parent_task_id, /* id of parent task */
ompt_frame_t *parent_task_frame, /* frame data for parent task */
ompt_task_id_t new_task_id, /* id of created task */
void *task_function /* pointer to outlined function */
typedef void (*ompt_callback_task_create_t) (
ompt_data_t *parent_task_data, /* data of parent task */
const ompt_frame_t *parent_frame, /* frame data for parent task */
ompt_data_t *new_task_data, /* data of created task */
int type, /* type of created task */
int has_dependences, /* created task has dependences */
const void *codeptr_ra /* return address of runtime call */
);
/* task dependences */
typedef void (*ompt_task_dependences_callback_t) (
ompt_task_id_t task_id, /* ID of task with dependences */
const ompt_task_dependence_t *deps,/* vector of task dependences */
int ndeps /* number of dependences */
typedef void (*ompt_callback_task_dependences_t) (
ompt_data_t *task_data, /* data of task */
const ompt_task_dependence_t *deps, /* dependences of task */
int ndeps /* dependences count of task */
);
/* program */
typedef void (*ompt_control_callback_t) (
uint64_t command, /* command of control call */
uint64_t modifier /* modifier of control call */
typedef void (*ompt_callback_task_dependence_t) (
ompt_data_t *src_task_data, /* data of source task */
ompt_data_t *sink_task_data /* data of sink task */
);
typedef void (*ompt_callback_t)(void);
/* target and device */
typedef enum ompt_target_type_e {
ompt_target = 1,
ompt_target_enter_data = 2,
ompt_target_exit_data = 3,
ompt_target_update = 4
} ompt_target_type_t;
typedef void (*ompt_callback_target_t) (
ompt_target_type_t kind,
ompt_scope_endpoint_t endpoint,
uint64_t device_num,
ompt_data_t *task_data,
ompt_id_t target_id,
const void *codeptr_ra
);
typedef enum ompt_target_data_op_e {
ompt_target_data_alloc = 1,
ompt_target_data_transfer_to_dev = 2,
ompt_target_data_transfer_from_dev = 3,
ompt_target_data_delete = 4
} ompt_target_data_op_t;
typedef void (*ompt_callback_target_data_op_t) (
ompt_id_t target_id,
ompt_id_t host_op_id,
ompt_target_data_op_t optype,
void *host_addr,
void *device_addr,
size_t bytes
);
typedef void (*ompt_callback_target_submit_t) (
ompt_id_t target_id,
ompt_id_t host_op_id
);
typedef void (*ompt_callback_target_map_t) (
ompt_id_t target_id,
unsigned int nitems,
void **host_addr,
void **device_addr,
size_t *bytes,
unsigned int *mapping_flags
);
typedef void (*ompt_callback_device_initialize_t) (
uint64_t device_num,
const char *type,
ompt_device_t *device,
ompt_function_lookup_t lookup,
const char *documentation
);
typedef void (*ompt_callback_device_finalize_t) (
uint64_t device_num
);
/* control_tool */
typedef int (*ompt_callback_control_tool_t) (
uint64_t command, /* command of control call */
uint64_t modifier, /* modifier of control call */
void *arg, /* argument of control call */
const void *codeptr_ra /* return address of runtime call */
);
/* Kinds of mutual-exclusion construct passed to the mutex callbacks.
 * The generic, lock, nest-lock, critical and atomic kinds occupy
 * 0x10..0x14; ordered is 0x20. */
typedef enum ompt_mutex_kind_e {
ompt_mutex = 0x10,
ompt_mutex_lock = 0x11,
ompt_mutex_nest_lock = 0x12,
ompt_mutex_critical = 0x13,
ompt_mutex_atomic = 0x14,
ompt_mutex_ordered = 0x20
} ompt_mutex_kind_t;
/* Callback signature used when a thread starts acquiring a mutex; carries
 * the hint and implementation in addition to the kind and wait id. */
typedef void (*ompt_callback_mutex_acquire_t) (
ompt_mutex_kind_t kind, /* mutex kind */
unsigned int hint, /* mutex hint */
unsigned int impl, /* mutex implementation */
ompt_wait_id_t wait_id, /* id of object being awaited */
const void *codeptr_ra /* return address of runtime call */
);
/* General mutex callback signature: kind, wait id and return address only. */
typedef void (*ompt_callback_mutex_t) (
ompt_mutex_kind_t kind, /* mutex kind */
ompt_wait_id_t wait_id, /* id of object being awaited */
const void *codeptr_ra /* return address of runtime call */
);
/* Callback signature for nested-lock events; takes a scope endpoint instead
 * of a mutex kind. */
typedef void (*ompt_callback_nest_lock_t) (
ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */
ompt_wait_id_t wait_id, /* id of object being awaited */
const void *codeptr_ra /* return address of runtime call */
);
/* Callback signature for master-region events (begin/end selected by
 * endpoint). */
typedef void (*ompt_callback_master_t) (
ompt_scope_endpoint_t endpoint, /* endpoint of master region */
ompt_data_t *parallel_data, /* data of parallel region */
ompt_data_t *task_data, /* data of task */
const void *codeptr_ra /* return address of runtime call */
);
/* Callback signature for idle-time events; carries only the endpoint. */
typedef void (*ompt_callback_idle_t) (
ompt_scope_endpoint_t endpoint /* endpoint of idle time */
);
/* Kinds of worksharing region reported through the work callback. Single
 * has two values, one for the executor and one for the other threads. */
typedef enum ompt_work_type_e {
ompt_work_loop = 1,
ompt_work_sections = 2,
ompt_work_single_executor = 3,
ompt_work_single_other = 4,
ompt_work_workshare = 5,
ompt_work_distribute = 6,
ompt_work_taskloop = 7
} ompt_work_type_t;
/* Callback signature for worksharing-region events (begin/end selected by
 * endpoint). */
typedef void (*ompt_callback_work_t) (
ompt_work_type_t wstype, /* type of work region */
ompt_scope_endpoint_t endpoint, /* endpoint of work region */
ompt_data_t *parallel_data, /* data of parallel region */
ompt_data_t *task_data, /* data of task */
uint64_t count, /* quantity of work */
const void *codeptr_ra /* return address of runtime call */
);
/* Kinds of synchronization region: barrier, taskwait and taskgroup. */
typedef enum ompt_sync_region_kind_e {
ompt_sync_region_barrier = 1,
ompt_sync_region_taskwait = 2,
ompt_sync_region_taskgroup = 3
} ompt_sync_region_kind_t;
/* Callback signature for synchronization-region events (begin/end selected
 * by endpoint). */
typedef void (*ompt_callback_sync_region_t) (
ompt_sync_region_kind_t kind, /* kind of sync region */
ompt_scope_endpoint_t endpoint, /* endpoint of sync region */
ompt_data_t *parallel_data, /* data of parallel region */
ompt_data_t *task_data, /* data of task */
const void *codeptr_ra /* return address of runtime call */
);
/* Bit flags describing a cancellation event. Each value is a distinct bit,
 * so a construct flag (parallel, sections, do, taskgroup) can be OR-ed with
 * a status flag such as activated or detected. */
typedef enum ompt_cancel_flag_e {
ompt_cancel_parallel = 0x1,
ompt_cancel_sections = 0x2,
ompt_cancel_do = 0x4,
ompt_cancel_taskgroup = 0x8,
ompt_cancel_activated = 0x10,
ompt_cancel_detected = 0x20,
ompt_cancel_discarded_task = 0x40
} ompt_cancel_flag_t;
/* Callback signature for cancellation events. flags is a plain int because
 * it carries an OR-ed combination of ompt_cancel_flag_t values. */
typedef void (*ompt_callback_cancel_t) (
ompt_data_t *task_data, /* data of task */
int flags, /* cancel flags */
const void *codeptr_ra /* return address of runtime call */
);
/* Callback signature for flush events; identified by thread data rather
 * than task data. */
typedef void (*ompt_callback_flush_t) (
ompt_data_t *thread_data, /* data of thread */
const void *codeptr_ra /* return address of runtime call */
);
/****************************************************************************
* ompt API
@ -381,33 +546,48 @@ extern "C" {
***************************************************************************/
/* state */
OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
ompt_wait_id_t *ompt_wait_id
OMPT_API_FUNCTION(omp_state_t, ompt_get_state, (
ompt_wait_id_t *wait_id
));
/* thread */
OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void));
OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void));
OMPT_API_FUNCTION(ompt_data_t*, ompt_get_thread_data, (void));
/* parallel region */
OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, (
int ancestor_level
));
OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, (
int ancestor_level
OMPT_API_FUNCTION(int, ompt_get_parallel_info, (
int ancestor_level,
ompt_data_t **parallel_data,
int *team_size
));
/* task */
OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, (
int depth
OMPT_API_FUNCTION(int, ompt_get_task_info, (
int ancestor_level,
int *type,
ompt_data_t **task_data,
ompt_frame_t **task_frame,
ompt_data_t **parallel_data,
int *thread_num
));
OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, (
int depth
/* places */
OMPT_API_FUNCTION(int, ompt_get_num_places, (void));
OMPT_API_FUNCTION(int, ompt_get_place_proc_ids, (
int place_num,
int ids_size,
int *ids
));
OMPT_API_FUNCTION(int, ompt_get_place_num, (void));
OMPT_API_FUNCTION(int, ompt_get_partition_place_nums, (
int place_nums_size,
int *place_nums
));
/* proc_id */
OMPT_API_FUNCTION(int, ompt_get_proc_id, (void));
/****************************************************************************
@ -445,25 +625,35 @@ OMPT_API_FUNCTION(void, ompt_mutex_wait, (
* INITIALIZATION FUNCTIONS
***************************************************************************/
OMPT_API_FUNCTION(void, ompt_initialize, (
typedef struct ompt_fns_t ompt_fns_t;
OMPT_API_FUNCTION(int, ompt_initialize, (
ompt_function_lookup_t ompt_fn_lookup,
const char *runtime_version,
unsigned int ompt_version
ompt_fns_t *fns
));
OMPT_API_FUNCTION(void, ompt_finalize, (
ompt_fns_t *fns
));
/* Pair of tool entry points: one initialize and one finalize function.
 * ompt_start_tool() returns a pointer to a structure of this type.
 * The member types are presumably the function-pointer typedefs generated
 * by OMPT_API_FUNCTION for ompt_initialize / ompt_finalize -- the macro is
 * not visible here. */
struct ompt_fns_t {
ompt_initialize_t initialize;
ompt_finalize_t finalize;
};
/* initialization interface to be defined by tool */
ompt_initialize_t ompt_tool(void);
/* Start-tool entry point; marked dllexport when building for Windows.
 * Takes an OpenMP version number and a runtime version string and returns
 * a pointer to the tool's ompt_fns_t.
 * NOTE(review): the meaning of a NULL return is not visible here. */
#ifdef _WIN32
__declspec(dllexport)
#endif
ompt_fns_t * ompt_start_tool(
unsigned int omp_version,
const char * runtime_version
);
/* Initialization modes, numbered from never (0) to always (3).
 * The enum tag is spelled ompt_init_mode_e so that it follows the
 * ompt_<name>_e convention of every other enum in this header (it used to
 * read opt_init_mode_e). Refer to the type through the ompt_init_mode_t
 * typedef, whose name and values are unchanged. */
typedef enum ompt_init_mode_e {
ompt_init_mode_never = 0,
ompt_init_mode_false = 1,
ompt_init_mode_true = 2,
ompt_init_mode_always = 3
} ompt_init_mode_t;
typedef void (*ompt_callback_t)(void);
OMPT_API_FUNCTION(int, ompt_set_callback, (
ompt_event_t event,
ompt_callbacks_t which,
ompt_callback_t callback
));
@ -477,7 +667,7 @@ typedef enum ompt_set_callback_rc_e { /* non-standard */
OMPT_API_FUNCTION(int, ompt_get_callback, (
ompt_event_t event,
ompt_callbacks_t which,
ompt_callback_t *callback
));
@ -487,29 +677,37 @@ OMPT_API_FUNCTION(int, ompt_get_callback, (
* MISCELLANEOUS FUNCTIONS
***************************************************************************/
/* control */
// FIXME: remove workaround for clang
#if !defined(__clang__) && defined(_OPENMP) && (_OPENMP >= 201307)
#pragma omp declare target
#endif
void ompt_control(
uint64_t command,
uint64_t modifier
);
#if !defined(__clang__) && defined(_OPENMP) && (_OPENMP >= 201307)
#pragma omp end declare target
#endif
/* state enumeration */
OMPT_API_FUNCTION(int, ompt_enumerate_state, (
OMPT_API_FUNCTION(int, ompt_enumerate_states, (
int current_state,
int *next_state,
const char **next_state_name
));
/* mutex implementation enumeration
 * Same argument shape as the state enumeration function: takes a current
 * value and writes the next value and its name through the two pointers.
 * NOTE(review): the meaning of the int result is not visible here. */
OMPT_API_FUNCTION(int, ompt_enumerate_mutex_impls, (
int current_impl,
int *next_impl,
const char **next_impl_name
));
/* get_unique_id: takes no arguments and returns a 64-bit unsigned id. */
OMPT_API_FUNCTION(uint64_t, ompt_get_unique_id, (void));
#ifdef __cplusplus
};
#endif
#endif
/****************************************************************************
* TARGET
***************************************************************************/
/* Target query: all three values are written through the pointer
 * arguments. NOTE(review): presumably describes the target region the
 * caller is currently in; the meaning of the int result is not visible
 * here -- confirm against the OMPT specification. */
OMPT_API_FUNCTION(int, ompt_get_target_info, (
uint64_t *device_num,
ompt_id_t *target_id,
ompt_id_t *host_op_id
));
/* Takes no arguments and returns an int; presumably the number of
 * available devices. */
OMPT_API_FUNCTION(int, ompt_get_num_devices, (void));
#endif /* __OMPT__ */

View File

@ -200,6 +200,10 @@ Values for bit flags used in the ident_t to describe the fields.
#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140
#define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0
#define KMP_IDENT_WORK_LOOP 0x200 // static loop
#define KMP_IDENT_WORK_SECTIONS 0x400 // sections
#define KMP_IDENT_WORK_DISTRIBUTE 0x800 // distribute
/*!
* The ident structure that describes a source location.
*/
@ -798,6 +802,10 @@ extern kmp_hws_item_t __kmp_hws_proc;
extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested
#if OMP_50_ENABLED && LIBOMP_OMPT_SUPPORT
extern char const *__kmp_tool_libraries;
#endif // OMP_50_ENABLED && LIBOMP_OMPT_SUPPORT
/* ------------------------------------------------------------------------ */
#define KMP_PAD(type, sz) \
@ -3314,7 +3322,7 @@ extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
extern kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id,
ompt_data_t ompt_parallel_data,
#endif
kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
@ -3322,7 +3330,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
extern kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id,
ompt_id_t ompt_parallel_id,
#endif
kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
@ -3362,9 +3370,6 @@ enum fork_context_e {
};
extern int __kmp_fork_call(ident_t *loc, int gtid,
enum fork_context_e fork_context, kmp_int32 argc,
#if OMPT_SUPPORT
void *unwrapped_task,
#endif
microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX

View File

@ -361,19 +361,20 @@ typedef kmp_queuing_lock_t kmp_atomic_lock_t;
static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck,
kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) {
ompt_callbacks.ompt_callback(ompt_event_wait_atomic)((ompt_wait_id_t)lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_atomic, 0, ompt_mutex_impl_queuing, (ompt_wait_id_t)lck,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
__kmp_acquire_queuing_lock(lck, gtid);
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled &&
ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) {
ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)(
(ompt_wait_id_t)lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
@ -386,10 +387,10 @@ static inline int __kmp_test_atomic_lock(kmp_atomic_lock_t *lck,
static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck,
kmp_int32 gtid) {
__kmp_release_queuing_lock(lck, gtid);
#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_atomic)) {
ompt_callbacks.ompt_callback(ompt_event_release_atomic)(
(ompt_wait_id_t)lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}

View File

@ -16,6 +16,9 @@
#include "kmp_itt.h"
#include "kmp_os.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#if KMP_MIC
#include <immintrin.h>
@ -1224,8 +1227,9 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
int status = 0;
ident_t *loc = __kmp_threads[gtid]->th.th_ident;
#if OMPT_SUPPORT
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
ompt_data_t *my_task_data;
ompt_data_t *my_parallel_data;
void *return_address;
#endif
KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
@ -1233,28 +1237,26 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
if (ompt_enabled) {
#if OMPT_BLAME
my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
my_parallel_id = team->t.ompt_team_info.parallel_id;
#if OMPT_TRACE
if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
my_parallel_id, my_task_id);
}
if (ompt_enabled.enabled) {
#if OMPT_OPTIONAL
my_task_data = OMPT_CUR_TASK_DATA(this_thr);
my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
my_task_data, return_address);
}
#endif
if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(my_parallel_id,
my_task_id);
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
my_task_data, return_address);
}
#endif
// It is OK to report the barrier state after the barrier begin callback.
// According to the OMPT specification, a compliant implementation may
// even delay reporting this state until the barrier begins to wait.
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
this_thr->th.ompt_thread_info.state = omp_state_wait_barrier;
}
#endif
@ -1489,14 +1491,20 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
__kmp_tid_from_gtid(gtid), status));
#if OMPT_SUPPORT
if (ompt_enabled) {
#if OMPT_BLAME
if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
ompt_callbacks.ompt_callback(ompt_event_barrier_end)(my_parallel_id,
my_task_id);
if (ompt_enabled.enabled) {
#if OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
my_task_data, return_address);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
my_task_data, return_address);
}
#endif
this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
}
#endif
ANNOTATE_BARRIER_END(&team->t.t_bar);
@ -1593,14 +1601,31 @@ void __kmp_join_barrier(int gtid) {
ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
#if OMPT_TRACE
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
ompt_data_t *my_task_data;
ompt_data_t *my_parallel_data;
if (ompt_enabled.enabled) {
#if OMPT_OPTIONAL
void *codeptr = NULL;
int ds_tid = this_thr->th.th_info.ds.ds_tid;
if (KMP_MASTER_TID(ds_tid) &&
(ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
codeptr = team->t.ompt_team_info.master_return_address;
my_task_data = OMPT_CUR_TASK_DATA(this_thr);
my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
my_task_data, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
my_task_data, codeptr);
}
#endif
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit;
}
#endif
if (__kmp_tasking_mode == tskm_extra_barrier) {
@ -1758,20 +1783,6 @@ void __kmp_join_barrier(int gtid) {
KA_TRACE(10,
("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
#if OMPT_SUPPORT
if (ompt_enabled) {
#if OMPT_BLAME
if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
#endif
// return to default state
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
ANNOTATE_BARRIER_END(&team->t.t_bar);
}
@ -1869,6 +1880,39 @@ void __kmp_fork_barrier(int gtid, int tid) {
}
}
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
if (this_thr->th.ompt_thread_info.state ==
omp_state_wait_barrier_implicit) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *tId = (team) ? OMPT_CUR_TASK_DATA(this_thr)
: &(this_thr->th.ompt_thread_info.task_data);
this_thr->th.ompt_thread_info.state = omp_state_overhead;
#if OMPT_OPTIONAL
void *codeptr = NULL;
if (KMP_MASTER_TID(ds_tid) &&
(ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
codeptr = team->t.ompt_team_info.master_return_address;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, tId, 0, ds_tid);
}
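// leave the implicit-barrier wait state; NOTE(review): this comment used to
// say "idle", but the state stored below is omp_state_overhead -- confirm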
this_thr->th.ompt_thread_info.state = omp_state_overhead;
}
}
#endif
// Early exit for reaping threads releasing forkjoin barrier
if (TCR_4(__kmp_global.g.g_done)) {
this_thr->th.th_task_team = NULL;

View File

@ -12,6 +12,9 @@
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#if OMP_40_ENABLED
@ -51,11 +54,25 @@ kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(
&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind);
if (old == cancel_noreq || old == cncl_kind) {
// printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n",
// this_team->t.t_cancel_request,
// &(this_team->t.t_cancel_request));
// we do not have a cancellation request in this team or we do have
// one that matches the current request -> cancel
// we do not have a cancellation request in this team or we do have
// one that matches the current request -> cancel
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_cancel) {
ompt_data_t *task_data;
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL,
NULL);
ompt_cancel_flag_t type = ompt_cancel_parallel;
if (cncl_kind == cancel_parallel)
type = ompt_cancel_parallel;
else if (cncl_kind == cancel_loop)
type = ompt_cancel_do;
else if (cncl_kind == cancel_sections)
type = ompt_cancel_sections;
ompt_callbacks.ompt_callback(ompt_callback_cancel)(
task_data, type | ompt_cancel_activated,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
return 1 /* true */;
}
break;
@ -75,8 +92,18 @@ kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(
&(taskgroup->cancel_request), cancel_noreq, cncl_kind);
if (old == cancel_noreq || old == cncl_kind) {
// we do not have a cancellation request in this taskgroup or we do
// have one that matches the current request -> cancel
// we do not have a cancellation request in this taskgroup or we do
// have one that matches the current request -> cancel
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_cancel) {
ompt_data_t *task_data;
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL,
NULL);
ompt_callbacks.ompt_callback(ompt_callback_cancel)(
task_data, ompt_cancel_taskgroup | ompt_cancel_activated,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
return 1 /* true */;
}
} else {
@ -134,8 +161,25 @@ kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
KMP_DEBUG_ASSERT(this_team);
if (this_team->t.t_cancel_request) {
if (cncl_kind == this_team->t.t_cancel_request) {
// the request in the team structure matches the type of
// cancellation point so we can cancel
// the request in the team structure matches the type of
// cancellation point so we can cancel
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_cancel) {
ompt_data_t *task_data;
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL,
NULL);
ompt_cancel_flag_t type = ompt_cancel_parallel;
if (cncl_kind == cancel_parallel)
type = ompt_cancel_parallel;
else if (cncl_kind == cancel_loop)
type = ompt_cancel_do;
else if (cncl_kind == cancel_sections)
type = ompt_cancel_sections;
ompt_callbacks.ompt_callback(ompt_callback_cancel)(
task_data, type | ompt_cancel_detected,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
return 1 /* true */;
}
KMP_ASSERT(0 /* false */);
@ -158,7 +202,18 @@ kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
taskgroup = task->td_taskgroup;
if (taskgroup) {
// return the current status of cancellation for the taskgroup
// return the current status of cancellation for the taskgroup
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_cancel &&
!!taskgroup->cancel_request) {
ompt_data_t *task_data;
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL,
NULL);
ompt_callbacks.ompt_callback(ompt_callback_cancel)(
task_data, ompt_cancel_taskgroup | ompt_cancel_detected,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
return !!taskgroup->cancel_request;
} else {
// if a cancellation point is encountered by a task that does not

View File

@ -45,10 +45,8 @@
#define OMPT_DEBUG LIBOMP_OMPT_DEBUG
#cmakedefine01 LIBOMP_OMPT_SUPPORT
#define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
#cmakedefine01 LIBOMP_OMPT_BLAME
#define OMPT_BLAME LIBOMP_OMPT_BLAME
#cmakedefine01 LIBOMP_OMPT_TRACE
#define OMPT_TRACE LIBOMP_OMPT_TRACE
#cmakedefine01 LIBOMP_OMPT_OPTIONAL
#define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL
#cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS
#define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS
#define KMP_DEBUG_ADAPTIVE_LOCKS 0

File diff suppressed because it is too large Load Diff

View File

@ -36,7 +36,6 @@
#endif
#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif
@ -1230,12 +1229,14 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
}
#endif // ( KMP_STATIC_STEAL_ENABLED )
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id, team_info->microtask);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
kmp_info_t *thr = __kmp_threads[gtid];
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
}
@ -1390,16 +1391,18 @@ static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
is not called. */
#if OMPT_SUPPORT && OMPT_TRACE
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_LOOP_END \
if (status == 0) { \
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \
if (ompt_enabled.ompt_callback_work) { \
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \
ompt_callbacks.ompt_callback(ompt_event_loop_end)( \
team_info->parallel_id, task_info->task_id); \
ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
ompt_callbacks.ompt_callback(ompt_callback_work)( \
ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
&(task_info->task_data), 0, codeptr); \
} \
}
// TODO: implement count
#else
#define OMPT_LOOP_END // no-op
#endif
@ -1407,7 +1410,12 @@ static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
template <typename T>
static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
T *p_lb, T *p_ub,
typename traits_t<T>::signed_t *p_st) {
typename traits_t<T>::signed_t *p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
void *codeptr
#endif
) {
typedef typename traits_t<T>::unsigned_t UT;
typedef typename traits_t<T>::signed_t ST;
@ -2527,6 +2535,9 @@ void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
enum sched_type schedule, kmp_int32 lb,
kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
/*!
@ -2536,6 +2547,9 @@ void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
enum sched_type schedule, kmp_uint32 lb,
kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
@ -2546,6 +2560,9 @@ void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
enum sched_type schedule, kmp_int64 lb,
kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
@ -2556,6 +2573,9 @@ void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
enum sched_type schedule, kmp_uint64 lb,
kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
@ -2573,6 +2593,9 @@ void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
kmp_int32 chunk) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
__kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
@ -2582,6 +2605,9 @@ void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
kmp_int32 chunk) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
__kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
@ -2591,6 +2617,9 @@ void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
kmp_int64 chunk) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
__kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
@ -2600,6 +2629,9 @@ void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
kmp_int64 chunk) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
__kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
@ -2619,7 +2651,15 @@ If there is no more work, then the lb,ub and stride need not be modified.
*/
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
);
}
/*!
@ -2628,7 +2668,15 @@ See @ref __kmpc_dispatch_next_4
int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
kmp_uint32 *p_lb, kmp_uint32 *p_ub,
kmp_int32 *p_st) {
return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
);
}
/*!
@ -2636,7 +2684,15 @@ See @ref __kmpc_dispatch_next_4
*/
int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
);
}
/*!
@ -2645,7 +2701,15 @@ See @ref __kmpc_dispatch_next_4
int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
kmp_uint64 *p_lb, kmp_uint64 *p_ub,
kmp_int64 *p_st) {
return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st);
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
);
}
/*!

View File

@ -21,6 +21,10 @@
#include "kmp_i18n.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
@ -340,6 +344,26 @@ int FTN_STDCALL xexpand(FTN_GET_MAX_THREADS)(void) {
#endif
}
#if OMP_50_ENABLED
/* Entry point for the control-tool routine.
 *
 * Returns -2 immediately when KMP_STUB is defined or OMPT_SUPPORT is off,
 * and also when __kmp_init_middle is not yet set. Otherwise it forwards
 * command, modifier and arg to __kmp_control_tool() and returns that
 * function's result. */
int FTN_STDCALL FTN_CONTROL_TOOL(uint64_t command, uint64_t modifier,
void *arg) {
#if defined(KMP_STUB) || !OMPT_SUPPORT
return -2;
#else
// Store the return address for the calling thread (per the macro name; its
// definition is not visible here) before the initialization check.
OMPT_STORE_RETURN_ADDRESS(__kmp_entry_gtid());
if (!TCR_4(__kmp_init_middle)) {
return -2;
}
kmp_info_t *this_thr = __kmp_threads[__kmp_entry_gtid()];
ompt_task_info_t *parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
// Publish a frame address in the current task's frame info only for the
// duration of the tool call; it is cleared again right after.
parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
int ret = __kmp_control_tool(command, modifier, arg);
parent_task_info->frame.reenter_runtime_frame = 0;
return ret;
#endif
}
#endif
int FTN_STDCALL xexpand(FTN_GET_THREAD_NUM)(void) {
#ifdef KMP_STUB
return 0;
@ -873,8 +897,11 @@ void FTN_STDCALL FTN_INIT_LOCK_WITH_HINT(void **user_lock,
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
#else
__kmpc_init_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock,
KMP_DEREF hint);
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_init_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint);
#endif
}
@ -883,8 +910,11 @@ void FTN_STDCALL FTN_INIT_NEST_LOCK_WITH_HINT(void **user_lock,
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
#else
__kmpc_init_nest_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock,
KMP_DEREF hint);
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_init_nest_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint);
#endif
}
#endif
@ -894,7 +924,11 @@ void FTN_STDCALL xexpand(FTN_INIT_LOCK)(void **user_lock) {
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
#else
__kmpc_init_lock(NULL, __kmp_entry_gtid(), user_lock);
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_init_lock(NULL, gtid, user_lock);
#endif
}
@ -903,7 +937,11 @@ void FTN_STDCALL xexpand(FTN_INIT_NEST_LOCK)(void **user_lock) {
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
#else
__kmpc_init_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_init_nest_lock(NULL, gtid, user_lock);
#endif
}
@ -911,7 +949,11 @@ void FTN_STDCALL xexpand(FTN_DESTROY_LOCK)(void **user_lock) {
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNINIT;
#else
__kmpc_destroy_lock(NULL, __kmp_entry_gtid(), user_lock);
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_destroy_lock(NULL, gtid, user_lock);
#endif
}
@ -919,7 +961,11 @@ void FTN_STDCALL xexpand(FTN_DESTROY_NEST_LOCK)(void **user_lock) {
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNINIT;
#else
__kmpc_destroy_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_destroy_nest_lock(NULL, gtid, user_lock);
#endif
}
@ -933,7 +979,11 @@ void FTN_STDCALL xexpand(FTN_SET_LOCK)(void **user_lock) {
// Tail of FTN_SET_LOCK (the start of the stub branch lies outside this hunk).
}
// Stub build: record the lock as taken.
*((kmp_stub_lock_t *)user_lock) = LOCKED;
#else
// NOTE(review): rendered diff - the next line looks like the removed
// pre-patch call; the lines after it are the post-patch replacement.
__kmpc_set_lock(NULL, __kmp_entry_gtid(), user_lock);
// gtid is fetched once and reused for the OMPT store and the runtime call.
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_set_lock(NULL, gtid, user_lock);
#endif
}
@ -944,7 +994,11 @@ void FTN_STDCALL xexpand(FTN_SET_NEST_LOCK)(void **user_lock) {
// Tail of FTN_SET_NEST_LOCK (the start of the stub branch lies outside this
// hunk).
}
// Stub build: the lock word is used as an integer counter and incremented.
(*((int *)user_lock))++;
#else
// NOTE(review): rendered diff - the next line looks like the removed
// pre-patch call; the lines after it are the post-patch replacement.
__kmpc_set_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
// gtid is fetched once and reused for the OMPT store and the runtime call.
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_set_nest_lock(NULL, gtid, user_lock);
#endif
}
@ -958,7 +1012,11 @@ void FTN_STDCALL xexpand(FTN_UNSET_LOCK)(void **user_lock) {
// Tail of FTN_UNSET_LOCK (the start of the stub branch lies outside this
// hunk).
}
// Stub build: record the lock as released.
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
#else
// NOTE(review): rendered diff - the next line looks like the removed
// pre-patch call; the lines after it are the post-patch replacement.
__kmpc_unset_lock(NULL, __kmp_entry_gtid(), user_lock);
// gtid is fetched once and reused for the OMPT store and the runtime call.
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_unset_lock(NULL, gtid, user_lock);
#endif
}
@ -972,7 +1030,11 @@ void FTN_STDCALL xexpand(FTN_UNSET_NEST_LOCK)(void **user_lock) {
// Tail of FTN_UNSET_NEST_LOCK (the start of the stub branch lies outside this
// hunk).
}
// Stub build: the lock word is used as an integer counter and decremented.
(*((int *)user_lock))--;
#else
// NOTE(review): rendered diff - the next line looks like the removed
// pre-patch call; the lines after it are the post-patch replacement.
__kmpc_unset_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
// gtid is fetched once and reused for the OMPT store and the runtime call.
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_unset_nest_lock(NULL, gtid, user_lock);
#endif
}
@ -987,7 +1049,11 @@ int FTN_STDCALL xexpand(FTN_TEST_LOCK)(void **user_lock) {
// Tail of FTN_TEST_LOCK (the start of the stub branch lies outside this hunk).
// Stub build: take the lock and report success (1).
*((kmp_stub_lock_t *)user_lock) = LOCKED;
return 1;
#else
// NOTE(review): rendered diff - the next line looks like the removed
// pre-patch return; the lines after it are the post-patch replacement.
return __kmpc_test_lock(NULL, __kmp_entry_gtid(), user_lock);
// gtid is fetched once and reused for the OMPT store and the runtime call,
// whose result is returned unchanged.
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
return __kmpc_test_lock(NULL, gtid, user_lock);
#endif
}
@ -998,7 +1064,11 @@ int FTN_STDCALL xexpand(FTN_TEST_NEST_LOCK)(void **user_lock) {
// Tail of FTN_TEST_NEST_LOCK (the start of the stub branch lies outside this
// hunk).
}
// Stub build: bump the integer counter stored in the lock word and return the
// new value.
return ++(*((int *)user_lock));
#else
// NOTE(review): rendered diff - the next line looks like the removed
// pre-patch return; the lines after it are the post-patch replacement.
return __kmpc_test_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
// gtid is fetched once and reused for the OMPT store and the runtime call,
// whose result is returned unchanged.
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
return __kmpc_test_nest_lock(NULL, gtid, user_lock);
#endif
}

View File

@ -133,6 +133,10 @@
#endif
#endif
// KMP_FTN_PLAIN naming: the Fortran entry for the tool-control routine uses
// the undecorated lower-case symbol. Only defined when OMP_50_ENABLED is set.
#if OMP_50_ENABLED
#define FTN_CONTROL_TOOL omp_control_tool
#endif
#endif /* KMP_FTN_PLAIN */
/* ------------------------------------------------------------------------ */
@ -251,6 +255,10 @@
#endif
#endif
// KMP_FTN_APPEND naming: lower-case symbol with a trailing underscore. This
// mirrors the other variants in this file (PLAIN = lower-case, UPPER =
// upper-case, UAPPEND = upper-case plus '_'). The previous value,
// OMP_CONTROL_TOOL, duplicated the UPPER spelling, so this naming scheme never
// exported a lower-case-underscore entry. Only defined when OMP_50_ENABLED is
// set.
#if OMP_50_ENABLED
#define FTN_CONTROL_TOOL omp_control_tool_
#endif
#endif /* KMP_FTN_APPEND */
/* ------------------------------------------------------------------------ */
@ -369,6 +377,10 @@
#endif
#endif
// KMP_FTN_UPPER naming: the Fortran entry for the tool-control routine uses
// the upper-case symbol. Only defined when OMP_50_ENABLED is set.
#if OMP_50_ENABLED
#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL
#endif
#endif /* KMP_FTN_UPPER */
/* ------------------------------------------------------------------------ */
@ -487,6 +499,10 @@
#endif
#endif
// KMP_FTN_UAPPEND naming: upper-case symbol with a trailing underscore.
// Only defined when OMP_50_ENABLED is set.
#if OMP_50_ENABLED
#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL_
#endif
#endif /* KMP_FTN_UAPPEND */
/* -------------------------- GOMP API NAMES ------------------------ */

View File

@ -303,6 +303,10 @@ kmp_int32 __kmp_max_task_priority = 0;
kmp_uint64 __kmp_taskloop_min_tasks = 0;
#endif
// Global string describing the tool libraries to load; NULL until something
// sets it. Exists only when both OpenMP 5.0 features and OMPT are compiled in.
// NOTE(review): presumably filled from an environment/settings parser
// elsewhere - confirm the format (separator, ownership of the string) there.
#if OMP_50_ENABLED && OMPT_SUPPORT
char const *__kmp_tool_libraries = NULL;
#endif
/* This check ensures that the compiler is passing the correct data type for the
flags formal parameter of the function kmpc_omp_task_alloc(). If the type is
not a 4-byte type, then give an error message about a non-positive length

View File

@ -31,14 +31,20 @@ void xexpand(KMP_API_NAME_GOMP_BARRIER)(void) {
// GOMP_barrier body: run an explicit barrier via __kmpc_barrier and, when an
// OMPT tool is active, expose frame/return-address information around it.
// NOTE(review): rendered diff - the lines using OMPT_TRACE, plain
// `ompt_enabled`, __ompt_get_task_frame_internal and __builtin_frame_address
// appear to be the removed pre-patch lines; their replacements follow them.
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_barrier");
KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_TRACE
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_frame_t *ompt_frame;
if (ompt_enabled) {
ompt_frame = __ompt_get_task_frame_internal(0);
ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
if (ompt_enabled.enabled) {
// Fetch the current task's frame record and mark where user code re-enters
// the runtime; also stash the return address for this thread.
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmpc_barrier(&loc, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Back in user code after the barrier: clear the re-entry marker again.
if (ompt_enabled.enabled) {
ompt_frame->reenter_runtime_frame = NULL;
}
#endif
}
// Mutual exclusion
@ -56,6 +62,9 @@ void xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void) {
// GOMP_critical_start body: enter the unnamed critical section
// (__kmp_unnamed_critical_addr) on behalf of the calling thread.
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_critical_start");
KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Stash the return address for this thread before calling into the runtime.
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
}
@ -63,6 +72,9 @@ void xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void) {
// GOMP_critical_end body: leave the unnamed critical section. Uses
// __kmp_get_gtid() rather than __kmp_entry_gtid(), unlike the matching start
// routine.
int gtid = __kmp_get_gtid();
MKLOC(loc, "GOMP_critical_end");
KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Stash the return address for this thread before calling into the runtime.
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
}
@ -111,7 +123,40 @@ int xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void) {
// Tail of GOMP_single_start: decide whether this thread executes the single
// region and report the outcome to an attached OMPT tool.
// 3rd parameter == FALSE prevents kmp_enter_single from pushing a
// workshare when USE_CHECKS is defined. We need to avoid the push,
// as there is no corresponding GOMP_single_end() call.
// NOTE(review): rendered diff - the direct `return __kmp_enter_single(...)`
// appears to be the removed pre-patch line; the post-patch code keeps the
// result in rc so the OMPT callbacks can run before returning it.
return __kmp_enter_single(gtid, &loc, FALSE);
kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE);
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(gtid);
if (ompt_enabled.enabled) {
if (rc) {
// This thread won the single: emit one work callback with scope_begin as
// the executor. No scope_end is emitted here.
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_executor, ompt_scope_begin,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
}
} else {
// This thread skips the single: emit begin and end back to back as
// "single_other".
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_other, ompt_scope_begin,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_single_other, ompt_scope_end,
&(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1, OMPT_GET_RETURN_ADDRESS(0));
}
}
}
#endif
return rc;
}
// GOMP_single_copy_start: the thread that wins the single returns NULL right
// away; every other thread waits on two plain barriers and returns the team's
// t_copypriv_data pointer read between them.
// (The first lines of the body lie in the gap before the hunk header below.)
void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
@ -129,14 +174,33 @@ void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
if (__kmp_enter_single(gtid, &loc, FALSE))
return NULL;
// NOTE(review): rendered diff - the two-line comment below appears twice
// (old and re-indented new copy).
// Wait for the first thread to set the copyprivate data pointer,
// and for all other threads to reach this point.
// Wait for the first thread to set the copyprivate data pointer,
// and for all other threads to reach this point.
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
// Mark the runtime re-entry frame for the whole wait and stash the return
// address for the first barrier.
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
// Retrieve the value of the copyprivate data point, and wait for all
// threads to do likewise, then return.
retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
#if OMPT_SUPPORT && OMPT_OPTIONAL
// The return address is stored again before the second barrier.
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Both barriers done: clear the re-entry marker.
if (ompt_enabled.enabled) {
ompt_frame->reenter_runtime_frame = NULL;
}
#endif
return retval;
}
@ -149,14 +213,35 @@ void xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {
// continuing, so that they know that the copyprivate data pointer has been
// propagated to all threads before trying to reuse the t_copypriv_data field.
// Publish the pointer first, then run the two barriers that the waiting
// threads in GOMP_single_copy_start pair up with.
__kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
// Mark the runtime re-entry frame for the whole sequence and stash the
// return address for the first barrier.
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
// The return address is stored again before the second barrier.
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Both barriers done: clear the re-entry marker.
if (ompt_enabled.enabled) {
ompt_frame->reenter_runtime_frame = NULL;
}
#endif
}
// GOMP_ordered_start: enter an ordered region by forwarding to
// __kmpc_ordered for the calling thread.
void xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) {
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_ordered_start");
KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Stash the return address for this thread before calling into the runtime.
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_ordered(&loc, gtid);
}
@ -164,6 +249,9 @@ void xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void) {
// GOMP_ordered_end body: leave an ordered region by forwarding to
// __kmpc_end_ordered for the calling thread.
int gtid = __kmp_get_gtid();
MKLOC(loc, "GOMP_ordered_end");
// Trace under this routine's own name; the message used to say
// "GOMP_ordered_start", which made start and end indistinguishable in logs.
KA_TRACE(20, ("GOMP_ordered_end: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Stash the return address for this thread before calling into the runtime.
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_end_ordered(&loc, gtid);
}
@ -197,26 +285,26 @@ static
// Fragment of a static wrapper that runs task(data) for a parallel region
// (its signature lies outside this hunk). Around the call it saves the
// thread's OMPT state, switches it to "work parallel" and sets the task's
// exit frame; afterwards it clears the frame again.
// NOTE(review): rendered diff - lines spelled with ompt_state_*, plain
// `ompt_enabled`, __ompt_get_task_frame_internal or __builtin_frame_address
// appear to be the removed pre-patch lines, each followed by its replacement.
#if OMPT_SUPPORT
kmp_info_t *thr;
ompt_frame_t *ompt_frame;
ompt_state_t enclosing_state;
omp_state_t enclosing_state;
if (ompt_enabled) {
if (ompt_enabled.enabled) {
// get pointer to thread data structure
thr = __kmp_threads[*gtid];
// save enclosing task state; set current state for task
enclosing_state = thr->th.ompt_thread_info.state;
thr->th.ompt_thread_info.state = ompt_state_work_parallel;
thr->th.ompt_thread_info.state = omp_state_work_parallel;
// set task frame
ompt_frame = __ompt_get_task_frame_internal(0);
ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
task(data);
#if OMPT_SUPPORT
if (ompt_enabled) {
if (ompt_enabled.enabled) {
// clear task frame
ompt_frame->exit_runtime_frame = NULL;
@ -236,24 +324,29 @@ static
enum sched_type schedule, long start,
long end, long incr,
long chunk_size) {
// Fragment of a static wrapper for combined parallel-loop constructs (the
// start of its signature lies outside this hunk): it initializes dispatch for
// the loop, then runs task(data) with the same OMPT state/frame bookkeeping
// as the plain parallel wrapper.
// NOTE(review): rendered diff - the "Initialize" comment appears twice (old
// and re-indented new copy), and ompt_state_* / plain `ompt_enabled` lines
// appear to be removed pre-patch lines.
// Initialize the loop worksharing construct.
// Initialize the loop worksharing construct.
#if OMPT_SUPPORT
// Stash the return address before dispatch initialization.
if (ompt_enabled.enabled)
OMPT_STORE_RETURN_ADDRESS(*gtid);
#endif
KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
schedule != kmp_sch_static);
#if OMPT_SUPPORT
kmp_info_t *thr;
ompt_frame_t *ompt_frame;
ompt_state_t enclosing_state;
omp_state_t enclosing_state;
if (ompt_enabled) {
if (ompt_enabled.enabled) {
thr = __kmp_threads[*gtid];
// save enclosing task state; set current state for task
enclosing_state = thr->th.ompt_thread_info.state;
thr->th.ompt_thread_info.state = ompt_state_work_parallel;
thr->th.ompt_thread_info.state = omp_state_work_parallel;
// set task frame
ompt_frame = __ompt_get_task_frame_internal(0);
ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
@ -261,7 +354,7 @@ static
task(data);
#if OMPT_SUPPORT
if (ompt_enabled) {
if (ompt_enabled.enabled) {
// clear task frame
ompt_frame->exit_runtime_frame = NULL;
@ -285,11 +378,8 @@ static
// Fragment of the static GOMP fork helper (signature outside this hunk): it
// forwards the variadic arguments to __kmp_fork_call and, with OMPT active,
// emits the implicit-task begin callback for the forking thread.
// NOTE(review): rendered diff - the __kmp_fork_call invocation that passes
// `unwrapped_task` under OMPT_SUPPORT appears to be the removed pre-patch
// form; the shorter call after it is the replacement.
va_list ap;
va_start(ap, argc);
rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc,
#if OMPT_SUPPORT
VOLATILE_CAST(void *) unwrapped_task,
#endif
wrapper, __kmp_invoke_task_func,
rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,
__kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
&ap
#else
@ -304,18 +394,19 @@ static
}
#if OMPT_SUPPORT
if (ompt_enabled) {
#if OMPT_TRACE
int ompt_team_size;
if (ompt_enabled.enabled) {
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
// implicit task callback
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
team_info->parallel_id, task_info->task_id);
if (ompt_enabled.ompt_callback_implicit_task) {
// Post-patch form: pass parallel/task data pointers plus the team size
// (t_nproc) and this thread's index within the team.
ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid));
}
#endif
thr->th.ompt_thread_info.state = ompt_state_work_parallel;
thr->th.ompt_thread_info.state = omp_state_work_parallel;
}
#endif
}
@ -323,47 +414,9 @@ static
// Start a serialized (single-thread) parallel region for the GOMP entry
// points by delegating to __kmp_serialized_parallel.
// NOTE(review): rendered diff - per the hunk header this function shrinks
// from 47 to 9 lines. Everything below that uses plain `ompt_enabled`,
// ompt_parallel_id_t / ompt_task_id_t or the lightweight-taskteam helpers
// appears to be removed pre-patch code; the post-patch body looks like just
// OMPT_STORE_RETURN_ADDRESS(gtid) followed by the __kmp_serialized_parallel
// call, leaving `task` unused.
static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid,
void (*task)(void *)) {
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id;
if (ompt_enabled) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_parallel_id = __ompt_parallel_id_new(gtid);
// parallel region callback
if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
int team_size = 1;
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
task_info->task_id, &task_info->frame, ompt_parallel_id, team_size,
(void *)task, OMPT_INVOKER(fork_context_gnu));
}
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_serialized_parallel(loc, gtid);
#if OMPT_SUPPORT
if (ompt_enabled) {
kmp_info_t *thr = __kmp_threads[gtid];
ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid);
// set up lightweight task
ompt_lw_taskteam_t *lwt =
(ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
__ompt_lw_taskteam_init(lwt, thr, gtid, (void *)task, ompt_parallel_id);
lwt->ompt_task_info.task_id = my_ompt_task_id;
__ompt_lw_taskteam_link(lwt, thr);
#if OMPT_TRACE
// implicit task callback
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
ompt_parallel_id, my_ompt_task_id);
}
thr->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif
}
#endif
}
// GOMP_parallel_start: begin a parallel region for the GOMP ABI. Only the
// OMPT bookkeeping at the top and bottom of the function is visible here; the
// fork/serialize decision sits in the gap between the two hunks.
// NOTE(review): rendered diff - lines using plain `ompt_enabled`,
// __ompt_get_task_frame_internal or __builtin_frame_address appear to be the
// removed pre-patch lines, each followed by its replacement.
void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data,
@ -373,10 +426,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data,
#if OMPT_SUPPORT
ompt_frame_t *parent_frame, *frame;
if (ompt_enabled) {
parent_frame = __ompt_get_task_frame_internal(0);
parent_frame->reenter_runtime_frame = __builtin_frame_address(1);
if (ompt_enabled.enabled) {
// Mark where the encountering (parent) task re-enters the runtime.
__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
}
// Stored unconditionally within OMPT_SUPPORT, i.e. not gated on
// ompt_enabled.enabled.
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
MKLOC(loc, "GOMP_parallel_start");
@ -394,9 +448,9 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data,
}
#if OMPT_SUPPORT
if (ompt_enabled) {
frame = __ompt_get_task_frame_internal(0);
frame->exit_runtime_frame = __builtin_frame_address(1);
if (ompt_enabled.enabled) {
// After the region is set up, level 0 is looked up again and its exit frame
// is set to this function's caller frame.
__ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
}
#endif
}
@ -404,44 +458,23 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data,
// GOMP_parallel_end: finish the parallel region opened by a GOMP parallel
// start. A non-serialized team runs the post-task hook and then joins (the
// join call itself sits in the gap before the second hunk); a serialized team
// only calls __kmpc_end_serialized_parallel.
// NOTE(review): rendered diff - per the hunk headers most OMPT code here is
// removed by the patch. Blocks that use plain `ompt_enabled`,
// ompt_parallel_id_t / ompt_task_id_t, __ompt_lw_taskteam_unlink or
// ompt_event_* callbacks appear to be pre-patch code.
void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
int gtid = __kmp_get_gtid();
kmp_info_t *thr;
// NOTE(review): ompt_team_size is not used in the visible lines - confirm
// against the full file whether it is still needed.
int ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;
thr = __kmp_threads[gtid];
MKLOC(loc, "GOMP_parallel_end");
KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));
#if OMPT_SUPPORT
ompt_parallel_id_t parallel_id;
ompt_task_id_t serialized_task_id;
ompt_frame_t *ompt_frame = NULL;
if (ompt_enabled) {
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
parallel_id = team_info->parallel_id;
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
serialized_task_id = task_info->task_id;
// unlink if necessary. no-op if there is not a lightweight task.
ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr);
// GOMP allocates/frees lwt since it can't be kept on the stack
if (lwt) {
__kmp_free(lwt);
}
}
#endif
if (!thr->th.th_team->t.t_serialized) {
__kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
thr->th.th_team);
#if OMPT_SUPPORT
if (ompt_enabled) {
if (ompt_enabled.enabled) {
// Implicit task is finished here, in the barrier we might schedule
// deferred tasks,
// these don't see the implicit task on the stack
ompt_frame = __ompt_get_task_frame_internal(0);
ompt_frame->exit_runtime_frame = NULL;
OMPT_CUR_TASK_INFO(thr)->frame.exit_runtime_frame = NULL;
}
#endif
@ -452,35 +485,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
#endif
);
} else {
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
parallel_id, serialized_task_id);
}
#endif
__kmpc_end_serialized_parallel(&loc, gtid);
#if OMPT_SUPPORT
if (ompt_enabled) {
// Record that we re-entered the runtime system in the frame that
// created the parallel region.
ompt_task_info_t *parent_task_info = __ompt_get_taskinfo(0);
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
parallel_id, parent_task_info->task_id,
OMPT_INVOKER(fork_context_gnu));
}
parent_task_info->frame.reenter_runtime_frame = NULL;
thr->th.ompt_thread_info.state =
(((thr->th.th_team)->t.t_serialized) ? ompt_state_work_serial
: ompt_state_work_parallel);
}
#endif
}
}
@ -508,6 +513,12 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
// num and calculate the iteration space using the result. It doesn't do this
// with ordered static loop, so they can be checked.
// IF_OMPT_SUPPORT(code): expands to `code` when the runtime is built with
// OMPT_SUPPORT and to nothing otherwise. It lets the multi-line loop macros
// below embed OMPT statements without nesting #if directives inside a macro
// body.
#if !OMPT_SUPPORT
#define IF_OMPT_SUPPORT(code)
#else
#define IF_OMPT_SUPPORT(code) code
#endif
#define LOOP_START(func, schedule) \
int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \
long *p_ub) { \
@ -520,9 +531,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
gtid, lb, ub, str, chunk_sz)); \
\
if ((str > 0) ? (lb < ub) : (lb > ub)) { \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
(schedule) != kmp_sch_static); \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
(kmp_int *)p_ub, (kmp_int *)&stride); \
if (status) { \
@ -551,8 +564,10 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
gtid, lb, ub, str, chunk_sz)); \
\
if ((str > 0) ? (lb < ub) : (lb > ub)) { \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
(kmp_int *)p_ub, (kmp_int *)&stride); \
if (status) { \
@ -577,6 +592,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
MKLOC(loc, #func); \
KA_TRACE(20, (#func ": T#%d\n", gtid)); \
\
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
(kmp_int *)p_ub, (kmp_int *)&stride); \
if (status) { \
@ -621,7 +637,20 @@ void xexpand(KMP_API_NAME_GOMP_LOOP_END)(void) {
// GOMP_loop_end body: the barrier that closes a worksharing loop. With an
// OMPT tool active, the current task's runtime re-entry frame is set for the
// duration of the barrier and cleared afterwards.
int gtid = __kmp_get_gtid();
KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Barrier finished: clear the re-entry marker.
if (ompt_enabled.enabled) {
ompt_frame->reenter_runtime_frame = NULL;
}
#endif
KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
}
@ -796,17 +825,18 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT),
KA_TRACE(20, (#func " exit: T#%d\n", gtid)); \
}
// OMPT_LOOP_PRE / OMPT_LOOP_POST: bracket macros for the combined
// parallel-loop entry points. PRE declares parent_frame, marks the parent
// task's runtime re-entry frame and stores the return address for `gtid`;
// POST clears the marker. Both expect `gtid` to be in scope at the use site.
// NOTE(review): rendered diff - old and new macro lines are interleaved below
// (the old guard and the plain `ompt_enabled` variants appear to be removed),
// so no comments are placed between the continuation lines.
#if OMPT_SUPPORT
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_LOOP_PRE() \
ompt_frame_t *parent_frame; \
if (ompt_enabled) { \
parent_frame = __ompt_get_task_frame_internal(0); \
parent_frame->reenter_runtime_frame = __builtin_frame_address(1); \
}
if (ompt_enabled.enabled) { \
__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \
parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); \
} \
OMPT_STORE_RETURN_ADDRESS(gtid);
#define OMPT_LOOP_POST() \
if (ompt_enabled) { \
if (ompt_enabled.enabled) { \
parent_frame->reenter_runtime_frame = NULL; \
}
@ -878,6 +908,16 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
}
}
// Remainder of GOMP_task (task allocation/argument copying lies before this
// hunk). if_cond true: hand the task to the runtime, with dependences when
// gomp_flags has bit 8 set. if_cond false: run it immediately on this thread
// as an "if(0)" task. With an OMPT tool active, the encountering task's
// runtime re-entry frame is set across the whole operation.
#if OMPT_SUPPORT
kmp_taskdata_t *current_task;
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
current_task = __kmp_threads[gtid]->th.th_current_task;
current_task->ompt_task_info.frame.reenter_runtime_frame =
OMPT_GET_FRAME_ADDRESS(1);
}
#endif
if (if_cond) {
#if OMP_40_ENABLED
if (gomp_flags & 8) {
@ -893,23 +933,26 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
dep_list[i].flags.out = (i < nout);
}
__kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL);
// NOTE(review): rendered diff - the brace-less `} else` appears to be the
// removed line and `} else {` its replacement (the matching `}` is added
// after the __kmpc_omp_task call).
} else
} else {
#endif
__kmpc_omp_task(&loc, gtid, task);
}
} else {
#if OMPT_SUPPORT
ompt_thread_info_t oldInfo;
kmp_info_t *thread;
kmp_taskdata_t *taskdata;
if (ompt_enabled) {
// NOTE(review): this inner `current_task` shadows the one declared near the
// top of this block and is never assigned in the visible lines; it looks
// like a leftover that could be dropped - confirm against the full file.
kmp_taskdata_t *current_task;
if (ompt_enabled.enabled) {
// Store the threads states and restore them after the task
thread = __kmp_threads[gtid];
taskdata = KMP_TASK_TO_TASKDATA(task);
oldInfo = thread->th.ompt_thread_info;
thread->th.ompt_thread_info.wait_id = 0;
thread->th.ompt_thread_info.state = ompt_state_work_parallel;
thread->th.ompt_thread_info.state = omp_state_work_parallel;
taskdata->ompt_task_info.frame.exit_runtime_frame =
__builtin_frame_address(0);
OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@ -918,12 +961,17 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
__kmpc_omp_task_complete_if0(&loc, gtid, task);
#if OMPT_SUPPORT
if (ompt_enabled) {
if (ompt_enabled.enabled) {
// Undeferred task finished: restore the saved thread info and clear the
// task's exit frame.
thread->th.ompt_thread_info = oldInfo;
taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
}
#endif
}
#if OMPT_SUPPORT
// Leaving the runtime: clear the encountering task's re-entry marker that was
// set before the if_cond branch.
if (ompt_enabled.enabled) {
current_task->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
#endif
KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
}
@ -932,6 +980,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void) {
// GOMP_taskwait body (visible part): forwards to __kmpc_omp_taskwait for the
// calling thread.
MKLOC(loc, "GOMP_taskwait");
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT
// Only stash the return address when a tool is actually attached.
if (ompt_enabled.enabled)
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
__kmpc_omp_taskwait(&loc, gtid);
@ -1001,10 +1054,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task)(void *),
// OMPT bookkeeping of GOMP_parallel_sections_start: the parent task's runtime
// re-entry frame is set on entry and cleared before returning (the fork and
// dispatch setup sit in the gap between the two hunks).
// NOTE(review): rendered diff - lines using plain `ompt_enabled`,
// __ompt_get_task_frame_internal or __builtin_frame_address appear to be the
// removed pre-patch lines, each followed by its replacement.
#if OMPT_SUPPORT
ompt_frame_t *parent_frame;
if (ompt_enabled) {
parent_frame = __ompt_get_task_frame_internal(0);
parent_frame->reenter_runtime_frame = __builtin_frame_address(1);
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
}
// Stored unconditionally within OMPT_SUPPORT.
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
MKLOC(loc, "GOMP_parallel_sections_start");
@ -1023,7 +1077,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task)(void *),
}
#if OMPT_SUPPORT
if (ompt_enabled) {
if (ompt_enabled.enabled) {
parent_frame->reenter_runtime_frame = NULL;
}
#endif
@ -1037,7 +1091,20 @@ void xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void) {
// GOMP_sections_end body: the barrier that closes a sections construct. With
// an OMPT tool active, the current task's runtime re-entry frame is set for
// the duration of the barrier and cleared afterwards.
int gtid = __kmp_get_gtid();
KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
#if OMPT_SUPPORT
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT
// Barrier finished: clear the re-entry marker.
if (ompt_enabled.enabled) {
ompt_frame->reenter_runtime_frame = NULL;
}
#endif
KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
}
@ -1063,10 +1130,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data,
// Body of GOMP_parallel (combined start/run/end entry point): fork or
// serialize the region, run task(data) on the calling thread, then call
// GOMP_parallel_end. OMPT frames are set before and cleared after each phase.
// NOTE(review): rendered diff - lines using plain `ompt_enabled`,
// __ompt_get_taskinfo or __builtin_frame_address appear to be the removed
// pre-patch lines, each followed by its replacement.
#if OMPT_SUPPORT
ompt_task_info_t *parent_task_info, *task_info;
if (ompt_enabled) {
parent_task_info = __ompt_get_taskinfo(0);
parent_task_info->frame.reenter_runtime_frame = __builtin_frame_address(1);
if (ompt_enabled.enabled) {
// Mark where the encountering (parent) task re-enters the runtime.
parent_task_info = __ompt_get_task_info_object(0);
parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
if (num_threads != 0) {
@ -1082,15 +1150,20 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data,
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
}
#if OMPT_SUPPORT
if (ompt_enabled) {
task_info = __ompt_get_taskinfo(0);
task_info->frame.exit_runtime_frame = __builtin_frame_address(0);
if (ompt_enabled.enabled) {
// Level 0 is looked up again after the fork/serialize step; its exit frame
// is set to this function's frame before user code runs.
task_info = __ompt_get_task_info_object(0);
task_info->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
task(data);
#if OMPT_SUPPORT
// Store the return address again before the end-of-region call.
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
xexpand(KMP_API_NAME_GOMP_PARALLEL_END)();
#if OMPT_SUPPORT
if (ompt_enabled) {
if (ompt_enabled.enabled) {
task_info->frame.exit_runtime_frame = NULL;
parent_task_info->frame.reenter_runtime_frame = NULL;
}
@ -1106,6 +1179,10 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
// Opening of GOMP_parallel_sections (visible part): trace, stash the return
// address, then decide whether to fork; an explicit non-zero num_threads is
// pushed to the runtime first.
MKLOC(loc, "GOMP_parallel_sections");
KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));
#if OMPT_SUPPORT
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
if (num_threads != 0) {
__kmp_push_num_threads(&loc, gtid, num_threads);
@ -1153,6 +1230,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
__kmp_GOMP_serialized_parallel(&loc, gtid, task); \
} \
\
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
(schedule) != kmp_sch_static); \
@ -1177,6 +1255,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void) {
// GOMP_taskgroup_start body (visible part): forwards to __kmpc_taskgroup.
MKLOC(loc, "GOMP_taskgroup_start");
KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));
#if OMPT_SUPPORT
// Only stash the return address when a tool is actually attached.
if (ompt_enabled.enabled)
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_taskgroup(&loc, gtid);
return;
@ -1187,6 +1270,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void) {
// GOMP_taskgroup_end body (visible part): forwards to __kmpc_end_taskgroup.
MKLOC(loc, "GOMP_taskgroup_end");
KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));
#if OMPT_SUPPORT
// Only stash the return address when a tool is actually attached.
if (ompt_enabled.enabled)
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_end_taskgroup(&loc, gtid);
return;

View File

@ -1135,7 +1135,7 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
// Fragments of __kmp_acquire_queuing_lock_timed_template showing its OMPT
// state tracking: prev_state starts as "undefined", is set to the thread's
// real state the first time the thread has to wait, and is restored once the
// lock is acquired.
// NOTE(review): rendered diff - lines spelled with ompt_state_* or plain
// `ompt_enabled` appear to be the removed pre-patch lines, each followed by
// its omp_state_* / ompt_enabled.enabled replacement.
kmp_int32 need_mf = 1;
#if OMPT_SUPPORT
ompt_state_t prev_state = ompt_state_undefined;
omp_state_t prev_state = omp_state_undefined;
#endif
KA_TRACE(1000,
@ -1243,7 +1243,7 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
#endif
#if OMPT_SUPPORT
// prev_state != undefined means this thread had entered the wait state.
if (ompt_enabled && prev_state != ompt_state_undefined) {
if (ompt_enabled.enabled && prev_state != omp_state_undefined) {
/* change the state before clearing wait_id */
this_thr->th.ompt_thread_info.state = prev_state;
this_thr->th.ompt_thread_info.wait_id = 0;
@ -1258,11 +1258,11 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
}
#if OMPT_SUPPORT
// First time we have to wait: remember the current state and publish the
// lock address as the wait id.
if (ompt_enabled && prev_state == ompt_state_undefined) {
if (ompt_enabled.enabled && prev_state == omp_state_undefined) {
/* this thread will spin; set wait_id before entering wait state */
prev_state = this_thr->th.ompt_thread_info.state;
this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
this_thr->th.ompt_thread_info.state = omp_state_wait_lock;
}
#endif
@ -2911,11 +2911,11 @@ static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
// Forward declarations of the indirect-lock entry points used to fill the
// jump tables below.
// NOTE(review): rendered diff - each `static void __kmp_set_indirect_lock...`
// declaration appears to be the removed pre-patch form and the `static int`
// one after it its replacement; the set routines now return int like the
// unset/test routines.
static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
kmp_dyna_lockseq_t tag);
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
kmp_int32);
static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
kmp_int32);
static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
kmp_int32);
static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
@ -2938,14 +2938,13 @@ void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = {
// Jump tables for acquiring direct (dynamic) locks, with and without
// consistency checks. Entry 0 is the indirect-lock handler; the remaining
// entries are generated per lock kind by KMP_FOREACH_D_LOCK through the
// temporary `expand` macro.
// NOTE(review): rendered diff - old (`void (*)`) and new (`int (*)`) versions
// of the macro bodies and table declarations are interleaved below, so no
// comments are placed between the continuation lines.
// set/acquire functions
#define expand(l, op) \
0, (void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
__kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
#undef expand
#define expand(l, op) \
0, (void (*)(kmp_dyna_lock_t *, \
kmp_int32))__kmp_##op##_##l##_lock_with_checks,
static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
__kmp_set_indirect_lock_with_checks, 0,
KMP_FOREACH_D_LOCK(expand, acquire)};
#undef expand
@ -2968,7 +2967,7 @@ static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
#undef expand
// Exposes only one set of jump tables (*lock or *lock_with_checks).
// All three pointers start as null (0).
// NOTE(review): rendered diff - the `void` declaration of __kmp_direct_set
// appears to be the removed pre-patch line; the `int` one replaces it.
void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
@ -2982,13 +2981,13 @@ void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = {
// Jump tables for acquiring indirect locks, with and without consistency
// checks; one entry per lock kind, generated by KMP_FOREACH_I_LOCK through
// the temporary `expand` macro.
// NOTE(review): rendered diff - old (`void (*)`) and new (`int (*)`) versions
// of the macro bodies and table declarations are interleaved below, so no
// comments are placed between the continuation lines.
// set/acquire functions
#define expand(l, op) \
(void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = {
KMP_FOREACH_I_LOCK(expand, acquire)};
(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_set[])(kmp_user_lock_p,
kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand
#define expand(l, op) \
(void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand
@ -3009,7 +3008,7 @@ static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
#undef expand
// Exposes only one jump tables (*lock or *lock_with_checks).
// All three pointers start as null (0).
// NOTE(review): rendered diff - the `void` declaration of __kmp_indirect_set
// appears to be the removed pre-patch line; the `int` one replaces it.
void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
@ -3164,9 +3163,9 @@ static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
__kmp_release_lock(&__kmp_global_lock, gtid);
}
// Acquire an indirect lock: resolve the dynamic-lock word to its indirect
// lock object and call that object's `set` routine.
// NOTE(review): rendered diff - the `static void` signature and the bare call
// appear to be the removed pre-patch lines; the post-patch version returns
// the result of the underlying set routine.
static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
}
static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
@ -3179,11 +3178,11 @@ static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
}
// Checked variant of indirect-lock acquire: the lookup goes through
// __kmp_lookup_indirect_lock, which is given the user-facing routine name
// "omp_set_lock", before the lock object's `set` routine is called.
// NOTE(review): rendered diff - the `static void` signature and the bare call
// appear to be the removed pre-patch lines; the post-patch version returns
// the result of the underlying set routine.
static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
kmp_int32 gtid) {
static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
kmp_int32 gtid) {
kmp_indirect_lock_t *l =
__kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
}
static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,

View File

@ -1142,7 +1142,7 @@ typedef struct {
// with/without consistency checking.
// Declarations of the direct-lock jump tables: init/destroy are arrays of
// function pointers, while set/unset/test are pointers to such arrays.
// NOTE(review): rendered diff - the `void` declaration of __kmp_direct_set
// appears to be the removed pre-patch line; the `int` one replaces it.
extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
@ -1150,7 +1150,7 @@ extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
// with/without consistency checking.
extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);

View File

@ -722,16 +722,6 @@ void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
/* TODO replace with general release procedure */
team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_enabled &&
ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
/* accept blame for "ordered" waiting */
kmp_info_t *this_thread = __kmp_threads[gtid];
ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
this_thread->th.ompt_thread_info.wait_id);
}
#endif
KMP_MB(); /* Flush all pending memory write invalidates. */
}
#endif /* BUILD_PARALLEL_ORDERED */
@ -1204,6 +1194,28 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */
#if OMPT_SUPPORT
ompt_data_t ompt_parallel_data;
ompt_parallel_data.ptr = NULL;
ompt_data_t *implicit_task_data;
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state != omp_state_overhead) {
ompt_task_info_t *parent_task_info;
parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
if (ompt_enabled.ompt_callback_parallel_begin) {
int team_size = 1;
ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
&(parent_task_info->task_data), &(parent_task_info->frame),
&ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
}
}
#endif // OMPT_SUPPORT
if (this_thr->th.th_team != serial_team) {
// Nested level will be an index in the nested nthreads array
int level = this_thr->th.th_team->t.t_level;
@ -1215,13 +1227,9 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif
new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
ompt_parallel_id,
ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@ -1316,11 +1324,6 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
}
this_thr->th.th_dispatch = serial_team->t.t_dispatch;
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
__ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif
KMP_MB();
} else {
@ -1364,17 +1367,41 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
if (__kmp_env_consistency_check)
__kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
serial_team->t.ompt_team_info.master_return_address = codeptr;
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state != omp_state_overhead) {
OMPT_CUR_TASK_INFO(this_thr)
->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
ompt_lw_taskteam_t lw_taskteam;
__ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
&ompt_parallel_data, codeptr);
__ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
// don't use lw_taskteam after linking. content was swapped
/* OMPT implicit task begin */
implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
}
/* OMPT state */
this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
OMPT_CUR_TASK_INFO(this_thr)
->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
}
#endif
}
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
enum fork_context_e call_context, // Intel, GNU, ...
kmp_int32 argc,
#if OMPT_SUPPORT
void *unwrapped_task,
#endif
microtask_t microtask, launch_t invoker,
kmp_int32 argc, microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
va_list *ap
@ -1432,16 +1459,17 @@ int __kmp_fork_call(ident_t *loc, int gtid,
master_set_numthreads = master_th->th.th_set_nproc;
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id;
ompt_task_id_t ompt_task_id;
ompt_data_t ompt_parallel_data;
ompt_parallel_data.ptr = NULL;
ompt_data_t *parent_task_data;
ompt_frame_t *ompt_frame;
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
ompt_data_t *implicit_task_data;
void *return_address = NULL;
if (ompt_enabled) {
ompt_parallel_id = __ompt_parallel_id_new(gtid);
ompt_task_id = __ompt_get_task_id_internal(0);
ompt_frame = __ompt_get_task_frame_internal(0);
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
NULL, NULL);
return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
}
#endif
@ -1465,13 +1493,16 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#endif
#if OMPT_SUPPORT
if (ompt_enabled &&
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
int team_size = master_set_numthreads;
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
OMPT_INVOKER(call_context));
if (ompt_enabled.enabled) {
if (ompt_enabled.ompt_callback_parallel_begin) {
int team_size = master_set_numthreads
? master_set_numthreads
: get__nproc_2(parent_team, master_tid);
ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
OMPT_INVOKER(call_context), return_address);
}
master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@ -1508,27 +1539,25 @@ int __kmp_fork_call(ident_t *loc, int gtid,
ompt_lw_taskteam_t lw_taskteam;
if (ompt_enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
if (ompt_enabled.enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
&ompt_parallel_data, return_address);
exit_runtime_p =
&(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
// don't use lw_taskteam after linking. content was swapped
#if OMPT_TRACE
/* OMPT implicit task begin */
my_task_id = lw_taskteam.ompt_task_info.task_id;
my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
my_parallel_id, my_task_id);
implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@ -1547,34 +1576,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
*exit_runtime_p = NULL;
if (ompt_enabled) {
#if OMPT_TRACE
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
ompt_parallel_id, ompt_task_id);
if (ompt_enabled.enabled) {
OMPT_CUR_TASK_INFO(master_th)->frame.exit_runtime_frame = NULL;
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, implicit_task_data, 1,
__kmp_tid_from_gtid(gtid));
}
__ompt_lw_taskteam_unlink(master_th);
// reset clear the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
#endif
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
if (ompt_enabled.ompt_callback_parallel_end) {
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
OMPT_INVOKER(call_context), return_address);
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
return TRUE;
}
parent_team->t.t_pkfn = microtask;
#if OMPT_SUPPORT
parent_team->t.ompt_team_info.microtask = unwrapped_task;
#endif
parent_team->t.t_invoke = invoker;
KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
parent_team->t.t_active_level++;
@ -1726,28 +1748,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_task_info_t *task_info;
ompt_lw_taskteam_t lw_taskteam;
if (ompt_enabled) {
if (ompt_enabled.enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
exit_runtime_p =
&(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
&ompt_parallel_data, return_address);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
// don't use lw_taskteam after linking. content was swapped
#if OMPT_TRACE
my_task_id = lw_taskteam.ompt_task_info.task_id;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
ompt_parallel_id, my_task_id);
task_info = OMPT_CUR_TASK_INFO(master_th);
exit_runtime_p = &(task_info->frame.exit_runtime_frame);
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
&(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@ -1766,26 +1787,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#if OMPT_SUPPORT
*exit_runtime_p = NULL;
if (ompt_enabled) {
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
#if OMPT_TRACE
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
ompt_parallel_id, ompt_task_id);
if (ompt_enabled.enabled) {
exit_runtime_p = NULL;
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, &(task_info->task_data), 1,
__kmp_tid_from_gtid(gtid));
}
#endif
__ompt_lw_taskteam_unlink(master_th);
// reset clear the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
if (ompt_enabled.ompt_callback_parallel_end) {
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
OMPT_INVOKER(call_context), return_address);
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
} else if (microtask == (microtask_t)__kmp_teams_master) {
@ -1834,30 +1850,28 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_task_info_t *task_info;
ompt_lw_taskteam_t lw_taskteam;
if (ompt_enabled) {
if (ompt_enabled.enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
exit_runtime_p =
&(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
&ompt_parallel_data, return_address);
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
// don't use lw_taskteam after linking. content was swapped
task_info = OMPT_CUR_TASK_INFO(master_th);
exit_runtime_p = &(task_info->frame.exit_runtime_frame);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
#if OMPT_TRACE
/* OMPT implicit task begin */
my_task_id = lw_taskteam.ompt_task_info.task_id;
my_parallel_id = ompt_parallel_id;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
my_parallel_id, my_task_id);
implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@ -1875,26 +1889,22 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#if OMPT_SUPPORT
*exit_runtime_p = NULL;
if (ompt_enabled) {
#if OMPT_TRACE
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
my_parallel_id, my_task_id);
if (ompt_enabled.enabled) {
*exit_runtime_p = NULL;
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, &(task_info->task_data), 1,
__kmp_tid_from_gtid(gtid));
}
#endif
ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
__ompt_lw_taskteam_unlink(master_th);
// reset clear the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
if (ompt_enabled.ompt_callback_parallel_end) {
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
&ompt_parallel_data, parent_task_data,
OMPT_INVOKER(call_context), return_address);
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
#if OMP_40_ENABLED
@ -1902,14 +1912,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#endif /* OMP_40_ENABLED */
} else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
ompt_lw_taskteam_t *lwt =
(ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
__ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
ompt_parallel_id);
ompt_lw_taskteam_t lwt;
__ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
return_address);
lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
__ompt_lw_taskteam_link(lwt, master_th);
lwt.ompt_task_info.frame.exit_runtime_frame = NULL;
__ompt_lw_taskteam_link(&lwt, master_th, 1);
// don't use lw_taskteam after linking. content was swapped
#endif
// we were called from GNU native code
@ -2004,7 +2013,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
ompt_parallel_id,
ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@ -2015,7 +2024,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
ompt_parallel_id,
ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@ -2033,7 +2042,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
return_address);
#endif
KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
// TODO: parent_team->t.t_level == INT_MAX ???
@ -2167,7 +2177,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
&master_th->th.th_current_task->td_icvs, loc);
#if OMPT_SUPPORT
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
master_th->th.ompt_thread_info.state = omp_state_work_parallel;
#endif
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@ -2251,8 +2261,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
if (ompt_enabled) {
master_th->th.ompt_thread_info.state = ompt_state_overhead;
if (ompt_enabled.enabled) {
master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@ -2264,17 +2274,18 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread,
kmp_team_t *team) {
// restore state outside the region
thread->th.ompt_thread_info.state =
((team->t.t_serialized) ? ompt_state_work_serial
: ompt_state_work_parallel);
((team->t.t_serialized) ? omp_state_work_serial
: omp_state_work_parallel);
}
static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
ompt_parallel_id_t parallel_id,
fork_context_e fork_context) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
kmp_team_t *team, ompt_data_t *parallel_data,
fork_context_e fork_context, void *codeptr) {
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
if (ompt_enabled.ompt_callback_parallel_end) {
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
codeptr);
}
task_info->frame.reenter_runtime_frame = NULL;
@ -2311,8 +2322,8 @@ void __kmp_join_call(ident_t *loc, int gtid
master_th->th.th_ident = loc;
#if OMPT_SUPPORT
if (ompt_enabled) {
master_th->th.ompt_thread_info.state = ompt_state_overhead;
if (ompt_enabled.enabled) {
master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@ -2349,7 +2360,7 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmpc_end_serialized_parallel(loc, gtid);
#if OMPT_SUPPORT
if (ompt_enabled) {
if (ompt_enabled.enabled) {
__kmp_join_restore_state(master_th, parent_team);
}
#endif
@ -2377,7 +2388,8 @@ void __kmp_join_call(ident_t *loc, int gtid
KMP_MB();
#if OMPT_SUPPORT
ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
void *codeptr = team->t.ompt_team_info.master_return_address;
#endif
#if USE_ITT_BUILD
@ -2449,8 +2461,9 @@ void __kmp_join_call(ident_t *loc, int gtid
}
#if OMPT_SUPPORT
if (ompt_enabled) {
__kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
if (ompt_enabled.enabled) {
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
codeptr);
}
#endif
@ -2479,15 +2492,18 @@ void __kmp_join_call(ident_t *loc, int gtid
}
KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
parallel_id, task_info->task_id);
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
if (ompt_enabled.ompt_callback_implicit_task) {
int ompt_team_size = team->t.t_nproc;
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
__kmp_tid_from_gtid(gtid));
}
task_info->frame.exit_runtime_frame = NULL;
task_info->task_id = 0;
task_info->task_data = ompt_data_none;
}
#endif
@ -2558,8 +2574,9 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if OMPT_SUPPORT
if (ompt_enabled) {
__kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
if (ompt_enabled.enabled) {
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
codeptr);
}
#endif
@ -3154,7 +3171,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
1, // new_nproc
1, // max_nproc
#if OMPT_SUPPORT
0, // root parallel id
ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
__kmp_nested_proc_bind.bind_types[0],
@ -3195,7 +3212,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
1, // new_nproc
__kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
0, // root parallel id
ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
__kmp_nested_proc_bind.bind_types[0],
@ -3734,6 +3751,9 @@ int __kmp_register_root(int initial_thread) {
__kmp_print_thread_storage_map(root_thread, gtid);
}
root_thread->th.th_info.ds.ds_gtid = gtid;
#if OMPT_SUPPORT
root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
#endif
root_thread->th.th_root = root;
if (__kmp_env_consistency_check) {
root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
@ -3756,7 +3776,7 @@ int __kmp_register_root(int initial_thread) {
root_thread->th.th_serial_team =
__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
0, // root parallel id
ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
proc_bind_default,
@ -3826,6 +3846,29 @@ int __kmp_register_root(int initial_thread) {
__kmp_root_counter++;
#if OMPT_SUPPORT
if (!initial_thread && ompt_enabled.enabled) {
ompt_thread_t *root_thread = ompt_get_thread();
ompt_set_thread_state(root_thread, omp_state_overhead);
if (ompt_enabled.ompt_callback_thread_begin) {
ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
ompt_thread_initial, __ompt_get_thread_data_internal());
}
ompt_data_t *task_data;
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
if (ompt_enabled.ompt_callback_task_create) {
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
NULL, NULL, task_data, ompt_task_initial, 0, NULL);
// initial task has nothing to return to
}
ompt_set_thread_state(root_thread, omp_state_work_serial);
}
#endif
KMP_MB();
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@ -3909,9 +3952,9 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) {
#endif /* KMP_OS_WINDOWS */
#if OMPT_SUPPORT
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
int gtid = __kmp_get_gtid();
__ompt_thread_end(ompt_thread_initial, gtid);
if (ompt_enabled.ompt_callback_thread_end) {
ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
&(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
}
#endif
@ -3961,7 +4004,7 @@ void __kmp_unregister_root_current_thread(int gtid) {
if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
// the runtime is shutting down so we won't report any events
thread->th.ompt_thread_info.state = ompt_state_undefined;
thread->th.ompt_thread_info.state = omp_state_undefined;
#endif
__kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
}
@ -4282,7 +4325,7 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
new_thr->th.th_serial_team = serial_team =
(kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
0, // root parallel id
ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
proc_bind_default,
@ -4813,7 +4856,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id,
ompt_data_t ompt_parallel_data,
#endif
#if OMP_40_ENABLED
kmp_proc_bind_t new_proc_bind,
@ -5180,7 +5223,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#endif
#if OMPT_SUPPORT
__ompt_team_assign_id(team, ompt_parallel_id);
__ompt_team_assign_id(team, ompt_parallel_data);
#endif
KMP_MB();
@ -5232,7 +5275,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
team->t.t_id));
#if OMPT_SUPPORT
__ompt_team_assign_id(team, ompt_parallel_id);
__ompt_team_assign_id(team, ompt_parallel_data);
#endif
KMP_MB();
@ -5296,7 +5339,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#endif
#if OMPT_SUPPORT
__ompt_team_assign_id(team, ompt_parallel_id);
__ompt_team_assign_id(team, ompt_parallel_data);
team->t.ompt_serialized_team_info = NULL;
#endif
@ -5563,16 +5606,26 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
}
#if OMPT_SUPPORT
if (ompt_enabled) {
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
ompt_data_t *thread_data;
if (ompt_enabled.enabled) {
thread_data = &(this_thr->th.ompt_thread_info.thread_data);
thread_data->ptr = NULL;
this_thr->th.ompt_thread_info.state = omp_state_overhead;
this_thr->th.ompt_thread_info.wait_id = 0;
this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
__ompt_thread_begin(ompt_thread_worker, gtid);
this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
if (ompt_enabled.ompt_callback_thread_begin) {
ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
ompt_thread_worker, thread_data);
}
}
#endif
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
this_thr->th.ompt_thread_info.state = omp_state_idle;
}
#endif
/* This is the place where threads wait for work */
while (!TCR_4(__kmp_global.g.g_done)) {
KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
@ -5581,18 +5634,12 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
/* wait for work to do */
KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
#if OMPT_SUPPORT
if (ompt_enabled) {
this_thr->th.ompt_thread_info.state = ompt_state_idle;
}
#endif
/* No tid yet since not part of a team */
__kmp_fork_barrier(gtid, KMP_GTID_DNE);
#if OMPT_SUPPORT
if (ompt_enabled) {
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
if (ompt_enabled.enabled) {
this_thr->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@ -5600,14 +5647,6 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
/* have we been allocated? */
if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
#if OMPT_SUPPORT
ompt_task_info_t *task_info;
ompt_parallel_id_t my_parallel_id;
if (ompt_enabled) {
task_info = __ompt_get_taskinfo(0);
my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
}
#endif
/* we were just woken up, so run our new task */
if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
int rc;
@ -5619,11 +5658,8 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
updateHWFPControl(*pteam);
#if OMPT_SUPPORT
if (ompt_enabled) {
this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
// Initialize OMPT task id for implicit task.
int tid = __kmp_tid_from_gtid(gtid);
task_info->task_id = __ompt_task_id_new(tid);
if (ompt_enabled.enabled) {
this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
}
#endif
@ -5634,40 +5670,29 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
}
KMP_ASSERT(rc);
#if OMPT_SUPPORT
if (ompt_enabled) {
/* no frame set while outside task */
task_info->frame.exit_runtime_frame = NULL;
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
KMP_MB();
KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
(*pteam)->t.t_pkfn));
}
/* join barrier after parallel region */
__kmp_join_barrier(gtid);
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled) {
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
// don't access *pteam here: it may have already been freed
// by the master thread behind the barrier (possible race)
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
my_parallel_id, task_info->task_id);
}
task_info->frame.exit_runtime_frame = NULL;
task_info->task_id = 0;
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
/* no frame set while outside task */
__ompt_get_task_info_object(0)->frame.exit_runtime_frame = NULL;
this_thr->th.ompt_thread_info.state = omp_state_overhead;
this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
}
#endif
/* join barrier after parallel region */
__kmp_join_barrier(gtid);
}
}
TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
#if OMPT_SUPPORT
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
__ompt_thread_end(ompt_thread_worker, gtid);
if (ompt_enabled.ompt_callback_thread_end) {
ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
}
#endif
@ -6925,25 +6950,26 @@ int __kmp_invoke_task_func(int gtid) {
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
ompt_data_t *my_task_data;
ompt_data_t *my_parallel_data;
int ompt_team_size;
if (ompt_enabled) {
if (ompt_enabled.enabled) {
exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
.ompt_task_info.frame.exit_runtime_frame);
} else {
exit_runtime_p = &dummy;
}
#if OMPT_TRACE
my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
my_parallel_id = team->t.ompt_team_info.parallel_id;
if (ompt_enabled &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
my_task_id);
my_task_data =
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
my_parallel_data = &(team->t.ompt_team_info.parallel_data);
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_team_size = team->t.t_nproc;
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
__kmp_tid_from_gtid(gtid));
}
#endif
#endif
{
@ -6991,9 +7017,6 @@ void __kmp_teams_master(int gtid) {
SSC_MARK_FORKING();
#endif
__kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
#if OMPT_SUPPORT
(void *)thr->th.th_teams_microtask, // "unwrapped" task
#endif
(microtask_t)thr->th.th_teams_microtask, // "wrapped" task
VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
@ -7170,6 +7193,36 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
#endif /* KMP_DEBUG */
__kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
int ds_tid = this_thr->th.th_info.ds.ds_tid;
if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr);
ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr);
this_thr->th.ompt_thread_info.state = omp_state_overhead;
#if OMPT_OPTIONAL
void *codeptr = NULL;
if (KMP_MASTER_TID(ds_tid) &&
(ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, tId, 0, ds_tid);
}
// callbacks are done: mark the thread as being in the overhead state again
this_thr->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
KMP_MB(); /* Flush all pending memory write invalidates. */
KMP_ASSERT(this_thr->th.th_team == team);

View File

@ -44,7 +44,12 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
T *plower, T *pupper,
typename traits_t<T>::signed_t *pstride,
typename traits_t<T>::signed_t incr,
typename traits_t<T>::signed_t chunk) {
typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
void *codeptr
#endif
) {
KMP_COUNT_BLOCK(OMP_FOR_static);
KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);
@ -58,14 +63,29 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
kmp_team_t *team;
kmp_info_t *th = __kmp_threads[gtid];
#if OMPT_SUPPORT && OMPT_TRACE
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_team_info_t *team_info = NULL;
ompt_task_info_t *task_info = NULL;
ompt_work_type_t ompt_work_type;
if (ompt_enabled) {
if (ompt_enabled.enabled) {
// Only fully initialize variables needed by OMPT if OMPT is enabled.
team_info = __ompt_get_teaminfo(0, NULL);
task_info = __ompt_get_taskinfo(0);
task_info = __ompt_get_task_info_object(0);
// Determine workshare type
if (loc != NULL) {
if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
ompt_work_type = ompt_work_loop;
} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
ompt_work_type = ompt_work_sections;
} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
ompt_work_type = ompt_work_distribute;
} else {
KMP_ASSERT2(0,
"__kmpc_for_static_init: can't determine workshare type");
}
KMP_DEBUG_ASSERT(ompt_work_type);
}
}
#endif
@ -119,10 +139,11 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
#endif
KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id, team_info->microtask);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), 0, codeptr);
}
#endif
KMP_COUNT_VALUE(FOR_static_iterations, 0);
@ -170,10 +191,11 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
#endif
KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id, team_info->microtask);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), *pstride, codeptr);
}
#endif
return;
@ -198,10 +220,11 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
#endif
KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id, team_info->microtask);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), *pstride, codeptr);
}
#endif
return;
@ -354,10 +377,11 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
#endif
KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id, team_info->microtask);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), trip_count, codeptr);
}
#endif
@ -745,7 +769,12 @@ void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
kmp_int32 *pupper, kmp_int32 *pstride,
kmp_int32 incr, kmp_int32 chunk) {
__kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
pupper, pstride, incr, chunk);
pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
OMPT_GET_RETURN_ADDRESS(0)
#endif
);
}
/*!
@ -757,7 +786,12 @@ void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
kmp_int32 *pstride, kmp_int32 incr,
kmp_int32 chunk) {
__kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
pupper, pstride, incr, chunk);
pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
OMPT_GET_RETURN_ADDRESS(0)
#endif
);
}
/*!
@ -768,7 +802,12 @@ void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
kmp_int64 *pupper, kmp_int64 *pstride,
kmp_int64 incr, kmp_int64 chunk) {
__kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
pupper, pstride, incr, chunk);
pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
OMPT_GET_RETURN_ADDRESS(0)
#endif
);
}
/*!
@ -780,7 +819,12 @@ void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
kmp_int64 *pstride, kmp_int64 incr,
kmp_int64 chunk) {
__kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
pupper, pstride, incr, chunk);
pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
OMPT_GET_RETURN_ADDRESS(0)
#endif
);
}
/*!
@}

View File

@ -334,13 +334,11 @@ static void __kmp_stg_parse_size(char const *name, char const *value,
}
} // __kmp_stg_parse_size
#if KMP_AFFINITY_SUPPORTED
// Generic parser for string-valued settings: the string currently held in
// *out is handed to __kmp_str_free, then *out is pointed at a new string
// produced by formatting value with "%s". The name argument is not used.
static void __kmp_stg_parse_str(char const *name, char const *value,
                                char const **out) {
  // Drop the previous value first; the replacement is created afterwards.
  __kmp_str_free(out);
  char const *replacement = __kmp_str_format("%s", value);
  *out = replacement;
} // __kmp_stg_parse_str
#endif
static void __kmp_stg_parse_int(
char const
@ -4354,7 +4352,29 @@ static void __kmp_stg_print_omp_cancellation(kmp_str_buf_t *buffer,
#endif
// -----------------------------------------------------------------------------
#if OMP_50_ENABLED && OMPT_SUPPORT
// Parse callback registered for the OMP_TOOL_LIBRARIES setting: forwards to
// __kmp_stg_parse_str so that __kmp_tool_libraries ends up holding a copy of
// value. The data argument is not used.
static void __kmp_stg_parse_omp_tool_libraries(char const *name,
char const *value, void *data) {
__kmp_stg_parse_str(name, value, &__kmp_tool_libraries);
} // __kmp_stg_parse_omp_tool_libraries
// Print callback registered for the OMP_TOOL_LIBRARIES setting. When
// __kmp_tool_libraries is set it is printed through __kmp_stg_print_str;
// otherwise the setting name is printed followed by the localized
// "NotDefined" message. The data argument is not used.
static void __kmp_stg_print_omp_tool_libraries(kmp_str_buf_t *buffer,
                                               char const *name, void *data) {
  // Common case: a value exists, print it like any other string setting.
  if (__kmp_tool_libraries != NULL) {
    __kmp_stg_print_str(buffer, name, __kmp_tool_libraries);
    return;
  }

  // No value: emit the name in the format selected by __kmp_env_format ...
  if (!__kmp_env_format) {
    __kmp_str_buf_print(buffer, " %s", name);
  } else {
    KMP_STR_BUF_PRINT_NAME;
  }
  // ... followed by the "not defined" marker.
  __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
} // __kmp_stg_print_omp_tool_libraries
#endif
// Table.
static kmp_setting_t __kmp_stg_table[] = {
@ -4598,6 +4618,12 @@ static kmp_setting_t __kmp_stg_table[] = {
{"OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation,
__kmp_stg_print_omp_cancellation, NULL, 0, 0},
#endif
#if OMP_50_ENABLED && OMPT_SUPPORT
{"OMP_TOOL_LIBRARIES", __kmp_stg_parse_omp_tool_libraries,
__kmp_stg_print_omp_tool_libraries, NULL, 0, 0},
#endif
{"", NULL, NULL, NULL, 0, 0}}; // settings
static int const __kmp_stg_count =

View File

@ -16,6 +16,9 @@
#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#if OMP_40_ENABLED
@ -217,18 +220,19 @@ static inline void __kmp_track_dependence(kmp_depnode_t *source,
task_source->td_ident->psource, sink->dn.id,
task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_TRACE
// OMPT tracks dependences between task (a=source, b=sink) in which
// task a blocks the execution of b through the ompt_new_dependence_callback
if (ompt_enabled &&
ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
/* OMPT tracks dependences between task (a=source, b=sink) in which
task a blocks the execution of b through the ompt_new_dependence_callback
*/
if (ompt_enabled.ompt_callback_task_dependence) {
kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)(
task_source->ompt_task_info.task_id, task_sink->ompt_task_info.task_id);
ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
&(task_source->ompt_task_info.task_data),
&(task_sink->ompt_task_info.task_data));
}
#endif /* OMPT_SUPPORT && OMPT_TRACE */
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
template <bool filter>
@ -470,10 +474,29 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *current_task = thread->th.th_current_task;
#if OMPT_SUPPORT && OMPT_TRACE
#if OMPT_SUPPORT
OMPT_STORE_RETURN_ADDRESS(gtid);
if (ompt_enabled.enabled) {
if (ompt_enabled.ompt_callback_task_create) {
kmp_taskdata_t *parent = new_taskdata->td_parent;
ompt_data_t task_data = ompt_data_none;
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
parent ? &(parent->ompt_task_info.task_data) : &task_data,
parent ? &(parent->ompt_task_info.frame) : NULL,
&(new_taskdata->ompt_task_info.task_data),
ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
OMPT_GET_FRAME_ADDRESS(0);
}
#if OMPT_OPTIONAL
/* OMPT grab all dependences if requested by the tool */
if (ompt_enabled && ndeps + ndeps_noalias > 0 &&
ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
if (ndeps + ndeps_noalias > 0 &&
ompt_enabled.ompt_callback_task_dependences) {
kmp_int32 i;
new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias;
@ -509,8 +532,17 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
ompt_task_dependence_type_in;
}
ompt_callbacks.ompt_callback(ompt_callback_task_dependences)(
&(new_taskdata->ompt_task_info.task_data),
new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps);
/* We can now free the allocated memory for the dependencies */
/* For OMPD we might want to delay the free until task_end */
KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps);
new_taskdata->ompt_task_info.deps = NULL;
new_taskdata->ompt_task_info.ndeps = 0;
}
#endif /* OMPT_SUPPORT && OMPT_TRACE */
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */
bool serial = current_task->td_flags.team_serial ||
current_task->td_flags.tasking_ser ||
@ -557,7 +589,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
"loc=%p task=%p, transferring to __kmpc_omp_task\n",
gtid, loc_ref, new_taskdata));
return __kmpc_omp_task(loc_ref, gtid, new_task);
return __kmp_omp_task(gtid, new_task, true);
}
/*!

View File

@ -446,40 +446,78 @@ static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
#if OMPT_SUPPORT
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
kmp_taskdata_t *parent = taskdata->td_parent;
ompt_callbacks.ompt_callback(ompt_event_task_begin)(
parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
parent ? &(parent->ompt_task_info.frame) : NULL,
taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function);
}
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
/* OMPT emit all dependences if requested by the tool */
if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps,
taskdata->ompt_task_info.ndeps);
/* We can now free the allocated memory for the dependencies */
KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps);
taskdata->ompt_task_info.deps = NULL;
taskdata->ompt_task_info.ndeps = 0;
}
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
return;
}
// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution
//
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task) {
#if OMPT_SUPPORT
//------------------------------------------------------------------------------
// __ompt_task_init:
//   Reset the OMPT bookkeeping stored in a task descriptor: the tool data
//   value is zeroed, both runtime frame pointers are cleared and, when
//   OMP_40_ENABLED is set, the recorded dependence list is emptied.
//   Only called after ompt_start_tool, so it is already known whether ompt is
//   enabled or not. The tid argument is not used.
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  // No ompt_enabled test here: every call site already performs it.
  ompt_task_info_t *info = &(task->ompt_task_info);

  info->task_data.value = 0;
  info->frame.reenter_runtime_frame = NULL;
  info->frame.exit_runtime_frame = NULL;
#if OMP_40_ENABLED
  info->deps = NULL;
  info->ndeps = 0;
#endif /* OMP_40_ENABLED */
}
// __ompt_task_start:
//   Build and trigger the task-schedule event for switching from current_task
//   to task, and remember current_task as the scheduling parent of task.
//   The status reported for current_task is ompt_task_yield when the thread's
//   ompt_task_yielded flag is set (the flag is cleared here), and
//   ompt_task_others otherwise.
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *next_task = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thr = __kmp_threads[gtid];

  ompt_task_status_t prior_status;
  if (thr->th.ompt_thread_info.ompt_task_yielded) {
    // The yield marker is consumed by the first task switch that sees it.
    prior_status = ompt_task_yield;
    thr->th.ompt_thread_info.ompt_task_yielded = 0;
  } else {
    prior_status = ompt_task_others;
  }

  /* tell the tool that this task is about to run */
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), prior_status,
        &(next_task->ompt_task_info.task_data));
  }

  // Recorded so that __ompt_task_finish can fall back to it.
  next_task->ompt_task_info.scheduling_parent = current_task;
}
// __ompt_task_finish:
//   Build and trigger the final task-schedule event for task.
//   Reported status: ompt_task_complete by default; ompt_task_others for an
//   untied task whose td_untied_count is still above 1; ompt_task_cancel
//   (overriding the former) when cancellation is active and the task's
//   taskgroup has a pending cancel_taskgroup request.
//   The task switched to is resumed_task if non-NULL, else the recorded
//   scheduling parent if non-NULL, else the task's td_parent.
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *done_task = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_complete;

  if (done_task->td_flags.tiedness == TASK_UNTIED) {
    // Adding 0 reads the counter through the atomic primitive.
    if (KMP_TEST_THEN_ADD32(&(done_task->td_untied_count), 0) > 1)
      status = ompt_task_others;
  }

  if (__kmp_omp_cancellation && done_task->td_taskgroup &&
      done_task->td_taskgroup->cancel_request == cancel_taskgroup) {
    status = ompt_task_cancel;
  }

  // Nothing more to do unless the tool registered the schedule callback.
  if (!ompt_enabled.ompt_callback_task_schedule)
    return;

  /* pick the task that execution returns to */
  kmp_taskdata_t *next_task = resumed_task;
  if (!next_task)
    next_task = done_task->ompt_task_info.scheduling_parent;
  if (!next_task)
    next_task = done_task->td_parent;

  ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
      &(done_task->ompt_task_info.task_data), status,
      &(next_task->ompt_task_info.task_data));
}
#endif
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task,
void *frame_address,
void *return_address) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
@ -500,10 +538,57 @@ void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
1; // Execute this task immediately, not deferred.
__kmp_task_start(gtid, task, current_task);
#if OMPT_SUPPORT
if (ompt) {
if (current_task->ompt_task_info.frame.reenter_runtime_frame == NULL) {
current_task->ompt_task_info.frame.reenter_runtime_frame =
taskdata->ompt_task_info.frame.exit_runtime_frame = frame_address;
}
if (ompt_enabled.ompt_callback_task_create) {
ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
&(parent_info->task_data), &(parent_info->frame),
&(taskdata->ompt_task_info.task_data),
ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
return_address);
}
__ompt_task_start(task, current_task, gtid);
}
#endif // OMPT_SUPPORT
KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
loc_ref, taskdata));
}
return;
#if OMPT_SUPPORT
// OMPT-enabled variant of __kmpc_omp_task_begin_if0: runs the <true>
// instantiation of __kmpc_omp_task_begin_if0_template, passing through the
// frame address and return address supplied by the caller.
// NOTE(review): OMPT_NOINLINE presumably keeps this wrapper out of line so the
// tool-attached path does not bloat the common entry point -- confirm against
// the macro definition.
OMPT_NOINLINE
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task,
void *frame_address,
void *return_address) {
__kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
return_address);
}
#endif // OMPT_SUPPORT
// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution
//
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.
//
// When OMPT support is compiled in and ompt_enabled.enabled is set, the call
// is routed to __kmpc_omp_task_begin_if0_ompt together with a frame address
// and a return address; in every other case the <false> template
// instantiation runs with NULL for both.
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task) {
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled)) {
// NOTE(review): presumably records this entry point's return address for gtid
// so that OMPT_LOAD_RETURN_ADDRESS below can hand it on -- confirm against the
// macro definitions.
OMPT_STORE_RETURN_ADDRESS(gtid);
// The frame address is taken here, in the entry point itself, and passed down
// explicitly to the helper.
__kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
OMPT_GET_FRAME_ADDRESS(1),
OMPT_LOAD_RETURN_ADDRESS(gtid));
return;
}
#endif
__kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
#ifdef TASK_UNUSED
@ -623,14 +708,6 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
thread->th.th_task_team; // might be NULL for serial teams...
kmp_int32 children = 0;
#if OMPT_SUPPORT
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) {
kmp_taskdata_t *parent = taskdata->td_parent;
ompt_callbacks.ompt_callback(ompt_event_task_end)(
taskdata->ompt_task_info.task_id);
}
#endif
KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
"task %p\n",
gtid, taskdata, resumed_task));
@ -760,13 +837,10 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
return;
}
// __kmpc_omp_task_complete_if0: report that a task has completed execution
//
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task) {
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
kmp_int32 gtid,
kmp_task_t *task) {
KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
// this routine will provide task to resume
@ -774,9 +848,43 @@ void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
#if OMPT_SUPPORT
if (ompt) {
__ompt_task_finish(task, NULL);
ompt_frame_t *ompt_frame;
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
ompt_frame->reenter_runtime_frame = NULL;
}
#endif
return;
}
#if OMPT_SUPPORT
// OMPT-enabled variant of __kmpc_omp_task_complete_if0: runs the <true>
// instantiation of __kmpc_omp_task_complete_if0_template.
// NOTE(review): unlike __kmpc_omp_task_begin_if0_ompt, this helper is not
// declared static. If nothing outside this file references it, internal
// linkage would be the consistent choice -- verify before changing.
OMPT_NOINLINE
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task) {
__kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif // OMPT_SUPPORT
// __kmpc_omp_task_complete_if0: report that a task has completed execution
//
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.
//
// When OMPT support is compiled in and ompt_enabled.enabled is set, the call
// is routed to __kmpc_omp_task_complete_if0_ompt; in every other case the
// <false> template instantiation runs.
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task) {
#if OMPT_SUPPORT
// Tool attached: take the out-of-line OMPT path and return from there.
if (UNLIKELY(ompt_enabled.enabled)) {
__kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
return;
}
#endif
__kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
@ -793,25 +901,6 @@ void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
}
#endif // TASK_UNUSED
#if OMPT_SUPPORT
// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will
// only be called after ompt_tool, so we already know whether ompt is enabled
// or not.
static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid,
void *function) {
if (ompt_enabled) {
task->ompt_task_info.task_id = __ompt_task_id_new(tid);
task->ompt_task_info.function = function;
task->ompt_task_info.frame.exit_runtime_frame = NULL;
task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
task->ompt_task_info.ndeps = 0;
task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
}
}
#endif
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
// task for a given thread
//
@ -876,7 +965,8 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
}
#if OMPT_SUPPORT
__kmp_task_init_ompt(task, tid, NULL);
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_init(task, tid);
#endif
KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
@ -1121,7 +1211,8 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
ANNOTATE_HAPPENS_BEFORE(task);
#if OMPT_SUPPORT
__kmp_task_init_ompt(taskdata, gtid, (void *)task_entry);
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_init(taskdata, gtid);
#endif
return task;
@ -1207,7 +1298,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
if (taskdata->td_flags.proxy != TASK_PROXY) {
#endif
ANNOTATE_HAPPENS_AFTER(task);
__kmp_task_start(gtid, task, current_task);
__kmp_task_start(gtid, task, current_task); // OMPT only if not discarded
#if OMP_45_ENABLED
}
#endif
@ -1215,14 +1306,16 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
#if OMPT_SUPPORT
ompt_thread_info_t oldInfo;
kmp_info_t *thread;
if (ompt_enabled) {
if (UNLIKELY(ompt_enabled.enabled)) {
// Store the threads states and restore them after the task
thread = __kmp_threads[gtid];
oldInfo = thread->th.ompt_thread_info;
thread->th.ompt_thread_info.wait_id = 0;
thread->th.ompt_thread_info.state = ompt_state_work_parallel;
thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
? omp_state_work_serial
: omp_state_work_parallel;
taskdata->ompt_task_info.frame.exit_runtime_frame =
__builtin_frame_address(0);
OMPT_GET_FRAME_ADDRESS(0);
}
#endif
@ -1236,6 +1329,18 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
if ((taskgroup && taskgroup->cancel_request) ||
(this_team->t.t_cancel_request == cancel_parallel)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_data_t *task_data;
if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
ompt_callbacks.ompt_callback(ompt_callback_cancel)(
task_data,
((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
: ompt_cancel_parallel) |
ompt_cancel_discarded_task,
NULL);
}
#endif
KMP_COUNT_BLOCK(TASK_cancelled);
// this task belongs to a task group and we need to cancel it
discard = 1 /* true */;
@ -1270,13 +1375,10 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED
#if OMPT_SUPPORT && OMPT_TRACE
/* let OMPT know that we're about to run this task */
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
ompt_callbacks.ompt_callback(ompt_event_task_switch)(
current_task->ompt_task_info.task_id,
taskdata->ompt_task_info.task_id);
}
// OMPT task begin
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_start(task, current_task, gtid);
#endif
#ifdef KMP_GOMP_COMPAT
@ -1289,21 +1391,16 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
}
KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_TRACE
/* let OMPT know that we're returning to the callee task */
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
ompt_callbacks.ompt_callback(ompt_event_task_switch)(
taskdata->ompt_task_info.task_id,
current_task->ompt_task_info.task_id);
}
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_finish(task, current_task);
#endif
#if OMP_40_ENABLED
}
#endif // OMP_40_ENABLED
#if OMPT_SUPPORT
if (ompt_enabled) {
if (UNLIKELY(ompt_enabled.enabled)) {
thread->th.ompt_thread_info = oldInfo;
taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
}
@ -1314,7 +1411,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
if (taskdata->td_flags.proxy != TASK_PROXY) {
#endif
ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
__kmp_task_finish(gtid, task, current_task);
__kmp_task_finish(gtid, task, current_task); // OMPT only if not discarded
#if OMP_45_ENABLED
}
#endif
@ -1352,6 +1449,21 @@ kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
loc_ref, new_taskdata));
#if OMPT_SUPPORT
kmp_taskdata_t *parent;
if (UNLIKELY(ompt_enabled.enabled)) {
parent = new_taskdata->td_parent;
if (ompt_enabled.ompt_callback_task_create) {
ompt_data_t task_data = ompt_data_none;
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
parent ? &(parent->ompt_task_info.task_data) : &task_data,
parent ? &(parent->ompt_task_info.frame) : NULL,
&(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
OMPT_GET_RETURN_ADDRESS(0));
}
}
#endif
/* Should we execute the new task or queue it? For now, let's just always try
to queue it. If the queue fills up, then we'll execute it. */
@ -1369,6 +1481,11 @@ kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
gtid, loc_ref, new_taskdata));
ANNOTATE_HAPPENS_BEFORE(new_task);
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled)) {
parent->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
#endif
return TASK_CURRENT_NOT_QUEUED;
}
@ -1387,13 +1504,6 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
bool serialize_immediate) {
kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#if OMPT_SUPPORT
if (ompt_enabled) {
new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
__builtin_frame_address(1);
}
#endif
/* Should we execute the new task or queue it? For now, let's just always try to
queue it. If the queue fills up, then we'll execute it. */
#if OMP_45_ENABLED
@ -1409,12 +1519,6 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
__kmp_invoke_task(gtid, new_task, current_task);
}
#if OMPT_SUPPORT
if (ompt_enabled) {
new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
#endif
ANNOTATE_HAPPENS_BEFORE(new_task);
return TASK_CURRENT_NOT_QUEUED;
}
@ -1436,23 +1540,49 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 res;
KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
#if KMP_DEBUG
#if KMP_DEBUG || OMPT_SUPPORT
kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
new_taskdata));
#if OMPT_SUPPORT
kmp_taskdata_t *parent = NULL;
if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
OMPT_STORE_RETURN_ADDRESS(gtid);
parent = new_taskdata->td_parent;
if (!parent->ompt_task_info.frame.reenter_runtime_frame)
parent->ompt_task_info.frame.reenter_runtime_frame =
OMPT_GET_FRAME_ADDRESS(1);
if (ompt_enabled.ompt_callback_task_create) {
ompt_data_t task_data = ompt_data_none;
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
parent ? &(parent->ompt_task_info.task_data) : &task_data,
parent ? &(parent->ompt_task_info.frame) : NULL,
&(new_taskdata->ompt_task_info.task_data),
ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
}
#endif
res = __kmp_omp_task(gtid, new_task, true);
KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
"TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
parent->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
#endif
return res;
}
// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
// complete
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
void *frame_address,
void *return_address) {
kmp_taskdata_t *taskdata;
kmp_info_t *thread;
int thread_finished = FALSE;
@ -1463,23 +1593,30 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
if (__kmp_tasking_mode != tskm_immediate_exec) {
thread = __kmp_threads[gtid];
taskdata = thread->th.th_current_task;
#if OMPT_SUPPORT && OMPT_TRACE
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
if (ompt_enabled) {
kmp_team_t *team = thread->th.th_team;
my_task_id = taskdata->ompt_task_info.task_id;
my_parallel_id = team->t.ompt_team_info.parallel_id;
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_data_t *my_task_data;
ompt_data_t *my_parallel_data;
taskdata->ompt_task_info.frame.reenter_runtime_frame =
__builtin_frame_address(1);
if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id,
my_task_id);
if (ompt) {
my_task_data = &(taskdata->ompt_task_info.task_data);
my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
taskdata->ompt_task_info.frame.reenter_runtime_frame = frame_address;
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
my_task_data, return_address);
}
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
my_task_data, return_address);
}
}
#endif
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
// Debugger: The taskwait is active. Store location and thread encountered the
// taskwait.
@ -1522,15 +1659,22 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
// negated.
taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled) {
if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id,
my_task_id);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt) {
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
my_task_data, return_address);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
my_task_data, return_address);
}
taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
#endif
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
ANNOTATE_HAPPENS_AFTER(taskdata);
}
@ -1541,6 +1685,29 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
return TASK_CURRENT_NOT_QUEUED;
}
// The guard matches the one around the only call site in __kmpc_omp_taskwait:
// with OMPT_SUPPORT on but OMPT_OPTIONAL off the helper would otherwise be an
// unused static function.
#if OMPT_SUPPORT && OMPT_OPTIONAL
// OMPT-enabled variant of __kmpc_omp_taskwait: runs the <true> instantiation
// of __kmpc_omp_taskwait_template, passing through the frame address and
// return address supplied by the caller, and returns its result.
OMPT_NOINLINE
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
// complete
//
// loc_ref: source location information.
// gtid: global thread number.
//
// When both OMPT_SUPPORT and OMPT_OPTIONAL are compiled in and
// ompt_enabled.enabled is set, the call is routed to __kmpc_omp_taskwait_ompt
// together with a frame address and a return address; in every other case the
// <false> template instantiation runs with NULL for both. The result of the
// selected variant is returned unchanged.
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.enabled)) {
// NOTE(review): presumably records this entry point's return address for gtid
// so that OMPT_LOAD_RETURN_ADDRESS below can hand it on -- confirm against the
// macro definitions.
OMPT_STORE_RETURN_ADDRESS(gtid);
return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1),
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
// __kmpc_omp_taskyield: switch to a different task
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
kmp_taskdata_t *taskdata;
@ -1575,10 +1742,18 @@ kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
kmp_task_team_t *task_team = thread->th.th_task_team;
if (task_team != NULL) {
if (KMP_TASKING_ENABLED(task_team)) {
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled))
thread->th.ompt_thread_info.ompt_task_yielded = 1;
#endif
__kmp_execute_tasks_32(
thread, gtid, NULL, FALSE,
&thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
__kmp_task_stealing_constraint);
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled))
thread->th.ompt_thread_info.ompt_task_yielded = 0;
#endif
}
}
}
@ -1809,6 +1984,22 @@ void __kmpc_taskgroup(ident_t *loc, int gtid) {
tg_new->reduce_num_data = 0;
#endif
taskdata->td_taskgroup = tg_new;
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
kmp_team_t *team = thread->th.th_team;
ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
// FIXME: I think this is wrong for lwt!
ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
&(my_task_data), codeptr);
}
#endif
}
// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
@ -1819,6 +2010,22 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
int thread_finished = FALSE;
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_team_t *team;
ompt_data_t my_task_data;
ompt_data_t my_parallel_data;
void *codeptr;
if (UNLIKELY(ompt_enabled.enabled)) {
team = thread->th.th_team;
my_task_data = taskdata->ompt_task_info.task_data;
// FIXME: I think this is wrong for lwt!
my_parallel_data = team->t.ompt_team_info.parallel_data;
codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (!codeptr)
codeptr = OMPT_GET_RETURN_ADDRESS(0);
}
#endif
KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
KMP_DEBUG_ASSERT(taskgroup != NULL);
KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
@ -1832,6 +2039,14 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
__kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
&(my_task_data), codeptr);
}
#endif
#if OMP_45_ENABLED
if (!taskdata->td_flags.team_serial ||
(thread->th.th_task_team != NULL &&
@ -1848,6 +2063,14 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
}
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
&(my_task_data), codeptr);
}
#endif
#if USE_ITT_BUILD
if (itt_sync_obj != NULL)
__kmp_itt_taskwait_finished(gtid, itt_sync_obj);
@ -1867,6 +2090,14 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
gtid, taskdata));
ANNOTATE_HAPPENS_AFTER(taskdata);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
&(my_task_data), codeptr);
}
#endif
}
#endif
@ -3255,8 +3486,8 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
__kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid,
(void *)task->routine);
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
#endif
return task;
}
@ -3539,8 +3770,22 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
"grain %llu(%d), dup %p\n",
gtid, taskdata, *lb, *ub, st, grainsize, sched, task_dup));
if (nogroup == 0)
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_taskgroup(loc, gtid);
}
// =========================================================================
// calculate loop parameters
@ -3614,6 +3859,9 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
if (if_val == 0) { // if(0) specified, mark task as serial
taskdata->td_flags.task_serial = 1;
taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
// always start serial tasks linearly
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
grainsize, extras, tc, task_dup);
@ -3621,18 +3869,35 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
grainsize, extras, tc, num_tasks_min, task_dup);
} else {
KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
grainsize, extras, tc, task_dup);
}
if (nogroup == 0)
if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
__kmpc_end_taskgroup(loc, gtid);
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}

View File

@ -17,6 +17,9 @@
#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
/*!
@defgroup WAIT_RELEASE Wait/Release operations
@ -85,6 +88,44 @@ public:
*/
};
#if OMPT_SUPPORT
/* Emit the OMPT events that close an implicit task once its implicit barrier
   is over.

   Only acts when omp_state is omp_state_wait_barrier_implicit; for any other
   state the function returns without touching the thread or calling a tool.

   this_thr  - thread whose OMPT state is updated
   omp_state - state the caller observed for this thread
   tId       - task data handed to the sync-region and implicit-task callbacks
   pId       - accepted for symmetry with the callers, not used here

   Event order: barrier-wait-end, barrier-end (both only with OMPT_OPTIONAL),
   then for non-master threads implicit-task-end followed by idle-begin
   (OMPT_OPTIONAL). Non-master threads leave in omp_state_idle, the master
   thread leaves in omp_state_overhead. */
static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                            omp_state_t omp_state,
                                            ompt_data_t *tId,
                                            ompt_data_t *pId) {
  const int thread_num = this_thr->th.th_info.ds.ds_tid;

  if (omp_state != omp_state_wait_barrier_implicit)
    return;

  this_thr->th.ompt_thread_info.state = omp_state_overhead;

#if OMPT_OPTIONAL
  // No code pointer is available at this point; tools receive NULL.
  void *no_codeptr = NULL;
  if (ompt_enabled.ompt_callback_sync_region_wait) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
        ompt_sync_region_barrier, ompt_scope_end, NULL, tId, no_codeptr);
  }
  if (ompt_enabled.ompt_callback_sync_region) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_barrier, ompt_scope_end, NULL, tId, no_codeptr);
  }
#endif

  if (KMP_MASTER_TID(thread_num)) {
    // The master thread gets no implicit-task-end / idle-begin from here.
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
    return;
  }

  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, NULL, tId, 0, thread_num);
  }
#if OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_idle) {
    ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
  }
#endif
  // return to idle state
  this_thr->th.ompt_thread_info.state = omp_state_idle;
}
#endif
/* Spin wait loop that first does pause, then yield, then sleep. A thread that
calls __kmp_wait_* must make certain that another thread calls __kmp_release
to wake it back up to prevent deadlocks! */
@ -116,30 +157,88 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif
#if OMPT_SUPPORT && OMPT_BLAME
ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
if (ompt_enabled && ompt_state != ompt_state_undefined) {
if (ompt_state == ompt_state_idle) {
if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
}
} else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
ompt_state == ompt_state_wait_barrier_implicit ||
ompt_state == ompt_state_wait_barrier_explicit);
/* OMPT Behavior:
THIS function is called from
__kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
these have join / fork behavior
In these cases, we don't change the state or trigger events in THIS
function.
Events are triggered in the calling code (__kmp_barrier):
state := omp_state_overhead
barrier-begin
barrier-wait-begin
state := omp_state_wait_barrier
call join-barrier-implementation (finally arrive here)
{}
call fork-barrier-implementation (finally arrive here)
{}
state := omp_state_overhead
barrier-wait-end
barrier-end
state := omp_state_work_parallel
__kmp_fork_barrier (after thread creation, before executing implicit task)
call fork-barrier-implementation (finally arrive here)
{} // worker arrive here with state = omp_state_idle
__kmp_join_barrier (implicit barrier at end of parallel region)
state := omp_state_wait_barrier_implicit
barrier-begin
barrier-wait-begin
call join-barrier-implementation (finally arrive here
final_spin=FALSE)
{
}
__kmp_fork_barrier (implicit barrier at end of parallel region)
call fork-barrier-implementation (finally arrive here final_spin=TRUE)
Worker after task-team is finished:
barrier-wait-end
barrier-end
implicit-task-end
idle-begin
state := omp_state_idle
Before leaving, if state = omp_state_idle
idle-end
state := omp_state_overhead
*/
#if OMPT_SUPPORT
omp_state_t ompt_entry_state;
ompt_data_t *pId = NULL;
ompt_data_t *tId;
if (ompt_enabled.enabled) {
ompt_entry_state = this_thr->th.ompt_thread_info.state;
if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
ompt_lw_taskteam_t *team =
this_thr->th.th_team->t.ompt_serialized_team_info;
ompt_parallel_id_t pId;
ompt_task_id_t tId;
if (team) {
pId = team->ompt_team_info.parallel_id;
tId = team->ompt_task_info.task_id;
pId = &(team->ompt_team_info.parallel_data);
tId = &(team->ompt_task_info.task_data);
} else {
pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
tId = this_thr->th.th_current_task->ompt_task_info.task_id;
pId = OMPT_CUR_TEAM_DATA(this_thr);
tId = OMPT_CUR_TASK_DATA(this_thr);
}
ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
} else {
pId = NULL;
tId = &(this_thr->th.ompt_thread_info.task_data);
}
#if OMPT_OPTIONAL
if (ompt_entry_state == omp_state_idle) {
if (ompt_enabled.ompt_callback_idle) {
ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
}
} else
#endif
if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
this_thr->th.th_task_team == NULL)) {
// implicit task is done. Either no taskqueue, or task-team finished
__ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
}
}
#endif
@ -206,6 +305,11 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
} else {
KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
// task-team is done now, other cases should be caught above
if (final_spin && ompt_enabled.enabled)
__ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
#endif
this_thr->th.th_task_team = NULL;
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
}
@ -293,29 +397,22 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
// TODO: If thread is done with work and times out, disband/free
}
#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_enabled && ompt_state != ompt_state_undefined) {
if (ompt_state == ompt_state_idle) {
if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
#if OMPT_SUPPORT
omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
#if OMPT_OPTIONAL
if (final_spin) {
__ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
ompt_exit_state = this_thr->th.ompt_thread_info.state;
}
#endif
if (ompt_exit_state == omp_state_idle) {
#if OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_idle) {
ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end);
}
} else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
ompt_state == ompt_state_wait_barrier_implicit ||
ompt_state == ompt_state_wait_barrier_explicit);
ompt_lw_taskteam_t *team =
this_thr->th.th_team->t.ompt_serialized_team_info;
ompt_parallel_id_t pId;
ompt_task_id_t tId;
if (team) {
pId = team->ompt_team_info.parallel_id;
tId = team->ompt_task_info.task_id;
} else {
pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
tId = this_thr->th.th_current_task->ompt_task_info.task_id;
}
ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
#endif
this_thr->th.ompt_thread_info.state = omp_state_overhead;
}
}
#endif

View File

@ -22,132 +22,84 @@
| the OMPT TR. They are exposed to tools through ompt_set_callback.
+--------------------------------------------------------------------------*/
#define ompt_event_NEVER ompt_set_result_event_never_occurs
#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback
#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some
#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always
#define ompt_event_UNIMPLEMENTED ompt_set_never
#define ompt_event_MAY_CONVENIENT ompt_set_sometimes
#define ompt_event_MAY_ALWAYS ompt_set_always
#if OMPT_TRACE
#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS
#if OMPT_OPTIONAL
#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_MAY_ALWAYS
#else
#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED
#endif
#if OMPT_BLAME
#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS
#else
#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED
#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_UNIMPLEMENTED
#endif
/*----------------------------------------------------------------------------
| Mandatory Events
+--------------------------------------------------------------------------*/
#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS
#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS
#define ompt_callback_parallel_begin_implemented ompt_event_MAY_ALWAYS
#define ompt_callback_parallel_end_implemented ompt_event_MAY_ALWAYS
#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS
#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS
#define ompt_callback_task_create_implemented ompt_event_MAY_ALWAYS
#define ompt_callback_task_schedule_implemented ompt_event_MAY_ALWAYS
#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS
#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS
#define ompt_callback_thread_begin_implemented ompt_event_MAY_ALWAYS
#define ompt_callback_thread_end_implemented ompt_event_MAY_ALWAYS
#define ompt_event_control_implemented ompt_event_MAY_ALWAYS
#define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS
#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS
#define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS
/*----------------------------------------------------------------------------
| Target Related Events (not yet implemented)
+--------------------------------------------------------------------------*/
#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED
/*----------------------------------------------------------------------------
| Optional Events (blame shifting)
+--------------------------------------------------------------------------*/
#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME
#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME
#define ompt_callback_idle_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME
#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME
#define ompt_callback_sync_region_wait_implemented \
ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME
#define ompt_event_release_nest_lock_last_implemented \
ompt_event_MAY_ALWAYS_BLAME
#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME
#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME
#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME
#define ompt_callback_mutex_released_implemented ompt_event_MAY_ALWAYS_OPTIONAL
/*----------------------------------------------------------------------------
| Optional Events (synchronous events)
+--------------------------------------------------------------------------*/
#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_callback_work_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_master_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_callback_sync_region_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_callback_mutex_acquire_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_callback_mutex_acquired_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_callback_nest_lock_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_lock_init_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_callback_lock_destroy_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_callback_flush_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_release_nest_lock_prev_implemented \
ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_acquired_nest_lock_first_implemented \
ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_acquired_nest_lock_next_implemented \
ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_cancel_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#if OMP_40_ENABLED
#define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE
#define ompt_callback_task_dependences_implemented \
ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#else
#define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED
#define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_task_dependences_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED
#endif /* OMP_40_ENABLED */
#endif

View File

@ -8,6 +8,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if KMP_OS_UNIX
#include <dlfcn.h>
#endif
/*****************************************************************************
* ompt include files
@ -36,8 +39,13 @@
typedef struct {
const char *state_name;
ompt_state_t state_id;
} ompt_state_info_t;
omp_state_t state_id;
} omp_state_info_t;
typedef struct {
const char *name;
ompt_mutex_impl_t id;
} ompt_mutex_impl_info_t;
enum tool_setting_e {
omp_tool_error,
@ -46,25 +54,27 @@ enum tool_setting_e {
omp_tool_enabled
};
typedef void (*ompt_initialize_t)(ompt_function_lookup_t ompt_fn_lookup,
const char *version,
unsigned int ompt_version);
/*****************************************************************************
* global variables
****************************************************************************/
int ompt_enabled = 0;
ompt_callbacks_active_t ompt_enabled;
ompt_state_info_t ompt_state_info[] = {
#define ompt_state_macro(state, code) {#state, state},
FOREACH_OMPT_STATE(ompt_state_macro)
#undef ompt_state_macro
omp_state_info_t omp_state_info[] = {
#define omp_state_macro(state, code) {#state, state},
FOREACH_OMP_STATE(omp_state_macro)
#undef omp_state_macro
};
ompt_callbacks_t ompt_callbacks;
ompt_mutex_impl_info_t ompt_mutex_impl_info[] = {
#define ompt_mutex_impl_macro(name, id) {#name, name},
FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro)
#undef ompt_mutex_impl_macro
};
static ompt_initialize_t ompt_initialize_fn = NULL;
ompt_callbacks_internal_t ompt_callbacks;
static ompt_fns_t *ompt_fns = NULL;
/*****************************************************************************
* forward declarations
@ -72,48 +82,71 @@ static ompt_initialize_t ompt_initialize_fn = NULL;
static ompt_interface_fn_t ompt_fn_lookup(const char *s);
OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void);
OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void);
/*****************************************************************************
* initialization and finalization (private operations)
****************************************************************************/
/* On Unix-like systems that support weak symbols the following implementation
* of ompt_tool() will be used in case no tool-supplied implementation of
* of ompt_start_tool() will be used in case no tool-supplied implementation of
* this function is present in the address space of a process.
*
* On Windows, the ompt_tool_windows function is used to find the
* ompt_start_tool symbol across all modules loaded by a process. If
* ompt_start_tool is found, its return value is used to initialize the tool.
* Otherwise, NULL is returned and OMPT won't be enabled */
typedef ompt_fns_t *(*ompt_start_tool_t)(unsigned int, const char *);
#if KMP_OS_UNIX
#if OMPT_HAVE_WEAK_ATTRIBUTE
_OMP_EXTERN __attribute__((weak))
#elif defined KMP_DYNAMIC_LIB
_OMP_EXTERN
__attribute__((weak)) ompt_initialize_t ompt_tool() {
#warning Activation of OMPT is might fail for tools statically linked into the application.
#else
#error Activation of OMPT is not supported on this platform.
#endif
ompt_fns_t *
ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
#ifdef KMP_DYNAMIC_LIB
ompt_fns_t *ret = NULL;
// Try next symbol in the address space
ompt_start_tool_t next_tool = NULL;
next_tool = (ompt_start_tool_t)dlsym(RTLD_NEXT, "ompt_start_tool");
if (next_tool)
ret = (next_tool)(omp_version, runtime_version);
return ret;
#else
#if OMPT_DEBUG
printf("ompt_tool() is called from the RTL\n");
printf("ompt_start_tool() is called from the RTL\n");
#endif
return NULL;
#endif
}
#elif OMPT_HAVE_PSAPI
#include <psapi.h>
#pragma comment(lib, "psapi.lib")
#define ompt_tool ompt_tool_windows
#define ompt_start_tool ompt_tool_windows
// The number of loaded modules to start enumeration with EnumProcessModules()
#define NUM_MODULES 128
static ompt_initialize_t ompt_tool_windows() {
static ompt_fns_t *ompt_tool_windows(unsigned int omp_version,
const char *runtime_version) {
int i;
DWORD needed, new_size;
HMODULE *modules;
HANDLE process = GetCurrentProcess();
modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE));
ompt_initialize_t (*ompt_tool_p)() = NULL;
ompt_start_tool_t ompt_tool_p = NULL;
#if OMPT_DEBUG
printf("ompt_tool_windows(): looking for ompt_tool\n");
printf("ompt_tool_windows(): looking for ompt_start_tool\n");
#endif
if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE),
&needed)) {
@ -135,21 +168,22 @@ static ompt_initialize_t ompt_tool_windows() {
}
}
for (i = 0; i < new_size; ++i) {
(FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool");
(FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_start_tool");
if (ompt_tool_p) {
#if OMPT_DEBUG
TCHAR modName[MAX_PATH];
if (GetModuleFileName(modules[i], modName, MAX_PATH))
printf("ompt_tool_windows(): ompt_tool found in module %s\n", modName);
printf("ompt_tool_windows(): ompt_start_tool found in module %s\n",
modName);
#endif
free(modules);
return ompt_tool_p();
return (*ompt_tool_p)(omp_version, runtime_version);
}
#if OMPT_DEBUG
else {
TCHAR modName[MAX_PATH];
if (GetModuleFileName(modules[i], modName, MAX_PATH))
printf("ompt_tool_windows(): ompt_tool not found in module %s\n",
printf("ompt_tool_windows(): ompt_start_tool not found in module %s\n",
modName);
}
#endif
@ -161,6 +195,49 @@ static ompt_initialize_t ompt_tool_windows() {
#error Either __attribute__((weak)) or psapi.dll are required for OMPT support
#endif // OMPT_HAVE_WEAK_ATTRIBUTE
/* Locate a tool and ask it to start.

   Lookup order:
     1. ompt_start_tool() as resolved in the current address space.
     2. Each library named in the OMP_TOOL_LIBRARIES environment variable
        (separated by ';' on Windows, ':' elsewhere), in order. A library is
        loaded, its "ompt_start_tool" symbol is looked up and called.

   omp_version and runtime_version are passed through unchanged to every
   ompt_start_tool candidate.

   Returns the first non-NULL ompt_fns_t* produced by a candidate, or NULL
   when no candidate accepts. */
static ompt_fns_t *ompt_try_start_tool(unsigned int omp_version,
                                       const char *runtime_version) {
#if KMP_OS_WINDOWS
  // Cannot use colon to describe a list of absolute paths on Windows
  const char *sep = ";";
#else
  const char *sep = ":";
#endif

  // Try in the current address space
  ompt_fns_t *ret = ompt_start_tool(omp_version, runtime_version);
  if (ret)
    return ret;

  // Try tool-libraries-var ICV
  const char *tool_libs = getenv("OMP_TOOL_LIBRARIES");
  if (!tool_libs)
    return ret;

  ompt_start_tool_t start_tool = NULL;
  // Tokenizing writes into the string, so work on a private copy.
  const char *libs = __kmp_str_format("%s", tool_libs);
  char *buf;
  for (char *fname = __kmp_str_token(CCAST(char *, libs), sep, &buf); fname;
       fname = __kmp_str_token(NULL, sep, &buf)) {
#if KMP_OS_UNIX
    void *h = dlopen(fname, RTLD_LAZY);
    if (!h)
      continue;
    start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool");
#elif KMP_OS_WINDOWS
    HMODULE h = LoadLibrary(fname);
    if (!h)
      continue;
    start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool");
#else
#error Activation of OMPT is not supported on this platform.
#endif
    // Stop at the first library whose entry point returns a tool.
    if (start_tool && (ret = (*start_tool)(omp_version, runtime_version)))
      break;
  }
  __kmp_str_free(&libs);
  return ret;
}
void ompt_pre_init() {
//--------------------------------------------------
// Execute the pre-initialization logic only once.
@ -194,10 +271,14 @@ void ompt_pre_init() {
case omp_tool_unset:
case omp_tool_enabled:
ompt_initialize_fn = ompt_tool();
if (ompt_initialize_fn) {
ompt_enabled = 1;
}
//--------------------------------------------------
// Load tool iff specified in environment variable
//--------------------------------------------------
ompt_fns =
ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version());
memset(&ompt_enabled, 0, sizeof(ompt_enabled));
break;
case omp_tool_error:
@ -226,31 +307,34 @@ void ompt_post_init() {
//--------------------------------------------------
// Initialize the tool if so indicated.
//--------------------------------------------------
if (ompt_enabled) {
ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(),
OMPT_VERSION);
if (ompt_fns) {
ompt_enabled.enabled = !!ompt_fns->initialize(ompt_fn_lookup, ompt_fns);
ompt_thread_t *root_thread = ompt_get_thread();
ompt_set_thread_state(root_thread, ompt_state_overhead);
ompt_set_thread_state(root_thread, omp_state_overhead);
if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
ompt_thread_initial, ompt_get_thread_id());
if (ompt_enabled.ompt_callback_thread_begin) {
ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
ompt_thread_initial, __ompt_get_thread_data_internal());
}
ompt_data_t *task_data;
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
if (ompt_enabled.ompt_callback_task_create) {
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
NULL, NULL, task_data, ompt_task_initial, 0, NULL);
}
ompt_set_thread_state(root_thread, ompt_state_work_serial);
ompt_set_thread_state(root_thread, omp_state_work_serial);
}
}
void ompt_fini() {
if (ompt_enabled) {
if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) {
ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)();
}
if (ompt_enabled.enabled) {
ompt_fns->finalize(ompt_fns);
}
ompt_enabled = 0;
memset(&ompt_enabled, 0, sizeof(ompt_enabled));
}
/*****************************************************************************
@ -261,15 +345,15 @@ void ompt_fini() {
* state
****************************************************************************/
OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state,
const char **next_state_name) {
const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state,
const char **next_state_name) {
const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t);
int i = 0;
for (i = 0; i < len - 1; i++) {
if (ompt_state_info[i].state_id == current_state) {
*next_state = ompt_state_info[i + 1].state_id;
*next_state_name = ompt_state_info[i + 1].state_name;
if (omp_state_info[i].state_id == current_state) {
*next_state = omp_state_info[i + 1].state_id;
*next_state_name = omp_state_info[i + 1].state_name;
return 1;
}
}
@ -277,17 +361,35 @@ OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state,
return 0;
}
/* Step through the table of mutex implementations.

   current_impl   - id of the entry the caller is currently at
   next_impl      - out: id of the entry following current_impl
   next_impl_name - out: name of that following entry

   Returns 1 and fills both out-parameters when current_impl is found in
   ompt_mutex_impl_info and is not the last entry; returns 0 (leaving the
   out-parameters untouched) otherwise. */
OMPT_API_ROUTINE int ompt_enumerate_mutex_impls(int current_impl,
                                                int *next_impl,
                                                const char **next_impl_name) {
  const static int len =
      sizeof(ompt_mutex_impl_info) / sizeof(ompt_mutex_impl_info_t);

  // Search every entry that has a successor, i.e. all but the last one.
  int pos = 0;
  while (pos < len - 1 && ompt_mutex_impl_info[pos].id != current_impl)
    ++pos;

  if (pos >= len - 1)
    return 0;

  *next_impl = ompt_mutex_impl_info[pos + 1].id;
  *next_impl_name = ompt_mutex_impl_info[pos + 1].name;
  return 1;
}
/*****************************************************************************
* callbacks
****************************************************************************/
OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) {
switch (evid) {
OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which,
ompt_callback_t callback) {
switch (which) {
#define ompt_event_macro(event_name, callback_type, event_id) \
case event_name: \
if (ompt_event_implementation_status(event_name)) { \
ompt_callbacks.ompt_callback(event_name) = (callback_type)cb; \
ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \
ompt_enabled.event_name = 1; \
} \
return ompt_event_implementation_status(event_name);
@ -296,12 +398,13 @@ OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) {
#undef ompt_event_macro
default:
return ompt_set_result_registration_error;
return ompt_set_error;
}
}
OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) {
switch (evid) {
OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
ompt_callback_t *callback) {
switch (which) {
#define ompt_event_macro(event_name, callback_type, event_id) \
case event_name: \
@ -309,7 +412,7 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) {
ompt_callback_t mycb = \
(ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \
if (mycb) { \
*cb = mycb; \
*callback = mycb; \
return ompt_get_callback_success; \
} \
} \
@ -328,54 +431,149 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) {
* parallel regions
****************************************************************************/
OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) {
return __ompt_get_parallel_id_internal(ancestor_level);
OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level,
ompt_data_t **parallel_data,
int *team_size) {
return __ompt_get_parallel_info_internal(ancestor_level, parallel_data,
team_size);
}
OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) {
return __ompt_get_parallel_team_size_internal(ancestor_level);
}
OMPT_API_ROUTINE omp_state_t ompt_get_state(ompt_wait_id_t *wait_id) {
omp_state_t thread_state = __ompt_get_state_internal(wait_id);
OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) {
return __ompt_get_parallel_function_internal(ancestor_level);
}
OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) {
ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id);
if (thread_state == ompt_state_undefined) {
thread_state = ompt_state_work_serial;
if (thread_state == omp_state_undefined) {
thread_state = omp_state_work_serial;
}
return thread_state;
}
/*****************************************************************************
* threads
****************************************************************************/
OMPT_API_ROUTINE void *ompt_get_idle_frame() {
return __ompt_get_idle_frame_internal();
}
/*****************************************************************************
* tasks
****************************************************************************/
OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) {
return __ompt_get_thread_id_internal();
OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) {
return __ompt_get_thread_data_internal();
}
OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) {
return __ompt_get_task_id_internal(depth);
OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type,
ompt_data_t **task_data,
ompt_frame_t **task_frame,
ompt_data_t **parallel_data,
int *thread_num) {
return __ompt_get_task_info_internal(ancestor_level, type, task_data,
task_frame, parallel_data, thread_num);
}
OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) {
return __ompt_get_task_frame_internal(depth);
/*****************************************************************************
* places
****************************************************************************/
/* Report the number of affinity masks known to the runtime
 * (__kmp_affinity_num_masks). Returns 0 when affinity support is compiled out
 * or KMP_AFFINITY_CAPABLE() is false. */
OMPT_API_ROUTINE int ompt_get_num_places(void) {
// copied from kmp_ftn_entry.h (but modified)
#if KMP_AFFINITY_SUPPORTED
  if (KMP_AFFINITY_CAPABLE())
    return __kmp_affinity_num_masks;
#endif
  return 0;
}
OMPT_API_ROUTINE void *ompt_get_task_function(int depth) {
return __ompt_get_task_function_internal(depth);
/* Report the processor ids that belong to place `place_num`.
 *
 * place_num: index into __kmp_affinity_masks; out-of-range values yield 0.
 * ids_size:  capacity of `ids`, in elements.
 * ids:       output buffer; written only when ids_size is large enough to hold
 *            every id of the place.
 *
 * Returns the number of processors in the place (even when `ids` was too small
 * and therefore left untouched), or 0 when affinity is unsupported/incapable
 * or place_num is invalid.
 *
 * The ids are counted first and copied in a second pass, so no scratch array
 * sized by the caller-supplied ids_size is needed (the previous variable-length
 * array was non-standard C++ and undefined for ids_size <= 0). */
OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size,
                                             int *ids) {
// copied from kmp_ftn_entry.h (but modified)
#if !KMP_AFFINITY_SUPPORTED
  return 0;
#else
  int i, count;
  if (!KMP_AFFINITY_CAPABLE())
    return 0;
  if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
    return 0;
  /* TODO: Is this safe for asynchronous call from signal handler during runtime
   * shutdown? */
  kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);

  // First pass: count the processors that are set in both the place mask and
  // the full mask.
  count = 0;
  KMP_CPU_SET_ITERATE(i, mask) {
    if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
        (!KMP_CPU_ISSET(i, mask))) {
      continue;
    }
    count++;
  }

  // Second pass: copy the ids out, but only if all of them fit.
  if (ids_size >= count) {
    int filled = 0;
    KMP_CPU_SET_ITERATE(i, mask) {
      if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
          (!KMP_CPU_ISSET(i, mask))) {
        continue;
      }
      // Never write more than was counted, even if the mask changed between
      // the two passes.
      if (filled >= count)
        break;
      ids[filled++] = i;
    }
  }
  return count;
#endif
}
/* Return the calling thread's current place (th_current_place).
 * Returns -1 when affinity support is compiled out, KMP_AFFINITY_CAPABLE() is
 * false, no thread descriptor is found, or the stored place is negative. */
OMPT_API_ROUTINE int ompt_get_place_num(void) {
// copied from kmp_ftn_entry.h (but modified)
#if KMP_AFFINITY_SUPPORTED
  if (KMP_AFFINITY_CAPABLE()) {
    kmp_info_t *self = __kmp_thread_from_gtid(__kmp_entry_gtid());
    if (self != NULL && self->th.th_current_place >= 0)
      return self->th.th_current_place;
  }
#endif
  return -1;
}
/* Report the place numbers in the calling thread's place partition.
 *
 * place_nums_size: capacity of `place_nums`, in elements.
 * place_nums:      output buffer; filled with the inclusive range
 *                  [min(first,last), max(first,last)] only when it is large
 *                  enough to hold the whole range.
 *
 * Returns the number of places in the partition (even when the buffer was too
 * small and left untouched), or 0 when affinity is unsupported/incapable, no
 * thread descriptor is found, or the thread has no valid partition.
 *
 * The range is inclusive, so it contains end - start + 1 places; both the
 * capacity check and the return value use that count. */
OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size,
                                                   int *place_nums) {
// copied from kmp_ftn_entry.h (but modified)
#if !KMP_AFFINITY_SUPPORTED
  return 0;
#else
  int i, gtid, place_num, first_place, last_place, start, end, num_places;
  kmp_info_t *thread;
  if (!KMP_AFFINITY_CAPABLE())
    return 0;
  gtid = __kmp_entry_gtid();
  thread = __kmp_thread_from_gtid(gtid);
  if (thread == NULL)
    return 0;
  first_place = thread->th.th_first_place;
  last_place = thread->th.th_last_place;
  if (first_place < 0 || last_place < 0)
    return 0;
  // Normalise so that start <= end regardless of how the bounds are stored.
  if (first_place <= last_place) {
    start = first_place;
    end = last_place;
  } else {
    start = last_place;
    end = first_place;
  }
  num_places = end - start + 1;
  if (num_places <= place_nums_size) {
    for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) {
      place_nums[i] = place_num;
    }
  }
  return num_places;
#endif
}
/*****************************************************************************
* places
****************************************************************************/
OMPT_API_ROUTINE int ompt_get_proc_id(void) {
#if KMP_OS_LINUX
return sched_getcpu();
#else
return -1;
#endif
}
/*****************************************************************************
@ -435,19 +633,49 @@ OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) {
OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; }
/*****************************************************************************
* application-facing API
* application-facing API
****************************************************************************/
/*----------------------------------------------------------------------------
| control
---------------------------------------------------------------------------*/
_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) {
if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) {
ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier);
/* Forward a control request to the tool's control_tool callback.
 * Returns -2 when ompt_enabled.enabled is not set, -1 when it is set but no
 * control_tool callback is enabled, and otherwise whatever the callback
 * returns. The callback also receives the return address loaded for the
 * calling thread via OMPT_LOAD_RETURN_ADDRESS. */
int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg) {
  if (!ompt_enabled.enabled)
    return -2;
  if (!ompt_enabled.ompt_callback_control_tool)
    return -1;
  return ompt_callbacks.ompt_callback(ompt_callback_control_tool)(
      command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid()));
}
/*****************************************************************************
* misc
****************************************************************************/
OMPT_API_ROUTINE uint64_t ompt_get_unique_id(void) {
return __ompt_get_unique_id_internal();
}
/*****************************************************************************
* Target
****************************************************************************/
OMPT_API_ROUTINE int ompt_get_target_info(uint64_t *device_num,
ompt_id_t *target_id,
ompt_id_t *host_op_id) {
return 0; // thread is not in a target region
}
OMPT_API_ROUTINE int ompt_get_num_devices(void) {
return 1; // only one device (the current device) is available
}
/*****************************************************************************
* API inquiry for tool
****************************************************************************/
@ -455,8 +683,9 @@ _OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) {
static ompt_interface_fn_t ompt_fn_lookup(const char *s) {
#define ompt_interface_fn(fn) \
fn##_t fn##_f = fn; \
if (strcmp(s, #fn) == 0) \
return (ompt_interface_fn_t)fn;
return (ompt_interface_fn_t)fn##_f;
FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)

View File

@ -13,19 +13,36 @@
#define ompt_callback(e) e##_callback
typedef struct ompt_callbacks_s {
typedef struct ompt_callbacks_internal_s {
#define ompt_event_macro(event, callback, eventid) \
callback ompt_callback(event);
FOREACH_OMPT_EVENT(ompt_event_macro)
#undef ompt_event_macro
} ompt_callbacks_t;
} ompt_callbacks_internal_t;
typedef struct ompt_callbacks_active_s {
unsigned int enabled : 1;
#define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
FOREACH_OMPT_EVENT(ompt_event_macro)
#undef ompt_event_macro
} ompt_callbacks_active_t;
// Build the task-type detail bits (undeferred / untied / final / mergeable)
// from the td_flags of the task descriptor that `info` points to.
// The argument and the whole expansion are parenthesised so the macro can be
// used with any pointer expression and combined with any surrounding operator.
#define TASK_TYPE_DETAILS_FORMAT(info)                                         \
  ((((info)->td_flags.task_serial || (info)->td_flags.tasking_ser)             \
        ? ompt_task_undeferred                                                 \
        : 0x0) |                                                               \
   ((!((info)->td_flags.tiedness)) ? ompt_task_untied : 0x0) |                 \
   ((info)->td_flags.final ? ompt_task_final : 0x0) |                          \
   ((info)->td_flags.merged_if0 ? ompt_task_mergeable : 0x0))
typedef struct {
ompt_frame_t frame;
void *function;
ompt_task_id_t task_id;
ompt_data_t task_data;
struct kmp_taskdata *scheduling_parent;
#if OMP_40_ENABLED
int ndeps;
ompt_task_dependence_t *deps;
@ -33,32 +50,31 @@ typedef struct {
} ompt_task_info_t;
typedef struct {
ompt_parallel_id_t parallel_id;
void *microtask;
ompt_data_t parallel_data;
void *master_return_address;
} ompt_team_info_t;
typedef struct ompt_lw_taskteam_s {
ompt_team_info_t ompt_team_info;
ompt_task_info_t ompt_task_info;
int heap;
struct ompt_lw_taskteam_s *parent;
} ompt_lw_taskteam_t;
typedef struct ompt_parallel_info_s {
ompt_task_id_t parent_task_id; /* id of parent task */
ompt_parallel_id_t parallel_id; /* id of parallel region */
ompt_frame_t *parent_task_frame; /* frame data of parent task */
void *parallel_function; /* pointer to outlined function */
} ompt_parallel_info_t;
typedef struct {
ompt_state_t state;
ompt_data_t thread_data;
ompt_data_t task_data; /* stored here from implicit barrier-begin until
implicit-task-end */
void *return_address; /* stored here on entry of runtime */
omp_state_t state;
ompt_wait_id_t wait_id;
int ompt_task_yielded;
void *idle_frame;
} ompt_thread_info_t;
extern ompt_callbacks_t ompt_callbacks;
extern ompt_callbacks_internal_t ompt_callbacks;
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL
#if USE_FAST_MEMORY
#define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate
#define KMP_OMPT_DEPS_FREE __kmp_fast_free
@ -66,7 +82,7 @@ extern ompt_callbacks_t ompt_callbacks;
#define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc
#define KMP_OMPT_DEPS_FREE __kmp_thread_free
#endif
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL */
#ifdef __cplusplus
extern "C" {
@ -76,7 +92,20 @@ void ompt_pre_init(void);
void ompt_post_init(void);
void ompt_fini(void);
extern int ompt_enabled;
#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)
#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level)
int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg);
extern ompt_callbacks_active_t ompt_enabled;
#if KMP_OS_WINDOWS
#define UNLIKELY(x) (x)
#define OMPT_NOINLINE __declspec(noinline)
#else
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#define OMPT_NOINLINE __attribute__((noinline))
#endif
#ifdef __cplusplus
};

View File

@ -3,42 +3,33 @@
//******************************************************************************
#include "kmp.h"
#include "ompt-internal.h"
#include "ompt-specific.h"
#if KMP_OS_UNIX
#include <dlfcn.h>
#endif
#if KMP_OS_WINDOWS
#define THREAD_LOCAL __declspec(thread)
#else
#define THREAD_LOCAL __thread
#endif
//******************************************************************************
// macros
//******************************************************************************
#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)(id >= 0) ? id + 1 : 0)
#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info;
#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info
#define OMPT_THREAD_ID_BITS 16
// 2013 08 24 - John Mellor-Crummey
// ideally, a thread should assign its own ids based on thread private data.
// however, the way the intel runtime reinitializes thread data structures
// when it creates teams makes it difficult to maintain persistent thread
// data. using a shared variable instead is simple. I leave it to intel to
// sort out how to implement a higher performance version in their runtime.
// when using fetch_and_add to generate the IDs, there isn't any reason to waste
// bits for thread id.
#if 0
#define NEXT_ID(id_ptr, tid) \
((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid))
#else
#define NEXT_ID(id_ptr, tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
#endif
//******************************************************************************
// private operations
//******************************************************************************
//----------------------------------------------------------
// traverse the team and task hierarchy
// note: __ompt_get_teaminfo and __ompt_get_taskinfo
// note: __ompt_get_teaminfo and __ompt_get_task_info_object
// traverse the hierarchy similarly and need to be
// kept consistent
//----------------------------------------------------------
@ -51,7 +42,7 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
if (team == NULL)
return NULL;
ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
ompt_lw_taskteam_t *next_lwt = LWT_FROM_TEAM(team), *lwt = NULL;
while (depth > 0) {
// next lightweight team (if any)
@ -61,9 +52,14 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && team) {
team = team->t.t_parent;
if (team) {
lwt = LWT_FROM_TEAM(team);
if (next_lwt) {
lwt = next_lwt;
next_lwt = NULL;
} else {
team = team->t.t_parent;
if (team) {
next_lwt = LWT_FROM_TEAM(team);
}
}
}
@ -90,13 +86,14 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
return NULL;
}
ompt_task_info_t *__ompt_get_taskinfo(int depth) {
ompt_task_info_t *__ompt_get_task_info_object(int depth) {
ompt_task_info_t *info = NULL;
kmp_info_t *thr = ompt_get_thread();
if (thr) {
kmp_taskdata_t *taskdata = thr->th.th_current_task;
ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
ompt_lw_taskteam_t *lwt = NULL,
*next_lwt = LWT_FROM_TEAM(taskdata->td_team);
while (depth > 0) {
// next lightweight team (if any)
@ -106,9 +103,59 @@ ompt_task_info_t *__ompt_get_taskinfo(int depth) {
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && taskdata) {
taskdata = taskdata->td_parent;
if (taskdata) {
lwt = LWT_FROM_TEAM(taskdata->td_team);
if (next_lwt) {
lwt = next_lwt;
next_lwt = NULL;
} else {
taskdata = taskdata->td_parent;
if (taskdata) {
next_lwt = LWT_FROM_TEAM(taskdata->td_team);
}
}
}
depth--;
}
if (lwt) {
info = &lwt->ompt_task_info;
} else if (taskdata) {
info = &taskdata->ompt_task_info;
}
}
return info;
}
ompt_task_info_t *__ompt_get_scheduling_taskinfo(int depth) {
ompt_task_info_t *info = NULL;
kmp_info_t *thr = ompt_get_thread();
if (thr) {
kmp_taskdata_t *taskdata = thr->th.th_current_task;
ompt_lw_taskteam_t *lwt = NULL,
*next_lwt = LWT_FROM_TEAM(taskdata->td_team);
while (depth > 0) {
// next lightweight team (if any)
if (lwt)
lwt = lwt->parent;
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && taskdata) {
// first try scheduling parent (for explicit task scheduling)
if (taskdata->ompt_task_info.scheduling_parent) {
taskdata = taskdata->ompt_task_info.scheduling_parent;
} else if (next_lwt) {
lwt = next_lwt;
next_lwt = NULL;
} else {
// then go for implicit tasks
taskdata = taskdata->td_parent;
if (taskdata) {
next_lwt = LWT_FROM_TEAM(taskdata->td_team);
}
}
}
depth--;
@ -132,29 +179,14 @@ ompt_task_info_t *__ompt_get_taskinfo(int depth) {
// thread support
//----------------------------------------------------------
ompt_parallel_id_t __ompt_thread_id_new() {
static uint64_t ompt_thread_id = 1;
return NEXT_ID(&ompt_thread_id, 0);
}
void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) {
ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
}
void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid) {
ompt_callbacks.ompt_callback(ompt_event_thread_end)(
thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
}
ompt_thread_id_t __ompt_get_thread_id_internal() {
// FIXME: until we have a better way of assigning ids, use __kmp_get_gtid
// since the return value might be negative, we need to test that before
// assigning it to an ompt_thread_id_t, which is unsigned.
int id = __kmp_get_gtid();
assert(id >= 0);
return GTID_TO_OMPT_THREAD_ID(id);
// Return a pointer to the calling thread's tool data slot
// (ompt_thread_info.thread_data), or NULL when the caller has no non-negative
// gtid or no thread descriptor is available.
ompt_data_t *__ompt_get_thread_data_internal() {
  if (__kmp_get_gtid() < 0)
    return NULL;
  kmp_info_t *self = ompt_get_thread();
  return self ? &(self->th.ompt_thread_info.thread_data) : NULL;
}
//----------------------------------------------------------
@ -162,13 +194,12 @@ ompt_thread_id_t __ompt_get_thread_id_internal() {
//----------------------------------------------------------
void __ompt_thread_assign_wait_id(void *variable) {
int gtid = __kmp_gtid_get_specific();
kmp_info_t *ti = ompt_get_thread_gtid(gtid);
kmp_info_t *ti = ompt_get_thread();
ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable;
}
ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
omp_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
kmp_info_t *ti = ompt_get_thread();
if (ti) {
@ -176,46 +207,26 @@ ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
*ompt_wait_id = ti->th.ompt_thread_info.wait_id;
return ti->th.ompt_thread_info.state;
}
return ompt_state_undefined;
}
//----------------------------------------------------------
// idle frame support
//----------------------------------------------------------
void *__ompt_get_idle_frame_internal(void) {
kmp_info_t *ti = ompt_get_thread();
return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
return omp_state_undefined;
}
//----------------------------------------------------------
// parallel region support
//----------------------------------------------------------
ompt_parallel_id_t __ompt_parallel_id_new(int gtid) {
static uint64_t ompt_parallel_id = 1;
return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
}
void *__ompt_get_parallel_function_internal(int depth) {
ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
void *function = info ? info->microtask : NULL;
return function;
}
ompt_parallel_id_t __ompt_get_parallel_id_internal(int depth) {
ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
ompt_parallel_id_t id = info ? info->parallel_id : 0;
return id;
}
int __ompt_get_parallel_team_size_internal(int depth) {
// initialize the return value with the error value.
// if there is a team at the specified depth, the default
// value will be overwritten the size of that team.
int size = -1;
(void)__ompt_get_teaminfo(depth, &size);
return size;
int __ompt_get_parallel_info_internal(int ancestor_level,
ompt_data_t **parallel_data,
int *team_size) {
ompt_team_info_t *info;
if (team_size) {
info = __ompt_get_teaminfo(ancestor_level, team_size);
} else {
info = __ompt_get_teaminfo(ancestor_level, NULL);
}
if (parallel_data) {
*parallel_data = info ? &(info->parallel_data) : NULL;
}
return info ? 2 : 0;
}
//----------------------------------------------------------
@ -223,60 +234,182 @@ int __ompt_get_parallel_team_size_internal(int depth) {
//----------------------------------------------------------
void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
void *microtask, ompt_parallel_id_t ompt_pid) {
lwt->ompt_team_info.parallel_id = ompt_pid;
lwt->ompt_team_info.microtask = microtask;
lwt->ompt_task_info.task_id = 0;
ompt_data_t *ompt_pid, void *codeptr) {
// initialize parallel_data with input, return address to parallel_data on
// exit
lwt->ompt_team_info.parallel_data = *ompt_pid;
lwt->ompt_team_info.master_return_address = codeptr;
lwt->ompt_task_info.task_data.value = 0;
lwt->ompt_task_info.frame.reenter_runtime_frame = NULL;
lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
lwt->ompt_task_info.function = NULL;
lwt->ompt_task_info.scheduling_parent = NULL;
lwt->ompt_task_info.deps = NULL;
lwt->ompt_task_info.ndeps = 0;
lwt->heap = 0;
lwt->parent = 0;
}
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) {
ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
lwt->parent = my_parent;
thr->th.th_team->t.ompt_serialized_team_info = lwt;
// Install the team/task info carried by `lwt` as the thread's current team and
// task info.
//
// When the team's t_serialized count is at most 1, the values are copied
// into the current team/task slots and `lwt` itself is not linked anywhere.
//
// Otherwise the previously current team/task info is saved into a lightweight
// task team node that is pushed onto the team's ompt_serialized_team_info
// list. The node is `lwt` itself, or a __kmp_allocate'd copy when `on_heap` is
// non-zero; node->heap records which.
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
                             int on_heap) {
  if (thr->th.th_team->t.t_serialized <= 1) {
    // first serialized team: just store the values, drop the taskteam object
    *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info;
    *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info;
    return;
  }

  // A team is already present: pick the node that will hold the old values.
  ompt_lw_taskteam_t *node = lwt;
  if (on_heap) {
    // the lw_taskteam cannot stay on stack, allocate it on heap
    node = (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
  }
  node->heap = on_heap;

  // Exchange team info: the node keeps the old values, the team gets the new
  // ones. (With node == lwt this is a plain swap.)
  ompt_team_info_t incoming_team = lwt->ompt_team_info;
  node->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
  *OMPT_CUR_TEAM_INFO(thr) = incoming_team;

  // Same exchange for the task info.
  ompt_task_info_t incoming_task = lwt->ompt_task_info;
  node->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
  *OMPT_CUR_TASK_INFO(thr) = incoming_task;

  // Push the node onto the head of the team's list of lightweight taskteams.
  node->parent = thr->th.th_team->t.ompt_serialized_team_info;
  thr->th.th_team->t.ompt_serialized_team_info = node;
}
ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(kmp_info_t *thr) {
void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
if (lwtask)
if (lwtask) {
thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
return lwtask;
ompt_team_info_t tmp_team = lwtask->ompt_team_info;
lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
*OMPT_CUR_TEAM_INFO(thr) = tmp_team;
ompt_task_info_t tmp_task = lwtask->ompt_task_info;
lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
*OMPT_CUR_TASK_INFO(thr) = tmp_task;
if (lwtask->heap) {
__kmp_free(lwtask);
lwtask = NULL;
}
}
// return lwtask;
}
//----------------------------------------------------------
// task support
//----------------------------------------------------------
ompt_task_id_t __ompt_task_id_new(int gtid) {
static uint64_t ompt_task_id = 1;
return NEXT_ID(&ompt_task_id, gtid);
}
int __ompt_get_task_info_internal(int ancestor_level, int *type,
ompt_data_t **task_data,
ompt_frame_t **task_frame,
ompt_data_t **parallel_data,
int *thread_num) {
if (ancestor_level < 0)
return 0;
ompt_task_id_t __ompt_get_task_id_internal(int depth) {
ompt_task_info_t *info = __ompt_get_taskinfo(depth);
ompt_task_id_t task_id = info ? info->task_id : 0;
return task_id;
}
// copied from __ompt_get_scheduling_taskinfo
ompt_task_info_t *info = NULL;
ompt_team_info_t *team_info = NULL;
kmp_info_t *thr = ompt_get_thread();
void *__ompt_get_task_function_internal(int depth) {
ompt_task_info_t *info = __ompt_get_taskinfo(depth);
void *function = info ? info->function : NULL;
return function;
}
if (thr) {
kmp_taskdata_t *taskdata = thr->th.th_current_task;
if (taskdata == NULL)
return 0;
kmp_team *team = thr->th.th_team;
if (team == NULL)
return 0;
ompt_lw_taskteam_t *lwt = NULL,
*next_lwt = LWT_FROM_TEAM(taskdata->td_team);
ompt_frame_t *__ompt_get_task_frame_internal(int depth) {
ompt_task_info_t *info = __ompt_get_taskinfo(depth);
ompt_frame_t *frame = info ? frame = &info->frame : NULL;
return frame;
while (ancestor_level > 0) {
// next lightweight team (if any)
if (lwt)
lwt = lwt->parent;
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && taskdata) {
// first try scheduling parent (for explicit task scheduling)
if (taskdata->ompt_task_info.scheduling_parent) {
taskdata = taskdata->ompt_task_info.scheduling_parent;
} else if (next_lwt) {
lwt = next_lwt;
next_lwt = NULL;
} else {
// then go for implicit tasks
taskdata = taskdata->td_parent;
if (team == NULL)
return 0;
team = team->t.t_parent;
if (taskdata) {
next_lwt = LWT_FROM_TEAM(taskdata->td_team);
}
}
}
ancestor_level--;
}
if (lwt) {
info = &lwt->ompt_task_info;
team_info = &lwt->ompt_team_info;
if (type) {
*type = ompt_task_implicit;
}
} else if (taskdata) {
info = &taskdata->ompt_task_info;
team_info = &team->t.ompt_team_info;
if (type) {
if (taskdata->td_parent) {
*type = (taskdata->td_flags.tasktype ? ompt_task_explicit
: ompt_task_implicit) |
TASK_TYPE_DETAILS_FORMAT(taskdata);
} else {
*type = ompt_task_initial;
}
}
}
if (task_data) {
*task_data = info ? &info->task_data : NULL;
}
if (task_frame) {
// OpenMP spec asks for the scheduling task to be returned.
*task_frame = info ? &info->frame : NULL;
}
if (parallel_data) {
*parallel_data = team_info ? &(team_info->parallel_data) : NULL;
}
return info ? 2 : 0;
}
return 0;
}
//----------------------------------------------------------
// team support
//----------------------------------------------------------
void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) {
team->t.ompt_team_info.parallel_id = ompt_pid;
void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid) {
team->t.ompt_team_info.parallel_data = ompt_pid;
}
//----------------------------------------------------------
// misc
//----------------------------------------------------------
static uint64_t __ompt_get_unique_id_internal() {
static uint64_t thread = 1;
static THREAD_LOCAL uint64_t ID = 0;
if (ID == 0) {
uint64_t new_thread = KMP_TEST_THEN_INC64((kmp_int64 *)&thread);
ID = new_thread << (sizeof(uint64_t) * 8 - OMPT_THREAD_ID_BITS);
}
return ++ID;
}

View File

@ -13,42 +13,63 @@ typedef kmp_info_t ompt_thread_t;
* forward declarations
****************************************************************************/
void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid);
void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid);
void __ompt_thread_assign_wait_id(void *variable);
void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr,
int gtid, void *microtask,
ompt_parallel_id_t ompt_pid);
int gtid, ompt_data_t *ompt_pid, void *codeptr);
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr);
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr,
int on_heap);
ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(ompt_thread_t *thr);
ompt_parallel_id_t __ompt_parallel_id_new(int gtid);
ompt_task_id_t __ompt_task_id_new(int gtid);
void __ompt_lw_taskteam_unlink(ompt_thread_t *thr);
ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size);
ompt_task_info_t *__ompt_get_taskinfo(int depth);
ompt_task_info_t *__ompt_get_task_info_object(int depth);
void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid);
int __ompt_get_parallel_info_internal(int ancestor_level,
ompt_data_t **parallel_data,
int *team_size);
void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid);
int __ompt_get_task_info_internal(int ancestor_level, int *type,
ompt_data_t **task_data,
ompt_frame_t **task_frame,
ompt_data_t **parallel_data, int *thread_num);
int __ompt_get_parallel_team_size_internal(int ancestor_level);
ompt_data_t *__ompt_get_thread_data_internal();
ompt_task_id_t __ompt_get_task_id_internal(int depth);
ompt_frame_t *__ompt_get_task_frame_internal(int depth);
// Forward declaration; the name must match the definition
// __ompt_get_unique_id_internal.
static uint64_t __ompt_get_unique_id_internal();
/*****************************************************************************
* macros
****************************************************************************/
#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info))
#define OMPT_CUR_TASK_DATA(thr) \
(&(thr->th.th_current_task->ompt_task_info.task_data))
#define OMPT_CUR_TEAM_INFO(thr) (&(thr->th.th_team->t.ompt_team_info))
#define OMPT_CUR_TEAM_DATA(thr) \
(&(thr->th.th_team->t.ompt_team_info.parallel_data))
#define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE
#define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI
#define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle)
// Fetch the return address stored for thread `gtid` and clear the slot, so
// the next call returns NULL until a new address is stored.
// gtid is used to index __kmp_threads without any validation here.
inline void *__ompt_load_return_address(int gtid) {
  void **slot = &__kmp_threads[gtid]->th.ompt_thread_info.return_address;
  void *stored = *slot;
  *slot = NULL;
  return stored;
}
// Record the caller's return address for thread `gtid`, but only when a tool
// is enabled, gtid is non-negative with a thread descriptor, and no address
// is stored yet (an address already stored is kept).
// The argument is parenthesised so any integer expression can be passed.
// NOTE: this expands to a bare `if` statement; do not use it as the unbraced
// body of an if/else.
#define OMPT_STORE_RETURN_ADDRESS(gtid)                                        \
  if (ompt_enabled.enabled && (gtid) >= 0 && __kmp_threads[(gtid)] &&          \
      !__kmp_threads[(gtid)]->th.ompt_thread_info.return_address)              \
  __kmp_threads[(gtid)]->th.ompt_thread_info.return_address =                  \
      __builtin_return_address(0)
#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid)
//******************************************************************************
// inline functions
//******************************************************************************
@ -62,7 +83,7 @@ inline ompt_thread_t *ompt_get_thread() {
return ompt_get_thread_gtid(gtid);
}
inline void ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) {
inline void ompt_set_thread_state(ompt_thread_t *thread, omp_state_t state) {
thread->th.ompt_thread_info.state = state;
}

View File

@ -2280,7 +2280,7 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
#endif
) {
#if OMPT_SUPPORT
*exit_frame_ptr = __builtin_frame_address(0);
*exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
#endif
switch (argc) {

View File

@ -34,8 +34,7 @@ endmacro()
pythonize_bool(LIBOMP_USE_HWLOC)
pythonize_bool(LIBOMP_OMPT_SUPPORT)
pythonize_bool(LIBOMP_OMPT_BLAME)
pythonize_bool(LIBOMP_OMPT_TRACE)
pythonize_bool(LIBOMP_OMPT_OPTIONAL)
pythonize_bool(LIBOMP_HAVE_LIBM)
pythonize_bool(LIBOMP_HAVE_LIBATOMIC)

View File

@ -92,15 +92,15 @@ if config.has_ompt:
# for callback.h
config.test_cflags += " -I " + config.test_source_root + "/ompt"
if 'Linux' in config.operating_system:
config.available_features.add("linux")
# to run with icc INTEL_LICENSE_FILE must be set
if 'INTEL_LICENSE_FILE' in os.environ:
config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE']
# substitutions
if config.has_ompt:
config.substitutions.append(("FileCheck", config.test_filecheck))
config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable"))
# substitutions
config.substitutions.append(("%libomp-compile-and-run", \
"%libomp-compile && %libomp-run"))
config.substitutions.append(("%libomp-cxx-compile-and-run", \
@ -109,9 +109,14 @@ config.substitutions.append(("%libomp-cxx-compile", \
"%clangXX %cflags -std=c++11 %s -o %t" + libs))
config.substitutions.append(("%libomp-compile", \
"%clang %cflags %s -o %t" + libs))
config.substitutions.append(("%libomp-tool", \
"%clang %cflags -shared -fPIC -o %T/tool.so" + libs))
config.substitutions.append(("%libomp-run", "%t"))
config.substitutions.append(("%clangXX", config.test_cxx_compiler))
config.substitutions.append(("%clang", config.test_compiler))
config.substitutions.append(("%openmp_flag", config.test_openmp_flag))
config.substitutions.append(("%cflags", config.test_cflags))
if config.has_ompt:
config.substitutions.append(("FileCheck", config.test_filecheck))
config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable"))

View File

@ -11,7 +11,7 @@ config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src"
config.operating_system = "@CMAKE_SYSTEM_NAME@"
config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@"
config.using_hwloc = @LIBOMP_USE_HWLOC@
config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_BLAME@ and @LIBOMP_OMPT_TRACE@
config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@
config.has_libm = @LIBOMP_HAVE_LIBM@
config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@

671
openmp/runtime/test/ompt/callback.h Normal file → Executable file
View File

@ -1,119 +1,660 @@
#define _BSD_SOURCE
#include <stdio.h>
#include <inttypes.h>
#include <omp.h>
#include <ompt.h>
#include "ompt-signal.h"
static ompt_get_task_id_t ompt_get_task_id;
static ompt_get_task_frame_t ompt_get_task_frame;
static ompt_get_thread_id_t ompt_get_thread_id;
static ompt_get_parallel_id_t ompt_get_parallel_id;
static const char* ompt_thread_type_t_values[] = {
NULL,
"ompt_thread_initial",
"ompt_thread_worker",
"ompt_thread_other"
};
static const char* ompt_task_status_t_values[] = {
NULL,
"ompt_task_complete",
"ompt_task_yield",
"ompt_task_cancel",
"ompt_task_others"
};
static const char* ompt_cancel_flag_t_values[] = {
"ompt_cancel_parallel",
"ompt_cancel_sections",
"ompt_cancel_do",
"ompt_cancel_taskgroup",
"ompt_cancel_activated",
"ompt_cancel_detected",
"ompt_cancel_discarded_task"
};
static ompt_set_callback_t ompt_set_callback;
static ompt_get_task_info_t ompt_get_task_info;
static ompt_get_thread_data_t ompt_get_thread_data;
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_unique_id_t ompt_get_unique_id;
static ompt_get_num_places_t ompt_get_num_places;
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
static ompt_get_place_num_t ompt_get_place_num;
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
static ompt_get_proc_id_t ompt_get_proc_id;
static ompt_enumerate_states_t ompt_enumerate_states;
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
static void print_ids(int level)
{
ompt_frame_t* frame = ompt_get_task_frame(level);
printf("%" PRIu64 ": level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_id(), level, ompt_get_parallel_id(level), ompt_get_task_id(level), frame->exit_runtime_frame, frame->reenter_runtime_frame);
ompt_frame_t* frame ;
ompt_data_t* parallel_data;
ompt_data_t* task_data;
int exists_task = ompt_get_task_info(level, NULL, &task_data, &frame, &parallel_data, NULL);
if (frame)
{
printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_data()->value, level, exists_task ? parallel_data->value : 0, exists_task ? task_data->value : 0, frame->exit_runtime_frame, frame->reenter_runtime_frame);
}
else
printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", frame=%p\n", ompt_get_thread_data()->value, level, exists_task ? parallel_data->value : 0, exists_task ? task_data->value : 0, frame);
}
#define print_frame(level)\
do {\
printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_id(), level, __builtin_frame_address(level));\
printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_data()->value, level, __builtin_frame_address(level));\
} while(0)
#define print_current_address(id)\
{} /* Empty block between "#pragma omp ..." and __asm__ statement as a workaround for icc bug */ \
__asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ \
ompt_label_##id:\
printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, (char*)(&& ompt_label_##id)-1, (char*)(&& ompt_label_##id)-4)
/* "&& label" returns the address of the label (GNU extension); works with gcc, clang, icc */
/* for void-type runtime function, the label is after the nop (-1), for functions with return value, there is a mov instruction before the label (-4) */
static void
on_ompt_event_barrier_begin(
ompt_parallel_id_t parallel_id,
ompt_task_id_t task_id)
#define print_fuzzy_address(id)\
{} /* Empty block between "#pragma omp ..." and __asm__ statement as a workaround for icc bug */ \
__asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ \
ompt_label_##id:\
printf("%" PRIu64 ": fuzzy_address=0x%lx or 0x%lx\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_##id))/256-1, ((uint64_t)(char*)(&& ompt_label_##id))/256)
/* "&& label" returns the address of the label (GNU extension); works with gcc, clang, icc */
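/* the label sits after the nop (void-type runtime function) or after an additional mov (function with return value), so the return address is slightly smaller than the label address; the label address is divided by 256 to drop its low byte, and both the quotient and quotient-1 are printed so that a test can match the leading hex digits of codeptr_ra even when it falls into the preceding 256-byte chunk */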
static void format_task_type(int type, char* buffer)
{
printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
print_ids(0);
char* progress = buffer;
if(type & ompt_task_initial) progress += sprintf(progress, "ompt_task_initial");
if(type & ompt_task_implicit) progress += sprintf(progress, "ompt_task_implicit");
if(type & ompt_task_explicit) progress += sprintf(progress, "ompt_task_explicit");
if(type & ompt_task_target) progress += sprintf(progress, "ompt_task_target");
if(type & ompt_task_undeferred) progress += sprintf(progress, "|ompt_task_undeferred");
if(type & ompt_task_untied) progress += sprintf(progress, "|ompt_task_untied");
if(type & ompt_task_final) progress += sprintf(progress, "|ompt_task_final");
if(type & ompt_task_mergeable) progress += sprintf(progress, "|ompt_task_mergeable");
if(type & ompt_task_merged) progress += sprintf(progress, "|ompt_task_merged");
}
static void
on_ompt_event_barrier_end(
ompt_parallel_id_t parallel_id,
ompt_task_id_t task_id)
on_ompt_callback_mutex_acquire(
ompt_mutex_kind_t kind,
unsigned int hint,
unsigned int impl,
ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
switch(kind)
{
case ompt_mutex_lock:
printf("%" PRIu64 ": ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
break;
case ompt_mutex_nest_lock:
printf("%" PRIu64 ": ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
break;
case ompt_mutex_critical:
printf("%" PRIu64 ": ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
break;
case ompt_mutex_atomic:
printf("%" PRIu64 ": ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
break;
case ompt_mutex_ordered:
printf("%" PRIu64 ": ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
break;
default:
break;
}
}
static void
on_ompt_event_implicit_task_begin(
ompt_parallel_id_t parallel_id,
ompt_task_id_t task_id)
on_ompt_callback_mutex_acquired(
ompt_mutex_kind_t kind,
ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
switch(kind)
{
case ompt_mutex_lock:
printf("%" PRIu64 ": ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_mutex_nest_lock:
printf("%" PRIu64 ": ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_mutex_critical:
printf("%" PRIu64 ": ompt_event_acquired_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_mutex_atomic:
printf("%" PRIu64 ": ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_mutex_ordered:
printf("%" PRIu64 ": ompt_event_acquired_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
default:
break;
}
}
static void
on_ompt_event_implicit_task_end(
ompt_parallel_id_t parallel_id,
ompt_task_id_t task_id)
on_ompt_callback_mutex_released(
ompt_mutex_kind_t kind,
ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
switch(kind)
{
case ompt_mutex_lock:
printf("%" PRIu64 ": ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_mutex_nest_lock:
printf("%" PRIu64 ": ompt_event_release_nest_lock_last: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_mutex_critical:
printf("%" PRIu64 ": ompt_event_release_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_mutex_atomic:
printf("%" PRIu64 ": ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_mutex_ordered:
printf("%" PRIu64 ": ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
default:
break;
}
}
static void
on_ompt_event_loop_begin(
ompt_parallel_id_t parallel_id,
ompt_task_id_t parent_task_id,
void *workshare_function)
on_ompt_callback_nest_lock(
ompt_scope_endpoint_t endpoint,
ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", workshare_function=%p\n", ompt_get_thread_id(), parallel_id, parent_task_id, workshare_function);
switch(endpoint)
{
case ompt_scope_begin:
printf("%" PRIu64 ": ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
case ompt_scope_end:
printf("%" PRIu64 ": ompt_event_release_nest_lock_prev: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
break;
}
}
static void
on_ompt_event_loop_end(
ompt_parallel_id_t parallel_id,
ompt_task_id_t task_id)
on_ompt_callback_sync_region(
ompt_sync_region_kind_t kind,
ompt_scope_endpoint_t endpoint,
ompt_data_t *parallel_data,
ompt_data_t *task_data,
const void *codeptr_ra)
{
printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
switch(endpoint)
{
case ompt_scope_begin:
switch(kind)
{
case ompt_sync_region_barrier:
printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
print_ids(0);
break;
case ompt_sync_region_taskwait:
printf("%" PRIu64 ": ompt_event_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
break;
case ompt_sync_region_taskgroup:
printf("%" PRIu64 ": ompt_event_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
break;
}
break;
case ompt_scope_end:
switch(kind)
{
case ompt_sync_region_barrier:
printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
break;
case ompt_sync_region_taskwait:
printf("%" PRIu64 ": ompt_event_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
break;
case ompt_sync_region_taskgroup:
printf("%" PRIu64 ": ompt_event_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
break;
}
break;
}
}
static void
on_ompt_event_parallel_begin(
ompt_task_id_t parent_task_id,
ompt_frame_t *parent_task_frame,
ompt_parallel_id_t parallel_id,
on_ompt_callback_sync_region_wait(
ompt_sync_region_kind_t kind,
ompt_scope_endpoint_t endpoint,
ompt_data_t *parallel_data,
ompt_data_t *task_data,
const void *codeptr_ra)
{
switch(endpoint)
{
case ompt_scope_begin:
switch(kind)
{
case ompt_sync_region_barrier:
printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
break;
case ompt_sync_region_taskwait:
printf("%" PRIu64 ": ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
break;
case ompt_sync_region_taskgroup:
printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
break;
}
break;
case ompt_scope_end:
switch(kind)
{
case ompt_sync_region_barrier:
printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
break;
case ompt_sync_region_taskwait:
printf("%" PRIu64 ": ompt_event_wait_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
break;
case ompt_sync_region_taskgroup:
printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
break;
}
break;
}
}
/*
 * Handler for ompt_callback_flush.
 * Prints one "ompt_event_flush" line made of the id stored in thread_data
 * and the code address handed in by the runtime.
 */
static void
on_ompt_callback_flush(
  ompt_data_t *thread_data,
  const void *codeptr_ra)
{
  const uint64_t flushing_thread = thread_data->value;

  printf("%" PRIu64 ": ompt_event_flush: codeptr_ra=%p\n", flushing_thread, codeptr_ra);
}
/*
 * Handler for ompt_callback_cancel.
 *
 * Decodes `flags` into two names taken from ompt_cancel_flag_t_values:
 *   - the cancelled construct (parallel / sections / do / taskgroup), and
 *   - the cancellation state (activated / detected / discarded_task),
 * then prints one "ompt_event_cancel" line containing the thread id, the id
 * stored in task_data, both names, the raw flag value and codeptr_ra.
 *
 * Both names start out as "unknown" so that printf never receives an
 * indeterminate pointer when `flags` carries none of the expected bits.
 */
static void
on_ompt_callback_cancel(
  ompt_data_t *task_data,
  int flags,
  const void *codeptr_ra)
{
  const char* first_flag_value = "unknown";
  const char* second_flag_value = "unknown";

  /* Which construct is being cancelled; the first matching bit wins. */
  if(flags & ompt_cancel_parallel)
    first_flag_value = ompt_cancel_flag_t_values[0];
  else if(flags & ompt_cancel_sections)
    first_flag_value = ompt_cancel_flag_t_values[1];
  else if(flags & ompt_cancel_do)
    first_flag_value = ompt_cancel_flag_t_values[2];
  else if(flags & ompt_cancel_taskgroup)
    first_flag_value = ompt_cancel_flag_t_values[3];

  /* How the cancellation was observed; the first matching bit wins. */
  if(flags & ompt_cancel_activated)
    second_flag_value = ompt_cancel_flag_t_values[4];
  else if(flags & ompt_cancel_detected)
    second_flag_value = ompt_cancel_flag_t_values[5];
  else if(flags & ompt_cancel_discarded_task)
    second_flag_value = ompt_cancel_flag_t_values[6];

  /* PRIu32 expects an unsigned 32-bit argument, hence the explicit cast of the int flags. */
  printf("%" PRIu64 ": ompt_event_cancel: task_data=%" PRIu64 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, task_data->value, first_flag_value, second_flag_value, (uint32_t)flags, codeptr_ra);
}
/*
 * Handler for ompt_callback_idle.
 * Prints "ompt_event_idle_begin:" or "ompt_event_idle_end:" prefixed with the
 * current thread id, depending on the endpoint; any other endpoint value
 * prints nothing.
 */
static void
on_ompt_callback_idle(
  ompt_scope_endpoint_t endpoint)
{
  if(endpoint == ompt_scope_begin)
  {
    printf("%" PRIu64 ": ompt_event_idle_begin:\n", ompt_get_thread_data()->value);
    return;
  }

  if(endpoint == ompt_scope_end)
  {
    printf("%" PRIu64 ": ompt_event_idle_end:\n", ompt_get_thread_data()->value);
  }
}
/*
 * Handler for ompt_callback_implicit_task.
 *
 * At the begin endpoint it complains if task_data already holds a non-null
 * pointer, stores a fresh unique id in task_data and prints
 * "ompt_event_implicit_task_begin". At the end endpoint it prints
 * "ompt_event_implicit_task_end", reporting parallel id 0 when parallel_data
 * is a null pointer.
 */
static void
on_ompt_callback_implicit_task(
  ompt_scope_endpoint_t endpoint,
  ompt_data_t *parallel_data,
  ompt_data_t *task_data,
  unsigned int team_size,
  unsigned int thread_num)
{
  if(endpoint == ompt_scope_begin)
  {
    if(task_data->ptr)
    {
      printf("%s\n", "0: task_data initially not null");
    }
    /* the tool owns task_data: tag the new implicit task with its own id */
    task_data->value = ompt_get_unique_id();
    printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num);
  }
  else if(endpoint == ompt_scope_end)
  {
    /* parallel_data may be null at the end of the task; report id 0 then */
    uint64_t parallel_id = 0;
    if(parallel_data)
    {
      parallel_id = parallel_data->value;
    }
    printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_id, task_data->value, team_size, thread_num);
  }
}
/*
 * Handler for ompt_callback_lock_init.
 * Prints "ompt_event_init_lock" for plain locks and
 * "ompt_event_init_nest_lock" for nestable locks, together with wait_id,
 * hint, impl and codeptr_ra. Other mutex kinds are ignored.
 */
static void
on_ompt_callback_lock_init(
  ompt_mutex_kind_t kind,
  unsigned int hint,
  unsigned int impl,
  ompt_wait_id_t wait_id,
  const void *codeptr_ra)
{
  if(kind == ompt_mutex_lock)
  {
    printf("%" PRIu64 ": ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
  }
  else if(kind == ompt_mutex_nest_lock)
  {
    printf("%" PRIu64 ": ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
  }
}
/*
 * Handler for ompt_callback_lock_destroy.
 * Prints "ompt_event_destroy_lock" or "ompt_event_destroy_nest_lock" with
 * wait_id and codeptr_ra; every other mutex kind is silently skipped.
 */
static void
on_ompt_callback_lock_destroy(
  ompt_mutex_kind_t kind,
  ompt_wait_id_t wait_id,
  const void *codeptr_ra)
{
  if(kind == ompt_mutex_lock)
  {
    printf("%" PRIu64 ": ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
  }
  else if(kind == ompt_mutex_nest_lock)
  {
    printf("%" PRIu64 ": ompt_event_destroy_nest_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
  }
}
/*
 * Handler registered for ompt_callback_work.
 *
 * Prints one "<construct>_begin" or "<construct>_end" line per call, chosen by
 * wstype (loop, sections, single executor/other, distribute, taskloop) and by
 * endpoint. Every line carries the current thread id, the ids stored in
 * parallel_data and task_data, codeptr_ra and count.
 * ompt_work_workshare is accepted but produces no output.
 *
 * NOTE(review): task_data->value is labelled "parent_task_id" in some messages
 * and "task_id" in others. The strings are presumably matched verbatim by the
 * tests, so they must only be changed together with the test expectations.
 */
static void
on_ompt_callback_work(
ompt_work_type_t wstype,
ompt_scope_endpoint_t endpoint,
ompt_data_t *parallel_data,
ompt_data_t *task_data,
uint64_t count,
const void *codeptr_ra)
{
switch(endpoint)
{
/* entering the worksharing construct */
case ompt_scope_begin:
switch(wstype)
{
case ompt_work_loop:
printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_sections:
printf("%" PRIu64 ": ompt_event_sections_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_single_executor:
printf("%" PRIu64 ": ompt_event_single_in_block_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_single_other:
printf("%" PRIu64 ": ompt_event_single_others_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_workshare:
//impl
break;
case ompt_work_distribute:
printf("%" PRIu64 ": ompt_event_distribute_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_taskloop:
//impl
printf("%" PRIu64 ": ompt_event_taskloop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
}
break;
/* leaving the worksharing construct */
case ompt_scope_end:
switch(wstype)
{
case ompt_work_loop:
printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_sections:
printf("%" PRIu64 ": ompt_event_sections_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_single_executor:
printf("%" PRIu64 ": ompt_event_single_in_block_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_single_other:
printf("%" PRIu64 ": ompt_event_single_others_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_workshare:
//impl
break;
case ompt_work_distribute:
printf("%" PRIu64 ": ompt_event_distribute_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_work_taskloop:
//impl
printf("%" PRIu64 ": ompt_event_taskloop_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
break;
}
break;
}
}
/*
 * Handler for ompt_callback_master.
 * Prints "ompt_event_master_begin" or "ompt_event_master_end" with the ids
 * stored in parallel_data and task_data plus codeptr_ra.
 */
static void
on_ompt_callback_master(
  ompt_scope_endpoint_t endpoint,
  ompt_data_t *parallel_data,
  ompt_data_t *task_data,
  const void *codeptr_ra)
{
  if(endpoint == ompt_scope_begin)
  {
    printf("%" PRIu64 ": ompt_event_master_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
  }
  else if(endpoint == ompt_scope_end)
  {
    printf("%" PRIu64 ": ompt_event_master_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
  }
}
static void
on_ompt_callback_parallel_begin(
ompt_data_t *parent_task_data,
const ompt_frame_t *parent_task_frame,
ompt_data_t* parallel_data,
uint32_t requested_team_size,
void *parallel_function,
ompt_invoker_t invoker)
ompt_invoker_t invoker,
const void *codeptr_ra)
{
printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", parallel_function=%p, invoker=%d\n", ompt_get_thread_id(), parent_task_id, parent_task_frame->exit_runtime_frame, parent_task_frame->reenter_runtime_frame, parallel_id, requested_team_size, parallel_function, invoker);
if(parallel_data->ptr)
printf("%s\n", "0: parallel_data initially not null");
parallel_data->value = ompt_get_unique_id();
printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, parent_task_data->value, parent_task_frame->exit_runtime_frame, parent_task_frame->reenter_runtime_frame, parallel_data->value, requested_team_size, codeptr_ra, invoker);
}
static void
on_ompt_event_parallel_end(
ompt_parallel_id_t parallel_id,
ompt_task_id_t task_id,
ompt_invoker_t invoker)
on_ompt_callback_parallel_end(
ompt_data_t *parallel_data,
ompt_data_t *task_data,
ompt_invoker_t invoker,
const void *codeptr_ra)
{
printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d\n", ompt_get_thread_id(), parallel_id, task_id, invoker);
printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, invoker, codeptr_ra);
}
static void
on_ompt_callback_task_create(
ompt_data_t *parent_task_data, /* id of parent task */
const ompt_frame_t *parent_frame, /* frame data for parent task */
ompt_data_t* new_task_data, /* id of created task */
int type,
int has_dependences,
const void *codeptr_ra) /* pointer to outlined function */
{
if(new_task_data->ptr)
printf("%s\n", "0: new_task_data initially not null");
new_task_data->value = ompt_get_unique_id();
char buffer[2048];
void ompt_initialize(
format_task_type(type, buffer);
//there is no parallel_begin callback for implicit parallel region
//thus it is initialized in initial task
if(type & ompt_task_initial)
{
ompt_data_t *parallel_data;
ompt_get_parallel_info(0, &parallel_data, NULL);
if(parallel_data->ptr)
printf("%s\n", "0: parallel_data initially not null");
parallel_data->value = ompt_get_unique_id();
}
printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, parent_task_data ? parent_task_data->value : 0, parent_frame ? parent_frame->exit_runtime_frame : NULL, parent_frame ? parent_frame->reenter_runtime_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
}
/*
 * Handler for ompt_callback_task_schedule.
 * Always prints "ompt_event_task_schedule" with the ids of both tasks and the
 * prior task status (by name, looked up in ompt_task_status_t_values, and by
 * number). When the prior task completed, an additional
 * "ompt_event_task_end" line is printed for it.
 */
static void
on_ompt_callback_task_schedule(
  ompt_data_t *first_task_data,
  ompt_task_status_t prior_task_status,
  ompt_data_t *second_task_data)
{
  const char* status_name = ompt_task_status_t_values[prior_task_status];

  printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, status_name, prior_task_status);

  if(prior_task_status != ompt_task_complete)
    return;

  printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value);
}
/*
 * Handler for ompt_callback_task_dependences.
 * Prints "ompt_event_task_dependences" with the id stored in task_data, the
 * address of the dependence array and the number of dependences.
 */
static void
on_ompt_callback_task_dependences(
  ompt_data_t *task_data,
  const ompt_task_dependence_t *deps,
  int ndeps)
{
  /* %p takes a void pointer, so the array address is converted once here */
  void *deps_address = (void *)deps;

  printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n", ompt_get_thread_data()->value, task_data->value, deps_address, ndeps);
}
/*
 * Handler for ompt_callback_task_dependence.
 * Prints "ompt_event_task_dependence_pair" with the ids stored in the two
 * task data objects.
 */
static void
on_ompt_callback_task_dependence(
  ompt_data_t *first_task_data,
  ompt_data_t *second_task_data)
{
  const uint64_t first_id = first_task_data->value;
  const uint64_t second_id = second_task_data->value;

  printf("%" PRIu64 ": ompt_event_task_dependence_pair: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_id, second_id);
}
/*
 * Handler for ompt_callback_thread_begin.
 * Complains if thread_data already holds a non-null pointer, stores a fresh
 * unique id in it, and prints "ompt_event_thread_begin" with the thread type
 * (by name from ompt_thread_type_t_values and by number) and the new id.
 */
static void
on_ompt_callback_thread_begin(
  ompt_thread_type_t thread_type,
  ompt_data_t *thread_data)
{
  if(thread_data->ptr)
  {
    printf("%s\n", "0: thread_data initially not null");
  }

  /* the tool owns thread_data: tag the new thread with its own id */
  thread_data->value = ompt_get_unique_id();

  const char* type_name = ompt_thread_type_t_values[thread_type];
  printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, type_name, thread_type, thread_data->value);
}
/*
 * Handler for ompt_callback_thread_end.
 * Prints "ompt_event_thread_end", prefixed with the id obtained through
 * ompt_get_thread_data() and followed by the id stored in thread_data.
 */
static void
on_ompt_callback_thread_end(
  ompt_data_t *thread_data)
{
  const uint64_t ending_thread = thread_data->value;

  printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ending_thread);
}
/*
 * Handler for ompt_callback_control_tool.
 * Queries the frame of the task at level 0 through ompt_get_task_info and
 * prints "ompt_event_control_tool" with command, modifier, arg, codeptr_ra
 * and the exit/reenter frame addresses of that task.
 * Always returns 0.
 */
static int
on_ompt_callback_control_tool(
  uint64_t command,
  uint64_t modifier,
  void *arg,
  const void *codeptr_ra)
{
  ompt_frame_t* current_frame;

  /* only the frame is of interest; all other outputs are declined with NULL */
  ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &current_frame, NULL, NULL);

  printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, current_frame->exit_runtime_frame, current_frame->reenter_runtime_frame);

  return 0; //success
}
/*
 * register_callback_t(name, type):
 *   Registers the handler on_<name> for the callback <name> via
 *   ompt_set_callback. The handler is first assigned to a local function
 *   pointer of the given type, so a handler whose signature does not fit
 *   `type` is diagnosed by the compiler before the cast to ompt_callback_t.
 *   If ompt_set_callback returns ompt_set_never, a line
 *   "0: Could not register callback '<name>'" is printed.
 *
 * register_callback(name):
 *   Shorthand for callbacks whose signature type is named <name>_t.
 */
#define register_callback_t(name, type) \
do{ \
type f_##name = &on_##name; \
if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \
ompt_set_never) \
printf("0: Could not register callback '" #name "'\n"); \
}while(0)
#define register_callback(name) register_callback_t(name, name##_t)
int ompt_initialize(
ompt_function_lookup_t lookup,
const char *runtime_version,
unsigned int ompt_version)
ompt_fns_t* fns)
{
ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
ompt_get_task_id = (ompt_get_task_id_t) lookup("ompt_get_task_id");
ompt_get_task_frame = (ompt_get_task_frame_t) lookup("ompt_get_task_frame");
ompt_get_thread_id = (ompt_get_thread_id_t) lookup("ompt_get_thread_id");
ompt_get_parallel_id = (ompt_get_parallel_id_t) lookup("ompt_get_parallel_id");
ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
ompt_set_callback(ompt_event_barrier_begin, (ompt_callback_t) &on_ompt_event_barrier_begin);
ompt_set_callback(ompt_event_barrier_end, (ompt_callback_t) &on_ompt_event_barrier_end);
ompt_set_callback(ompt_event_implicit_task_begin, (ompt_callback_t) &on_ompt_event_implicit_task_begin);
ompt_set_callback(ompt_event_implicit_task_end, (ompt_callback_t) &on_ompt_event_implicit_task_end);
ompt_set_callback(ompt_event_loop_begin, (ompt_callback_t) &on_ompt_event_loop_begin);
ompt_set_callback(ompt_event_loop_end, (ompt_callback_t) &on_ompt_event_loop_end);
ompt_set_callback(ompt_event_parallel_begin, (ompt_callback_t) &on_ompt_event_parallel_begin);
ompt_set_callback(ompt_event_parallel_end, (ompt_callback_t) &on_ompt_event_parallel_end);
printf("0: NULL_POINTER=%p\n", NULL);
ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");
register_callback(ompt_callback_mutex_acquire);
register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
register_callback(ompt_callback_nest_lock);
register_callback(ompt_callback_sync_region);
register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
register_callback(ompt_callback_control_tool);
register_callback(ompt_callback_flush);
register_callback(ompt_callback_cancel);
register_callback(ompt_callback_idle);
register_callback(ompt_callback_implicit_task);
register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
register_callback(ompt_callback_work);
register_callback(ompt_callback_master);
register_callback(ompt_callback_parallel_begin);
register_callback(ompt_callback_parallel_end);
register_callback(ompt_callback_task_create);
register_callback(ompt_callback_task_schedule);
register_callback(ompt_callback_task_dependences);
register_callback(ompt_callback_task_dependence);
register_callback(ompt_callback_thread_begin);
register_callback(ompt_callback_thread_end);
printf("0: NULL_POINTER=%p\n", (void*)NULL);
return 1; //success
}
ompt_initialize_t ompt_tool()
void ompt_finalize(ompt_fns_t* fns)
{
return &ompt_initialize;
printf("0: ompt_event_runtime_shutdown\n");
}
/*
 * Tool entry point.
 * Returns a pointer to a function table that names ompt_initialize and
 * ompt_finalize. The table has static storage duration, so the returned
 * pointer stays valid after this function returns. Both arguments are unused.
 */
ompt_fns_t* ompt_start_tool(
  unsigned int omp_version,
  const char *runtime_version)
{
  static ompt_fns_t tool_functions = {
    &ompt_initialize,
    &ompt_finalize
  };

  return &tool_functions;
}

View File

@ -0,0 +1,42 @@
// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// Current GOMP interface implementation does not support cancellation
// XFAIL: gcc
#include "callback.h"
#include "omp.h"
// Two-thread parallel region: thread 0 activates cancellation of the region,
// the other thread waits briefly and then reaches a cancellation point.
// Before doing so, each thread prints the address of its label divided by 256
// (and that value minus one) so the expectations below can match the
// codeptr_ra reported with the cancel event without knowing the exact address.
int main()
{
  #pragma omp parallel num_threads(2)
  {
    if(omp_get_thread_num() == 0)
    {
      // The label addresses are cast to uint64_t, so they are printed with
      // PRIx64; "%lx" is only correct where uint64_t is unsigned long.
      printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 "\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_1))/256-1, ((uint64_t)(char*)(&& ompt_label_1))/256);
      #pragma omp cancel parallel
      print_fuzzy_address(1); //does not actually print the address but provides a label
    }
    else
    {
      // Give thread 0 time to activate the cancellation first.
      delay(100);
      printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 "\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_2))/256-1, ((uint64_t)(char*)(&& ompt_label_2))/256);
      #pragma omp cancellation point parallel
      print_fuzzy_address(2); //does not actually print the address but provides a label
    }
  }
  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS:0x[0-f]+]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_activated=17, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: fuzzy_address={{.*}}[[OTHER_RETURN_ADDRESS:0x[0-f]+]]
  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_detected=33, codeptr_ra=[[OTHER_RETURN_ADDRESS]]{{[0-f][0-f]}}
  return 0;
}

View File

@ -0,0 +1,88 @@
// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// Current GOMP interface implementation does not support cancellation
// XFAIL: gcc
#include "callback.h"
#include <unistd.h>
#include <stdio.h>
// Exercises taskgroup cancellation. Inside a taskgroup the master thread
// creates three deferred tasks and one undeferred task (if(0)); the undeferred
// task issues the cancel, the other tasks contain cancellation points.
// `condition` is a shared counter advanced with OMPT_SIGNAL and polled with
// OMPT_WAIT to impose an order on the tasks.
int main()
{
int condition=0;
// Empty parallel region executed before the region under test.
#pragma omp parallel num_threads(2)
{}
print_frame(0);
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
#pragma omp taskgroup
{
// Tasks 1-3: announce the start, bump the counter, then wait until the
// counter has reached 2 before arriving at the cancellation point.
#pragma omp task shared(condition)
{
printf("start execute task 1\n");
OMPT_SIGNAL(condition);
OMPT_WAIT(condition,2);
#pragma omp cancellation point taskgroup
printf("end execute task 1\n");
}
#pragma omp task shared(condition)
{
printf("start execute task 2\n");
OMPT_SIGNAL(condition);
OMPT_WAIT(condition,2);
#pragma omp cancellation point taskgroup
printf("end execute task 2\n");
}
#pragma omp task shared(condition)
{
printf("start execute task 3\n");
OMPT_SIGNAL(condition);
OMPT_WAIT(condition,2);
#pragma omp cancellation point taskgroup
printf("end execute task 3\n");
}
// Task 4 (undeferred): waits until the counter has reached 1, i.e. until some
// other task has signalled, and then cancels the taskgroup.
#pragma omp task if(0) shared(condition)
{
printf("start execute task 4\n");
OMPT_WAIT(condition,1);
#pragma omp cancel taskgroup
printf("end execute task 4\n");
}
// One more increment from the task-generating code, issued after task 4;
// together with a task's own signal this lets waiters for a count of 2 go on.
OMPT_SIGNAL(condition);
}
}
#pragma omp barrier
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[CANCEL_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_others=4
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[CANCEL_TASK_ID]], flags=ompt_cancel_taskgroup|ompt_cancel_activated=24, codeptr_ra={{0x[0-f]*}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[CANCEL_TASK_ID]], second_task_id=[[PARENT_TASK_ID]], prior_task_status=ompt_task_cancel=3
// CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]]
// CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]]
// CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
// CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_detected=40, codeptr_ra={{0x[0-f]*}}
return 0;
}

View File

@ -0,0 +1,68 @@
// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// Current GOMP interface implementation does not support cancellation
// XFAIL: gcc
#include "callback.h"
#include <unistd.h>
// Exercises cancellation of worksharing constructs: first a two-iteration
// `for` loop, then a `sections` construct, each in its own two-thread parallel
// region. `condition` is a shared counter (OMPT_SIGNAL increments it,
// OMPT_WAIT polls it) that makes the cancelling side run first.
int main()
{
int condition=0;
#pragma omp parallel num_threads(2)
{
int x = 0;
int i;
#pragma omp for
for(i = 0; i < 2; i++)
{
if(i == 0)
{
// Iteration 0 signals and then cancels the loop.
x++;
OMPT_SIGNAL(condition);
#pragma omp cancel for
}
else
{
// Iteration 1 waits for the signal plus an extra delay, so the cancel is
// expected to be active before the cancellation point is reached.
x++;
OMPT_WAIT(condition,1);
delay(10000);
#pragma omp cancellation point for
}
}
}
#pragma omp parallel num_threads(2)
{
#pragma omp sections
{
#pragma omp section
{
// Raises the counter (already incremented once by the loop above) and
// cancels the sections construct.
OMPT_SIGNAL(condition);
#pragma omp cancel sections
}
#pragma omp section
{
// Waits for the counter to reach 2 plus an extra delay before the
// cancellation point.
OMPT_WAIT(condition,2);
delay(10000);
#pragma omp cancellation point sections
}
}
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
// cancel for and sections
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_activated=20, codeptr_ra={{0x[0-f]*}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_activated=18, codeptr_ra={{0x[0-f]*}}
// CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_detected=36, codeptr_ra={{0x[0-f]*}}
// CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_detected=34, codeptr_ra={{0x[0-f]*}}
return 0;
}

View File

@ -0,0 +1,59 @@
// RUN: %libomp-compile -DCODE && %libomp-compile -DTOOL -o%T/tool.so -shared -fPIC && env OMP_TOOL_LIBRARIES=%T/tool.so %libomp-run | FileCheck %s
// REQUIRES: ompt
/*
* This file contains code for an OMPT shared library tool to be
* loaded and the code for the OpenMP executable.
* -DTOOL enables the code for the tool during compilation
* -DCODE enables the code for the executable during compilation
* The RUN line compiles the two binaries and then tries to load
* the tool using the OMP_TOOL_LIBRARIES environment variable.
*/
#ifdef CODE
#include "omp.h"
// Executable part of the test (built with -DCODE). It only runs an empty
// two-thread parallel region; all output matched by the expectations below is
// printed by the separately built tool library, not by this function.
int main()
{
#pragma omp parallel num_threads(2)
{
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}0: ompt_event_runtime_shutdown
return 0;
}
#endif /* CODE */
#ifdef TOOL
#include <stdio.h>
#include <ompt.h>
/* Tool initializer, handed to the runtime via the table that
 * ompt_start_tool returns. Prints how "%p" renders a null pointer (the test
 * expectations capture that text) and reports success. Neither argument is
 * used. */
int ompt_initialize(
  ompt_function_lookup_t lookup,
  ompt_fns_t* fns)
{
  void* null_pointer = NULL;
  printf("0: NULL_POINTER=%p\n", null_pointer);
  /* 1 == success */
  return 1;
}
/* Tool finalizer, handed to the runtime via the table that ompt_start_tool
 * returns. Prints the shutdown marker the test expects.
 * The "0:" prefix is written literally: the expected output is exactly
 * "0: ompt_event_runtime_shutdown", and the tool part of this file does not
 * include omp.h, so omp_get_thread_num() was being called without a
 * declaration. The argument is not used. */
void ompt_finalize(ompt_fns_t* fns)
{
  printf("0: ompt_event_runtime_shutdown\n");
}
/* Start routine exported by the tool library. Neither argument is inspected.
 * The returned table has static storage duration, so the pointer stays valid
 * after this function returns; it names the tool's initialize and finalize
 * routines. */
ompt_fns_t* ompt_start_tool(
  unsigned int omp_version,
  const char *runtime_version)
{
  static ompt_fns_t tool_entry_points = {
    &ompt_initialize,
    &ompt_finalize
  };
  ompt_fns_t* table = &tool_entry_points;
  return table;
}
#endif /* TOOL */

View File

@ -0,0 +1,66 @@
// RUN: %libomp-compile && env OMP_PLACES=cores %libomp-run | FileCheck %s
// REQUIRES: ompt, linux
#include "callback.h"
#include <omp.h>
#define __USE_GNU
#include <sched.h>
#undef __USE_GNU
// Prints "<thread id>: <function_name>(0)=(v0,v1,...)" for the first `size`
// entries of `list`.
//   function_name  label placed in front of the list
//   size           number of valid entries in `list`
//   list           values to print; list[0] is always printed, so the array
//                  must hold at least one entry
// The length is passed explicitly because the arrays printed by this test do
// not all have the same length (processors of place 0 vs. places of the
// partition); deriving it from omp_get_place_num_procs(0) for every list could
// read past the end of the partition arrays.
void print_list(const char* function_name, int size, int list[])
{
  printf("%" PRIu64 ": %s(0)=(%d", ompt_get_thread_data()->value, function_name, list[0]);
  int i;
  for(i = 1; i < size; i++)
  {
    printf(",%d", list[i]);
  }
  printf(")\n");
}

// Compares the place-related omp_* query routines with their ompt_*
// counterparts by printing both results from a single-thread parallel region;
// the expectations below require matching values.
int main()
{
  #pragma omp parallel num_threads(1)
  {
    printf("%" PRIu64 ": omp_get_num_places()=%d\n", ompt_get_thread_data()->value, omp_get_num_places());
    printf("%" PRIu64 ": ompt_get_num_places()=%d\n", ompt_get_thread_data()->value, ompt_get_num_places());

    // Processor ids of place 0, once through each interface.
    int num_procs = omp_get_place_num_procs(0);
    int omp_ids[num_procs];
    omp_get_place_proc_ids(0, omp_ids);
    print_list("omp_get_place_proc_ids", num_procs, omp_ids);
    int ompt_ids[num_procs];
    ompt_get_place_proc_ids(0, num_procs, ompt_ids);
    print_list("ompt_get_place_proc_ids", num_procs, ompt_ids);

    printf("%" PRIu64 ": omp_get_place_num()=%d\n", ompt_get_thread_data()->value, omp_get_place_num());
    printf("%" PRIu64 ": ompt_get_place_num()=%d\n", ompt_get_thread_data()->value, ompt_get_place_num());

    // Place numbers of the current partition, once through each interface.
    int num_places = omp_get_partition_num_places();
    int omp_nums[num_places];
    omp_get_partition_place_nums(omp_nums);
    print_list("omp_get_partition_place_nums", num_places, omp_nums);
    int ompt_nums[num_places];
    ompt_get_partition_place_nums(num_places, ompt_nums);
    print_list("ompt_get_partition_place_nums", num_places, ompt_nums);

    printf("%" PRIu64 ": sched_getcpu()=%d\n", ompt_get_thread_data()->value, sched_getcpu());
    printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", ompt_get_thread_data()->value, ompt_get_proc_id());
  }
  // Check if libomp supports the callbacks for this test.
  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: omp_get_num_places()=[[NUM_PLACES:[0-9]+]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_places()=[[NUM_PLACES]]
  // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_proc_ids(0)=([[PROC_IDS:[0-9\,]+]])
  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids(0)=([[PROC_IDS]])
  // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_num()=[[PLACE_NUM:[-]?[0-9]+]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=[[PLACE_NUM]]
  // CHECK: {{^}}[[MASTER_ID]]: sched_getcpu()=[[CPU_ID:[0-9]+]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=[[CPU_ID]]
  return 0;
}

View File

@ -0,0 +1,27 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Calls omp_control_tool from a single-thread parallel region and prints the
// surrounding frame addresses and the current code address, so the
// expectations below can relate them to the frames and codeptr_ra reported
// with the control_tool event.
int main()
{
#pragma omp parallel num_threads(1)
{
print_frame(1);
print_frame(0);
// Flush command with modifier 1 and no argument; the expected event line
// shows command=3, modifier=1 and a null arg.
omp_control_tool(omp_control_tool_flush, 1, NULL);
// Printed directly after the call; the expectations require this address to
// end with the event's codeptr_ra.
print_current_address(0);
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_control_tool'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(1)=[[EXIT_FRAME:0x[0-f]*]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,12 @@
// RUN: %libomp-compile-and-run
#include <omp.h>
// Calls omp_control_tool from a single-thread parallel region in a program
// that does not include the test tool header. The test has no output
// expectations; it passes when the program builds and exits with status 0.
int main()
{
#pragma omp parallel num_threads(1)
{
omp_control_tool(omp_control_tool_flush, 1, NULL);
}
return 0;
}

View File

@ -0,0 +1,32 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Runs a three-thread parallel region followed by a two-thread one, each
// doing a single atomic increment, and expects an idle begin/end event pair
// from some thread in the tool output.
// NOTE(review): presumably the smaller second team is what leaves a worker
// idle - confirm against the runtime.
int main()
{
int x = 0;
#pragma omp parallel num_threads(3)
{
#pragma omp atomic
x++;
}
#pragma omp parallel num_threads(2)
{
#pragma omp atomic
x++;
}
printf("x=%d\n", x);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_idle_begin:
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_idle_end:
return 0;
}

View File

@ -0,0 +1,31 @@
// delay(t): suspend the calling thread for about t microseconds.
// The macro takes exactly one argument on every platform, because all uses
// pass one (e.g. delay(10)). The trailing semicolon inside the expansion is
// kept so that existing uses expand exactly as before.
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
// Sleep() takes milliseconds: convert from microseconds, rounding up so that
// short delays still sleep for at least 1 ms.
#define delay(t) Sleep(((t) + 999) / 1000);
#else
#include <unistd.h>
#define delay(t) usleep(t);
#endif
// These functions provide a signal-wait mechanism to enforce the expected scheduling in the test cases.
// The condition variable (s) must be shared between the threads and initialized to 0.
// OMPT_SIGNAL(s): atomically add one to the counter variable s.
// The macro takes the variable itself and passes its address on.
#define OMPT_SIGNAL(s) ompt_signal(&s)
// (not declared inline)
void ompt_signal(int* s)
{
  #pragma omp atomic
  *s += 1;
}
// OMPT_WAIT(s,v): return once the counter variable s has reached at least v.
// The macro takes the variable itself and passes its address on.
#define OMPT_WAIT(s,v) ompt_wait(&s,v)
// (not declared inline)
void ompt_wait(int *s, int v)
{
  int observed;
  for(;;)
  {
    // Always pause before sampling the counter, also on the first pass.
    delay(10);
    #pragma omp atomic read
    observed = *s;
    if(observed >= v)
      break;
  }
}

View File

@ -0,0 +1,43 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
// Requests a team of four threads with dynamic adjustment of the team size
// enabled, prints the ids seen at task levels 0 and 1 inside the region, and
// prints a fuzzy address after the region so the expectations below can match
// it against the codeptr_ra of the parallel-begin event.
int main()
{
omp_set_dynamic(1);
#pragma omp parallel num_threads(4)
{
print_ids(0);
print_ids(1);
}
print_fuzzy_address(1);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
//team-size of 1-4 is expected
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: parallel_data initially not null
// CHECK-NOT: 0: task_data initially not null
// CHECK-NOT: 0: thread_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[1-4]}}
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,43 @@
// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
// Same program as the dynamic-team-size test with enough threads, but this
// test's run line sets OMP_THREAD_LIMIT=2, so the request for four threads
// cannot be met in full. Prints the ids seen at task levels 0 and 1 inside the
// region and a fuzzy address after it for matching the parallel-begin
// codeptr_ra.
int main()
{
omp_set_dynamic(1);
#pragma omp parallel num_threads(4)
{
print_ids(0);
print_ids(1);
}
print_fuzzy_address(1);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
//team-size of 1-4 is expected
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: parallel_data initially not null
// CHECK-NOT: 0: task_data initially not null
// CHECK-NOT: 0: thread_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[1-4]}}
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,72 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Nested parallel regions with nesting enabled but the number of active
// levels limited to one. Each level prints the ids visible at the enclosing
// task levels, and a fuzzy address is printed after each region so the
// expectations below can match the codeptr_ra of the parallel-begin events.
// NOTE(review): with max active levels set to 1 the inner region is presumably
// executed by the encountering thread alone - confirm against the runtime.
int main()
{
omp_set_nested(1);
omp_set_max_active_levels(1);
#pragma omp parallel num_threads(2)
{
print_ids(0);
print_ids(1);
#pragma omp parallel num_threads(2)
{
print_ids(0);
print_ids(1);
print_ids(2);
}
print_fuzzy_address(1);
}
print_fuzzy_address(2);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// THREADS: 0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: parallel_data initially not null
// CHECK-NOT: 0: task_data initially not null
// CHECK-NOT: 0: thread_data initially not null
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -3,9 +3,11 @@
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
#include <unistd.h>
int main()
{
int condition=0;
omp_set_nested(1);
print_frame(0);
@ -15,6 +17,10 @@ int main()
print_ids(0);
print_ids(1);
print_frame(0);
//get all implicit task events before starting nested:
#pragma omp barrier
#pragma omp parallel num_threads(4)
{
print_frame(1);
@ -22,17 +28,38 @@ int main()
print_ids(1);
print_ids(2);
print_frame(0);
OMPT_SIGNAL(condition);
OMPT_WAIT(condition,16);
#pragma omp barrier
print_fuzzy_address(1);
print_ids(0);
}
print_fuzzy_address(2);
print_ids(0);
}
print_fuzzy_address(3);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
// make sure initial data pointers are null
// CHECK-NOT: 0: parallel_data initially not null
// CHECK-NOT: 0: task_data initially not null
// CHECK-NOT: 0: thread_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
@ -46,219 +73,224 @@ int main()
// CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// nested parallel masters
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[NESTED_EXIT:0x[0-f]+]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
// explicit barrier
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
// implicit barrier
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// implicit barrier
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// nested parallel worker threads
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -3,35 +3,59 @@
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
#include <unistd.h>
int main()
{
omp_set_nested(1);
int condition;
#pragma omp parallel num_threads(4)
{
print_ids(0);
print_ids(1);
// Get all implicit task events before starting the nested parallel region:
#pragma omp barrier
#pragma omp parallel num_threads(1)
{
print_ids(0);
print_ids(1);
print_ids(2);
// Get all implicit task events before starting the nested parallel region:
#pragma omp barrier
#pragma omp parallel num_threads(4)
{
print_ids(0);
print_ids(1);
print_ids(2);
print_ids(3);
OMPT_SIGNAL(condition);
OMPT_WAIT(condition,16);
}
print_fuzzy_address(1);
}
print_fuzzy_address(2);
}
print_fuzzy_address(3);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
// make sure initial data pointers are null
// CHECK-NOT: 0: parallel_data initially not null
// CHECK-NOT: 0: task_data initially not null
// CHECK-NOT: 0: thread_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
@ -48,251 +72,261 @@ int main()
// THREADS: 0: NULL_POINTER=[[NULL:.*$]]
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// nested parallel masters
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// nested parallel worker threads
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// can't reliably tell which parallel region is the parent...
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
// THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;

View File

@ -18,13 +18,29 @@ int main()
print_ids(1);
print_ids(2);
}
print_fuzzy_address(1);
}
print_fuzzy_address(2);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
// make sure initial data pointers are null
// CHECK-NOT: 0: parallel_data initially not null
// CHECK-NOT: 0: task_data initially not null
// CHECK-NOT: 0: thread_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
@ -41,67 +57,71 @@ int main()
// THREADS: 0: NULL_POINTER=[[NULL:.*$]]
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -0,0 +1,95 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
// REQUIRES: ompt
#include "callback.h"
// Runs one parallel region whose team size comes from omp_set_num_threads()
// instead of a num_threads clause. The expectations below are matched against
// the output printed by the helpers from callback.h.
int main()
{
  // Request a team of four threads for the region below.
  omp_set_num_threads(4);
  #pragma omp parallel
  {
    // The expectations below look for "task level 0" and "task level 1"
    // lines after each implicit task begins; presumably these two calls
    // print them.
    print_ids(0);
    print_ids(1);
  }
  // The fuzzy_address expectation below is compared with the codeptr_ra
  // captured for the parallel region; presumably this call prints that line.
  print_fuzzy_address(1);

  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'

  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]

  // make sure initial data pointers are null
  // CHECK-NOT: 0: parallel_data initially not null
  // CHECK-NOT: 0: task_data initially not null
  // CHECK-NOT: 0: thread_data initially not null

  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]

  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]

  // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!

  // One begin/barrier pair per worker thread: a team of four has three workers.
  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]

  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]

  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]

  // The remaining expectations use the second prefix, whose run pipes the
  // output through the sort-threads filter; each group below tracks one thread.
  // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
  // Null pointers are matched through the captured NULL spelling and the new
  // task id through a numeric pattern, so the line does not depend on how the
  // C library prints a null pointer or on the runtime's id numbering.
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
  // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]

  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
  // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]

  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
  // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]

  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
  // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]

  return 0;
}

View File

@ -10,12 +10,27 @@ int main()
print_ids(0);
print_ids(1);
}
print_fuzzy_address(1);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
// make sure initial data pointers are null
// CHECK-NOT: 0: parallel_data initially not null
// CHECK-NOT: 0: task_data initially not null
// CHECK-NOT: 0: thread_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
@ -28,43 +43,48 @@ int main()
// CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// THREADS: 0: NULL_POINTER=[[NULL:.*$]]
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker={{.*}}
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{.*}}
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
// THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -0,0 +1,76 @@
// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s
// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | %sort-threads | FileCheck --check-prefix=THREADS %s
// REQUIRES: ompt
#include "callback.h"
// Requests four threads for a parallel region while the lit run lines cap the
// program at two threads through OMP_THREAD_LIMIT. The expectations below are
// matched against the output printed by the helpers from callback.h.
int main()
{
  // More threads are requested than the limit allows; requested_team_size is
  // still expected to be reported as 4 in the parallel_begin line below.
  #pragma omp parallel num_threads(4)
  {
    // The expectations below look for "task level 0" and "task level 1"
    // lines after each implicit task begins; presumably these two calls
    // print them.
    print_ids(0);
    print_ids(1);
  }
  // The fuzzy_address expectation below is compared with the codeptr_ra
  // captured for the parallel region; presumably this call prints that line.
  print_fuzzy_address(1);

  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'

  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]

  // make sure initial data pointers are null
  // CHECK-NOT: 0: parallel_data initially not null
  // CHECK-NOT: 0: task_data initially not null
  // CHECK-NOT: 0: thread_data initially not null

  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]

  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]

  // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!

  // With the thread limit of two there is exactly one worker besides the
  // master, so only one worker begin/barrier pair is expected here. Further
  // pairs could only be satisfied by re-matching the same output lines.
  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]

  // The remaining expectations use the second prefix, whose run pipes the
  // output through the sort-threads filter: first the master, then the worker.
  // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
  // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]

  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
  // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]

  return 0;
}

View File

@ -0,0 +1,75 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
// Nests two parallel regions that are both serialized with if(0) and creates
// an explicit task in the inner one. The expectations below are matched
// against the output printed by the helpers from callback.h.
int main()
{
  // print_frame(0);
  #pragma omp parallel if(0)
  {
    // print_frame(1);
    // Outer region: the expectations look for task levels 0 and 1 here.
    print_ids(0);
    print_ids(1);
    // print_frame(0);
    #pragma omp parallel if(0)
    {
      // print_frame(1);
      // Inner region: one more ancestor level than in the outer region.
      print_ids(0);
      print_ids(1);
      print_ids(2);
      // print_frame(0);
      #pragma omp task
      {
        // print_frame(1);
        // The expectations only match levels 0 to 2 inside the explicit task;
        // whatever print_ids(3) prints is not checked.
        print_ids(0);
        print_ids(1);
        print_ids(2);
        print_ids(3);
      }
    }
    // Compared below with the codeptr_ra captured for the inner region.
    print_fuzzy_address(1);
  }
  // Compared below with the codeptr_ra captured for the outer region.
  print_fuzzy_address(2);

  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
  // Implicit task begin and end are guarded through the single
  // 'ompt_callback_implicit_task' name, the same name the sibling tests use.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'

  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]

  // make sure initial data pointers are null
  // CHECK-NOT: 0: parallel_data initially not null
  // CHECK-NOT: 0: task_data initially not null
  // CHECK-NOT: 0: thread_data initially not null

  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4
  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]]

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]

  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]

  return 0;
}

View File

@ -4,21 +4,73 @@
int main()
{
// print_frame(0);
#pragma omp parallel num_threads(1)
{
// print_frame(1);
print_ids(0);
print_ids(1);
// print_frame(0);
#pragma omp parallel num_threads(1)
{
// print_frame(1);
print_ids(0);
print_ids(1);
print_ids(2);
// print_frame(0);
#pragma omp task
{
// print_frame(1);
print_ids(0);
print_ids(1);
print_ids(2);
print_ids(3);
}
}
print_fuzzy_address(1);
}
print_fuzzy_address(2);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
// make sure initial data pointers are null
// CHECK-NOT: 0: parallel_data initially not null
// CHECK-NOT: 0: task_data initially not null
// CHECK-NOT: 0: thread_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[OUTER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[INNER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[INNER_RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[INNER_RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[OUTER_RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[OUTER_RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,57 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: two threads each perform an atomic increment, meet at an
// explicit barrier, and increment again before the parallel region ends.
// The FileCheck expectations kept at the end of this function describe the
// barrier and barrier-wait events expected per thread: first for the explicit
// barrier, then for the implicit barrier that closes the parallel region.
int main()
{
int x = 0;
#pragma omp parallel num_threads(2)
{
#pragma omp atomic
x++;
#pragma omp barrier
// The "current_address=" line matched below is expected to come from this
// call; it is compared with the codeptr_ra captured for the explicit barrier.
print_current_address();
#pragma omp atomic
x++;
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// master thread explicit barrier
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// master thread implicit barrier at parallel end
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
// worker thread explicit barrier
// (RETURN_ADDRESS is captured afresh here for the worker's own output)
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// worker thread implicit barrier at parallel end
// (the worker is expected to report the NULL pointer value as codeptr_ra)
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
return 0;
}

View File

@ -0,0 +1,55 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: a two-thread parallel region runs a worksharing loop that
// increments the four elements of y. The FileCheck expectations kept at the
// end of this function describe two barriers per thread: the one at the end
// of the loop and the one at the end of the parallel region.
int main()
{
int y[] = {0,1,2,3};
#pragma omp parallel num_threads(2)
{
//implicit barrier at end of for loop
int i;
#pragma omp for
for (i = 0; i < 4; i++)
{
y[i]++;
}
// No expectation below matches a "current_address=" line, so the output of
// this call is not verified by this test.
print_current_address();
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// master thread implicit barrier at loop end
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// master thread implicit barrier at parallel end
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// worker thread implicit barrier at loop end
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// worker thread implicit barrier after parallel
// (the worker is expected to report the NULL pointer value as codeptr_ra)
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
return 0;
}

View File

@ -0,0 +1,32 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: runs a "for simd" worksharing loop over the four elements of
// y. The loop is not nested inside a parallel construct in this function, and
// the FileCheck expectations below only describe a single thread's barrier
// events at the end of the loop.
int main()
{
int y[] = {0,1,2,3};
int i;
#pragma omp for simd
for (i = 0; i < 4; i++)
{
y[i]++;
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// master thread implicit barrier at simd loop end
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
return 0;
}

View File

@ -0,0 +1,40 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: a two-thread parallel region whose only synchronization is the
// implicit barrier at its end. The FileCheck expectations below describe that
// barrier for the master thread (with a captured return address) and for the
// worker thread (with the NULL pointer value as codeptr_ra).
int main()
{
int x = 0;
//implicit barrier at end of a parallel region
#pragma omp parallel num_threads(2)
{
#pragma omp atomic
x++;
}
// The "fuzzy_address=" line matched below is expected to come from this call;
// the expectations compare it with the master's codeptr_ra minus its last two
// hex digits.
print_fuzzy_address();
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// master thread implicit barrier at parallel end
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// worker thread implicit barrier at parallel end
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
return 0;
}

View File

@ -0,0 +1,63 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: a two-thread parallel region containing a sections construct
// with two sections, each atomically incrementing x. The FileCheck
// expectations below describe two barriers per thread: the one at the end of
// the sections construct and the one at the end of the parallel region.
int main()
{
int x = 0;
#pragma omp parallel num_threads(2)
{
//implicit barrier after sections with nowait but with lastprivates
// NOTE(review): the sections construct below has neither a nowait nor a
// lastprivate clause, so the line above does not describe this code -- confirm
// whether it is a leftover.
//implicit barrier at end of sections
#pragma omp sections
{
#pragma omp section
{
#pragma omp atomic
x++;
}
#pragma omp section
{
#pragma omp atomic
x++;
}
}
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// master thread implicit barrier at sections end
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// master thread implicit barrier at parallel end
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// worker thread implicit barrier at sections end
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// worker thread implicit barrier at parallel end
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
return 0;
}

View File

@ -0,0 +1,60 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: a two-thread parallel region with a single construct followed
// by a critical section. The FileCheck expectations below describe two
// barriers per thread: the one at the end of the single construct (matched
// against a printed fuzzy address) and the one at the end of the parallel
// region.
int main()
{
int x = 0;
#pragma omp parallel num_threads(2)
{
//implicit barrier at end of single
#pragma omp single
{
x++;
}
// Both threads are expected to emit a "fuzzy_address=" line here; each is
// compared with that thread's codeptr_ra minus its last two hex digits.
print_fuzzy_address();
//critical section to avoid merge of two barriers into one
#pragma omp critical
{
x++;
}
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// master thread implicit barrier at single end
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// master thread implicit barrier at parallel end
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// worker thread implicit barrier at single end
// (RETURN_ADDRESS is captured afresh here for the worker's own output)
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
// CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// worker thread implicit barrier at parallel end
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
return 0;
}

View File

@ -0,0 +1,31 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: enters a critical construct from the initial thread, prints an
// address inside it and another right after it. The FileCheck expectations
// below describe the wait/acquired events on entry and the release event on
// exit, each paired with the address line that immediately follows it.
int main()
{
#pragma omp critical
{
// Address line expected directly after the "acquired" event.
print_current_address(1);
print_ids(0);
}
// Address line expected directly after the "release" event.
print_current_address(2);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_critical: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,32 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// GCC generates code that does not call the runtime for the flush construct
// XFAIL: gcc
#include "callback.h"
#include <omp.h>
// Test driver: each of two threads executes a flush construct and then prints
// an address. The FileCheck expectations below require, per thread, a flush
// event followed by an address line that equals the event's codeptr_ra.
//
// The address lines re-use the thread id captured on the preceding flush line
// (a plain [[NAME]] reference). Writing [[NAME:regex]] there would start a new
// capture and accept an address line printed by any thread.
int main()
{
#pragma omp parallel num_threads(2)
{
// tid is not read afterwards; the call is kept so the statements in front of
// the flush stay exactly as they were.
int tid = omp_get_thread_num();
#pragma omp flush
print_current_address(1);
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_flush'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_flush: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: current_address=[[RETURN_ADDRESS]]
//
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_flush: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: current_address=[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,44 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: walks a simple lock through init, set, unset and destroy on
// the initial thread, printing an address after every call. The FileCheck
// expectations below pair each lock event with the address line that
// immediately follows it, and tie every wait_id to the printed lock address.
int main()
{
//need to use an OpenMP construct so that OMPT will be initialized
#pragma omp parallel num_threads(1)
print_ids(0);
omp_lock_t lock;
// Prints the lock's address as a decimal number; the expectations capture it
// as WAIT_ID and require the same value in every lock event.
printf("%" PRIu64 ": &lock: %lli\n", ompt_get_thread_data()->value, (long long) &lock);
omp_init_lock(&lock);
print_current_address(1);
omp_set_lock(&lock);
print_current_address(2);
omp_unset_lock(&lock);
print_current_address(3);
omp_destroy_lock(&lock);
print_current_address(4);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: &lock: [[WAIT_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_init_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,36 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
// GCC generates code that does not call the runtime for the master construct
// XFAIL: gcc
#include "callback.h"
#include <omp.h>
// Test driver: a two-thread parallel region with a master construct that
// increments x. The FileCheck expectations below describe the master begin
// and end events for one thread and compare their codeptr_ra values with the
// addresses printed inside and right after the construct.
int main()
{
int x = 0;
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
// Expected to emit the "fuzzy_address=" line compared with the begin event's
// codeptr_ra (minus its last two hex digits).
print_fuzzy_address(1);
x++;
}
// Expected to emit the "current_address=" line compared with the end event's
// codeptr_ra. This statement is outside the master construct, so it is
// reached by both threads.
print_current_address(2);
}
// No expectation below matches this "x=" line.
printf("%" PRIu64 ": x=%d\n", ompt_get_thread_data()->value, x);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_master_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS_END:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: current_address=[[RETURN_ADDRESS_END]]
return 0;
}

View File

@ -0,0 +1,52 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: walks a nestable lock through init, two nested set calls, two
// unset calls and destroy on the initial thread, printing an address after
// every call. The FileCheck expectations below pair each nest-lock event
// (first/next acquisition, prev/last release) with the address line that
// immediately follows it.
int main()
{
//need to use an OpenMP construct so that OMPT will be initialized
#pragma omp parallel num_threads(1)
print_ids(0);
omp_nest_lock_t nest_lock;
// Prints the lock's address as a decimal number. No expectation below matches
// this line; WAIT_ID is captured from the init event instead.
printf("%" PRIu64 ": &nest_lock: %lli\n", ompt_get_thread_data()->value, (long long) &nest_lock);
omp_init_nest_lock(&nest_lock);
print_current_address(1);
omp_set_nest_lock(&nest_lock);
print_current_address(2);
omp_set_nest_lock(&nest_lock);
print_current_address(3);
omp_unset_nest_lock(&nest_lock);
print_current_address(4);
omp_unset_nest_lock(&nest_lock);
print_current_address(5);
omp_destroy_nest_lock(&nest_lock);
print_current_address(6);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,31 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: enters an ordered construct from the initial thread, prints an
// address inside it and another right after it. The FileCheck expectations
// below describe the wait/acquired events on entry and the release event on
// exit, each paired with the address line that immediately follows it.
// The ordered construct is not nested inside a loop construct in this function.
int main()
{
#pragma omp ordered
{
// Address line expected directly after the "acquired" event.
print_current_address(1);
print_ids(0);
}
// Address line expected directly after the "release" event.
print_current_address(2);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_ordered: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,48 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <unistd.h>
#include <stdio.h>
// Test driver: inside a two-thread parallel region the master thread opens a
// taskgroup, creates one task that atomically increments x, and leaves the
// taskgroup. The FileCheck expectations below describe the taskgroup begin,
// wait and end events and compare their codeptr_ra values with the addresses
// printed at the start of and right after the taskgroup.
int main()
{
// NOTE(review): condition is never read or written again in this function.
int condition=0;
int x=0;
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
#pragma omp taskgroup
{
// Address line expected directly after the taskgroup begin event.
print_current_address(1);
#pragma omp task
{
#pragma omp atomic
x++;
}
}
// Address line expected directly after the taskgroup end event.
print_current_address(2);
}
}
// Check if libomp supports the callbacks for this test.
// NOTE(review): the guards below name master/task/cancel/thread_begin
// callbacks, while the events matched further down are taskgroup events --
// confirm whether a sync-region callback guard was intended here.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskgroup_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,35 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: inside a two-thread parallel region the master thread creates
// one task that increments x and then waits for it with taskwait. The
// FileCheck expectations below describe the taskwait begin, wait and end
// events and compare their codeptr_ra with the address printed right after
// the taskwait.
int main()
{
int x = 0;
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
#pragma omp task
{
x++;
}
#pragma omp taskwait
// Address line expected directly after the taskwait end event.
print_current_address(1);
}
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: ompt_event_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,54 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Test driver: exercises omp_test_lock on a simple lock from the initial
// thread, once while the lock is free and once while it is already held,
// printing an address after every lock call. The FileCheck expectations below
// pair each lock event with the address line that immediately follows it.
int main()
{
omp_lock_t lock;
omp_init_lock(&lock);
print_current_address(1);
// First test: the lock is free, so the expectations list a wait event
// followed by an acquired event. The return value is not inspected.
omp_test_lock(&lock);
print_current_address(2);
omp_unset_lock(&lock);
print_current_address(3);
omp_set_lock(&lock);
print_current_address(4);
// Second test: the lock was set just above, so the expectations list only a
// wait event for this call, directly followed by the address line.
omp_test_lock(&lock);
print_current_address(5);
omp_unset_lock(&lock);
print_current_address(6);
omp_destroy_lock(&lock);
print_current_address(7);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,42 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Drives one nestable lock through init / test / set / unset / destroy on a
// single thread. The expectation comments at the end of the body list the
// lock events this sequence should produce, in order.
int main()
{
omp_nest_lock_t nest_lock;
omp_init_nest_lock(&nest_lock);
// The lock is free here, so this test acquires it (expected: acquired "first").
omp_test_nest_lock(&nest_lock);
// Nesting count drops back to zero (expected: release "last").
omp_unset_nest_lock(&nest_lock);
// Acquire again from the free state (expected: acquired "first") ...
omp_set_nest_lock(&nest_lock);
// ... and re-acquire while already owned (expected: acquired "next").
omp_test_nest_lock(&nest_lock);
// Two releases unwind the nesting: "prev" first, then "last".
omp_unset_nest_lock(&nest_lock);
omp_unset_nest_lock(&nest_lock);
omp_destroy_nest_lock(&nest_lock);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
return 0;
}

View File

@ -0,0 +1,59 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Two-thread nestable-lock scenario: the master holds the lock while both
// threads call omp_test_nest_lock, so the master nests and the other thread is
// expected to fail. Each print_current_address(n) directly follows a lock
// call; the expectations compare the address it prints with the codeptr_ra
// reported for that lock event, so the call order must not be changed.
int main()
{
omp_nest_lock_t nest_lock;
omp_init_nest_lock(&nest_lock);
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
// First acquisition by the master (expected: acquired "first").
omp_set_nest_lock(&nest_lock);
print_current_address(1);
}
// Make sure the master owns the lock before anyone tests it.
#pragma omp barrier
omp_test_nest_lock(&nest_lock); //should fail for non-master
print_current_address(2);
// Both threads have finished their test before the master starts releasing.
#pragma omp barrier
#pragma omp master
{
// The master acquired twice (set + successful test), so it releases twice:
// "prev" first, then "last".
omp_unset_nest_lock(&nest_lock);
print_current_address(3);
omp_unset_nest_lock(&nest_lock);
print_current_address(4);
}
}
omp_destroy_nest_lock(&nest_lock);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
// CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]]
// CHECK-NEXT: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,53 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
#include <math.h>
#include <unistd.h>
// The master thread of a two-thread team creates two explicit tasks that are
// ordered through a dependence on x: the first declares depend(out:x), the
// second depend(in:x). The expectations look for both task creations with
// has_dependences=yes, one dependence record per task (ndeps=1), and one
// dependence pair linking the first task to the second.
int main()
{
int x = 0;
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
#pragma omp task depend(out:x)
{
x++;
// delay() is defined outside this file; presumably it keeps this task busy
// long enough that the second task is created while the dependence is still
// unresolved - TODO confirm against callback.h.
delay(100);
}
// The expectations compare the address printed here with the codeptr_ra
// reported for the first task creation (the last two hex digits are ignored).
print_fuzzy_address(1);
#pragma omp task depend(in:x)
{
x = -1;
}
}
}
x++;
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}{{[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter=[[NULL]], new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes
// CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1
// CHECK: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}{{[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter=[[NULL]], new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes
// CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1
// CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]]
return 0;
}

View File

@ -0,0 +1,100 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// The master thread of a two-thread team creates one explicit task and prints
// frame addresses (print_frame) and task/parallel ids (print_ids) at several
// points. The expectations at the end of the body describe the master
// thread's output first and the second thread's output afterwards; they
// expect the explicit task to be scheduled on the second thread while it
// waits in the explicit barrier.
int main()
{
int condition=0;
omp_set_nested(0);
// Frame of main; the expectations use it as the reenter frame of the initial task.
print_frame(0);
#pragma omp parallel num_threads(2)
{
// Frame one level up from the outlined region; the expectations use it as the
// exit frame of the implicit task.
print_frame(1);
print_ids(0);
print_ids(1);
print_frame(0);
#pragma omp master
{
print_ids(0);
#pragma omp task shared(condition)
{
// OMPT_SIGNAL / OMPT_WAIT are defined outside this file; presumably the task
// announces that it is running and the master waits for that announcement
// below - TODO confirm against callback.h.
OMPT_SIGNAL(condition);
print_frame(1);
print_ids(0);
print_ids(1);
print_ids(2);
}
// The expectations compare the address printed here with the codeptr_ra of
// the task creation event (the last two hex digits are ignored).
print_fuzzy_address(1);
OMPT_WAIT(condition,1);
print_ids(0);
}
#pragma omp barrier
print_ids(0);
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// nested parallel masters
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// <- ompt_event_task_create would be expected here
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// explicit barrier after master
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// implicit barrier parallel
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
// this is expected to come earlier and at MASTER:
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
// CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -0,0 +1,93 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
#include <math.h>
// The master thread of a two-thread team creates one explicit task whose `if`
// clause is false at run time, so the task is executed immediately by the
// master itself. The expectations at the end of the body therefore place the
// task's schedule/end events on the master between the task creation event and
// the fuzzy address line, and expect no explicit task on the second thread.
int main()
{
  omp_set_nested(0);
  // Frame of main; the expectations use it as the reenter frame of the initial
  // task.
  print_frame(0);
  #pragma omp parallel num_threads(2)
  {
    // Frame one level up from the outlined region; the expectations use it as
    // the exit frame of the implicit task.
    print_frame(1);
    print_ids(0);
    print_ids(1);
    print_frame(0);
    #pragma omp master
    {
      print_ids(0);
      // (int)sin(0.1) is 0. NOTE(review): the library call presumably keeps the
      // compiler from treating the clause as a compile-time constant - confirm.
      int t = (int)sin(0.1);
      #pragma omp task if(t)
      {
        print_frame(1);
        print_ids(0);
        print_ids(1);
        print_ids(2);
      }
      // The expectations compare the address printed here with the codeptr_ra
      // of the task creation event (the last two hex digits are ignored).
      print_fuzzy_address(1);
      // Back in the implicit task after the undeferred task has completed.
      print_ids(0);
    }
    print_ids(0);
  }
  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
  // make sure initial data pointers are null
  // CHECK-NOT: 0: new_task_data initially not null
  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
  // nested parallel masters
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
  // <- ompt_event_task_schedule ([[IMPLICIT_TASK_ID]], [[TASK_ID]]) would be expected here
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
  // <- ompt_event_task_schedule ([[TASK_ID]], [[IMPLICIT_TASK_ID]]) would be expected here
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
  // implicit barrier parallel
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
  return 0;
}

View File

@ -0,0 +1,90 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// The master thread of a two-thread team creates one explicit task; there is
// no explicit barrier in the region. The expectations at the end of the body
// expect the second thread to run the task after it has entered the implicit
// barrier that ends the parallel region, which is why the implicit task's
// frames are expected to be null while the explicit task prints its ancestors.
int main()
{
int condition=0;
omp_set_nested(0);
// Frame of main; the expectations use it as the reenter frame of the initial task.
print_frame(0);
#pragma omp parallel num_threads(2)
{
// Frame one level up from the outlined region; the expectations use it as the
// exit frame of the implicit task.
print_frame(1);
print_ids(0);
print_ids(1);
print_frame(0);
#pragma omp master
{
print_ids(0);
#pragma omp task shared(condition)
{
// OMPT_SIGNAL / OMPT_WAIT are defined outside this file; presumably the task
// announces that it is running and the master waits for that announcement
// below - TODO confirm against callback.h.
OMPT_SIGNAL(condition);
print_frame(1);
print_ids(0);
print_ids(1);
print_ids(2);
}
OMPT_WAIT(condition,1);
print_ids(0);
}
print_ids(0);
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// nested parallel masters
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// <- ompt_event_task_create would be expected here
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// implicit barrier parallel
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// implicit barrier parallel
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -0,0 +1,112 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
#include <math.h>
// Prints one line of the form
//   <thread value>: id=<id> task_type=<flag names>=<numeric flags>
// for the task at ancestor level 0, i.e. the task the caller is running in.
//
// id: marker chosen by the caller so each call site can be told apart in the
//     output.
//
// The critical section keeps the query and the printf of one thread from
// interleaving with another thread's.
void print_task_type(int id)
{
  #pragma omp critical
  {
    int task_type;
    char buffer[2048];
    // Only the type flags are requested; all other outputs are passed as NULL.
    ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL);
    // format_task_type is defined outside this file; judging by the expected
    // output it writes the flag names joined by '|' into buffer.
    format_task_type(task_type, buffer);
    printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type);
  }
}
// Creates tasks of every kind the test covers (initial, implicit, explicit,
// undeferred, untied, final, and a task nested in a final task) on a
// two-thread team and prints the type flags seen from inside each one. The
// expectations at the end of the body pair every task creation event with the
// matching "id=<n>" line printed by print_task_type.
int main()
{
  //initial task
  print_task_type(0);

  // x only gives the task bodies something to do; its value is never checked.
  // It is initialized so the increments below do not read an indeterminate
  // value.
  // NOTE(review): the deferred tasks increment the shared x without
  // synchronization - confirm that this is acceptable for the test.
  int x = 0;
  //implicit task
  #pragma omp parallel num_threads(1)
  {
    print_task_type(1);
    x++;
  }

  #pragma omp parallel num_threads(2)
  #pragma omp master
  {
    //explicit task
    #pragma omp task
    {
      print_task_type(2);
      x++;
    }

    //explicit task with undeferred
    #pragma omp task if(0)
    {
      print_task_type(3);
      x++;
    }

    //explicit task with untied
    #pragma omp task untied
    {
      print_task_type(4);
      x++;
    }

    //explicit task with final
    #pragma omp task final(1)
    {
      print_task_type(5);
      x++;
      //nested explicit task with final and undeferred
      #pragma omp task
      {
        print_task_type(6);
        x++;
      }
    }

    //Mergeable task test deactivated for now
    //explicit task with mergeable
    /*
    #pragma omp task mergeable if((int)sin(0))
    {
    print_task_type(7);
    x++;
    }
    */
    //TODO: merged task
  }
  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
  // CHECK-NOT: 0: parallel_data initially not null
  // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1
  // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730
  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no
  // CHECK-DAG: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit=4
  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
  // CHECK-DAG: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_untied=268435460, has_dependences=no
  // CHECK-DAG: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_untied=268435460
  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_final=536870916, has_dependences=no
  // CHECK-DAG: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_final=536870916
  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no
  // CHECK-DAG: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644
  return 0;
}

View File

@ -0,0 +1,112 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
// Prints one line of the form
//   <thread value>: id=<id> task_type=<flag names>=<numeric flags>
// for the task at ancestor level 0, i.e. the task the caller is running in.
//
// id: marker chosen by the caller so each call site can be told apart in the
//     output.
//
// The critical section keeps the query and the printf of one thread from
// interleaving with another thread's.
void print_task_type(int id)
{
  #pragma omp critical
  {
    int task_type;
    char buffer[2048];
    // Only the type flags are requested; all other outputs are passed as NULL.
    ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL);
    // format_task_type is defined outside this file; judging by the expected
    // output it writes the flag names joined by '|' into buffer.
    format_task_type(task_type, buffer);
    printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type);
  }
}
// Creates tasks of every kind the test covers (initial, implicit, explicit,
// undeferred, untied, final, and a task nested in a final task) on a team of
// one thread and prints the type flags seen from inside each one. With a
// single thread the expectations at the end of the body expect every explicit
// task to carry the undeferred flag, and they are matched in program order.
int main()
{
  //initial task
  print_task_type(0);

  // x only gives the task bodies something to do; its value is never checked.
  // It is initialized so the increments below do not read an indeterminate
  // value.
  int x = 0;
  //implicit task
  #pragma omp parallel num_threads(1)
  {
    print_task_type(1);
    x++;
  }

  #pragma omp parallel num_threads(1)
  #pragma omp master
  {
    //explicit task
    #pragma omp task
    {
      print_task_type(2);
      x++;
    }

    //explicit task with undeferred
    #pragma omp task if(0)
    {
      print_task_type(3);
      x++;
    }

    //explicit task with untied
    #pragma omp task untied
    {
      print_task_type(4);
      x++;
    }

    //explicit task with final
    #pragma omp task final(1)
    {
      print_task_type(5);
      x++;
      //nested explicit task with final and undeferred
      #pragma omp task
      {
        print_task_type(6);
        x++;
      }
    }

    /*
    //TODO:not working
    //explicit task with mergeable
    #pragma omp task mergeable
    {
    print_task_type(7);
    x++;
    }
    */
    //TODO: merged task
  }
  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
  // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1
  // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730
  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
  // CHECK: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
  // CHECK: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188, has_dependences=no
  // CHECK: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188
  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no
  // CHECK: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644
  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no
  // CHECK: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644
  // ___CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
  // ___CHECK: {{^[0-9]+}}: id=7 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
  return 0;
}

View File

@ -0,0 +1,62 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// Current GOMP interface implements taskyield as stub
// XFAIL: gcc
#include "callback.h"
#include <omp.h>
#include <unistd.h>
int main()
{
// Test body: the master thread creates two explicit tasks and then executes a
// taskyield. The expectations below look for a task switch on the master
// thread whose prior_task_status is ompt_task_yield.
// NOTE(review): OMPT_SIGNAL / OMPT_WAIT come from callback.h (not shown here);
// presumably SIGNAL bumps the counter and WAIT blocks until the counter has
// reached the given value -- confirm against callback.h.
int condition=0, x=0;
#pragma omp parallel num_threads(2)
{
#pragma omp master
{
// First task: signals once it has started, then waits for the final signal
// that the master sends after the taskyield.
#pragma omp task shared(condition)
{
OMPT_SIGNAL(condition);
OMPT_WAIT(condition,2);
}
// Presumably holds the master here until the first task has started.
OMPT_WAIT(condition,1);
// Second task: the expectations below require it to be scheduled on the
// master thread with prior_task_status=ompt_task_yield.
#pragma omp task shared(x)
{
x++;
}
printf("%" PRIu64 ": before yield\n", ompt_get_thread_data()->value);
#pragma omp taskyield
printf("%" PRIu64 ": after yield\n", ompt_get_thread_data()->value);
// Presumably releases the first task from its OMPT_WAIT(condition,2).
OMPT_SIGNAL(condition);
}
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[0-9]+}}, thread_num={{[0-9]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[WORKER_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[MAIN_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[MAIN_TASK]], prior_task_status=ompt_task_yield=2
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[MAIN_TASK]], second_task_id=[[IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_others=4
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[WORKER_TASK]], second_task_id={{[0-9]+}}, prior_task_status=ompt_task_complete=1
return 0;
}

View File

@ -0,0 +1,107 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include <omp.h>
int main()
{
// Test body: prints frame addresses and task ids at several ancestor levels
// around an untied explicit task that itself contains an undeferred (if(0))
// task.
// NOTE(review): print_frame(n) and print_ids(n) are helpers from callback.h
// (not shown here). Judging by the expected output below they print
// "__builtin_frame_address(n)=..." and "task level n: ..." respectively.
int condition=0;
// Turn nested parallelism off.
omp_set_nested(0);
print_frame(0);
#pragma omp parallel num_threads(2)
{
print_frame(1);
print_ids(0);
print_ids(1);
print_frame(0);
#pragma omp master
{
print_ids(0);
// Untied explicit task created by the master thread. The expectations below
// look for its task_schedule events on the other thread of the team.
#pragma omp task untied shared(condition)
{
OMPT_SIGNAL(condition);
print_frame(1);
print_ids(0);
print_ids(1);
print_ids(2);
// Undeferred task nested inside the untied task.
#pragma omp task if(0)
{
print_ids(0);
print_ids(1);
print_ids(2);
}
print_ids(0);
print_ids(1);
print_ids(2);
}
// Presumably keeps the master inside the master region until the untied task
// has signalled that it started -- confirm against OMPT_WAIT in callback.h.
OMPT_WAIT(condition,1);
print_ids(0);
}
#pragma omp barrier
print_ids(0);
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// master thread of the parallel region
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// <- ompt_event_task_create would be expected here
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// explicit barrier after master
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// implicit barrier parallel
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
// this is expected to come earlier and at MASTER:
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
// CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -0,0 +1,8 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
// REQUIRES: ompt
// GCC doesn't call runtime for auto = static schedule
// XFAIL: gcc
#define SCHEDULE auto
#include "base_split.h"

View File

@ -9,28 +9,35 @@ int main()
for (i = 0; i < 4; i++) {
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker={{.*}}
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -8,14 +8,21 @@ int main()
#pragma omp parallel for num_threads(1) schedule(SCHEDULE)
for (i = 0; i < 1; i++) {
}
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=0x{{[0-f]+}}, invoker={{.+}}
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
return 0;
}

View File

@ -0,0 +1,66 @@
#include "callback.h"
#include <omp.h>
/* With the combined parallel-for construct (base.h), the return addresses are hard to compare.
With the separate parallel and for-nowait constructs, the addresses become more predictable,
but the beginning of the for-loop still generates additional code, so the offset from loop-begin
to the label is more than 4 bytes.
*/
int main()
{
// Test body shared by the *_split tests: a parallel region with a separate
// "for ... nowait" loop. SCHEDULE is #defined by the including test file
// before this header is included.
// NOTE(review): print_current_address / print_fuzzy_address are helpers from
// callback.h (not shown here). The expectations below compare their
// "fuzzy_address=" output with codeptr_ra values whose last two hex digits
// have been dropped.
unsigned int i;
#pragma omp parallel num_threads(4)
{
print_current_address(0);
#pragma omp for schedule(SCHEDULE) nowait
for (i = 0; i < 4; i++) {
print_fuzzy_address(1);
}
print_fuzzy_address(2);
}
print_fuzzy_address(3);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[PARALLEL_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, invoker={{[0-9]+}}, codeptr_ra=[[PARALLEL_RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[PARALLEL_RETURN_ADDRESS]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
// Second expectation set, selected by the including tests through their
// --check-prefix option: the four fuzzy addresses printed inside the loop
// body are matched against the master's loop-begin return address.
// CHECK-LOOP: 0: NULL_POINTER=[[NULL:.*$]]
// CHECK-LOOP: 0: ompt_event_runtime_shutdown
// CHECK-LOOP: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra={{0x[0-f]+}}, invoker={{[0-9]+}}
// CHECK-LOOP: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
// CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
// CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
// CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
return 0;
}

View File

@ -0,0 +1,6 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
// REQUIRES: ompt
#define SCHEDULE dynamic
#include "base_split.h"

View File

@ -0,0 +1,6 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
// REQUIRES: ompt
#define SCHEDULE guided
#include "base_split.h"

View File

@ -0,0 +1,6 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
// REQUIRES: ompt
#define SCHEDULE runtime
#include "base_split.h"

View File

@ -0,0 +1,8 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
// REQUIRES: ompt
// GCC doesn't call runtime for static schedule
// XFAIL: gcc
#define SCHEDULE static
#include "base_split.h"

View File

@ -0,0 +1,36 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// GCC generates code that does not distinguish between sections and loops
// XFAIL: gcc
#include "callback.h"
#include <inttypes.h>
#include <omp.h>
int main()
{
  // Test body: a combined "parallel sections" construct with two sections on
  // two threads. Each section prints the value stored in the executing
  // thread's OMPT thread data.
  // The value is printed with the PRIu64 format macro (as the taskyield test
  // does for the same field) rather than "%lu", which is only correct where
  // unsigned long happens to be 64 bits wide.
  #pragma omp parallel sections num_threads(2)
  {
    #pragma omp section
    {
      printf("%" PRIu64 ": section 1\n", ompt_get_thread_data()->value);
    }
    #pragma omp section
    {
      printf("%" PRIu64 ": section 2\n", ompt_get_thread_data()->value);
    }
  }
  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN:0x[0-f]+]], count=2
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END:0x[0-f]+]]
  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN]], count=2
  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END]]
  return 0;
}

View File

@ -0,0 +1,36 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// GCC generates code that does not call the runtime for the single construct
// XFAIL: gcc
#include "callback.h"
#include <omp.h>
int main()
{
  // Test body: a two-thread parallel region containing one single construct.
  // The counter is bumped inside the single block and printed after the
  // region ends.
  int single_hits = 0;
  #pragma omp parallel num_threads(2)
  {
    #pragma omp single
    {
      single_hits += 1;
    }
  }
  printf("x=%d\n", single_hits);
  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
  // CHECK: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_single_in_block_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1
  // CHECK: {{^}}[[THREAD_ID_1]]: ompt_event_single_in_block_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1
  // CHECK: {{^}}[[THREAD_ID_2:[0-9]+]]: ompt_event_single_others_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1
  // CHECK: {{^}}[[THREAD_ID_2]]: ompt_event_single_others_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1
  return 0;
}