forked from OSchip/llvm-project
Fix OMPT support for task frames, parallel regions, and parallel regions + loops
This patch makes it possible for a performance tool that uses call stack unwinding to map implementation-level call stacks from master and worker threads into a unified global view. There are several components to this patch. include/*/ompt.h.var: Add a new enumeration type that indicates whether the code for a master task for a parallel region is invoked by the user program or the runtime system. Change the signature for OMPT parallel begin/end callbacks to indicate whether the master task will be invoked by the program or the runtime system. This enables a performance tool using call stack unwinding to handle these two cases differently. For this case, a profiler that uses call stack unwinding needs to know that the call path prefix for the master task may differ from the one available within the begin/end callbacks if the program invokes the master. kmp.h: Change the signature for __kmp_join_call to take an additional parameter indicating the fork_context type. This is needed to supply the OMPT parallel end callback with information about whether the compiler or the runtime invoked the master task for a parallel region. kmp_csupport.c: Ensure that the OMPT task frame field reenter_runtime_frame is properly set and cleared before and after calls to fork and join threads for a parallel region. Adjust the code for the new signature for __kmp_join_call. Adjust the OMPT parallel begin callback invocations to carry the extra parameter indicating whether the program or the runtime invokes the master task for a parallel region. kmp_gsupport.c: Apply all of the analogous changes described for kmp_csupport.c for the GOMP interface. Add OMPT support for the GOMP combined parallel region + loop API to maintain the OMPT task frame field reenter_runtime_frame.
kmp_runtime.c: Use the new information passed by __kmp_join_call to adjust the OMPT parallel end callback invocations to carry the extra parameter indicating whether the program or the runtime invokes the master task for a parallel region. ompt_internal.h: Use the flavor of the parallel region API (GNU or Intel) to determine who invokes the master task. Differential Revision: http://reviews.llvm.org/D11259 llvm-svn: 242817
This commit is contained in:
parent
16154afb06
commit
3fdf3294ab
|
@ -81,7 +81,7 @@
|
|||
\
|
||||
/*--- Mandatory Events ---*/ \
|
||||
macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
|
||||
macro (ompt_event_parallel_end, ompt_parallel_callback_t, 2) /* parallel end */ \
|
||||
macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
|
||||
\
|
||||
macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
|
||||
macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
|
||||
|
@ -267,6 +267,11 @@ typedef enum {
|
|||
ompt_thread_other = 3
|
||||
} ompt_thread_type_t;
|
||||
|
||||
typedef enum {
|
||||
ompt_invoker_program = 0, /* program invokes master task */
|
||||
ompt_invoker_runtime = 1 /* runtime invokes master task */
|
||||
} ompt_invoker_t;
|
||||
|
||||
typedef void (*ompt_thread_type_callback_t) (
|
||||
ompt_thread_type_t thread_type, /* type of thread */
|
||||
ompt_thread_id_t thread_id /* ID of thread */
|
||||
|
@ -293,7 +298,14 @@ typedef void (*ompt_new_parallel_callback_t) (
|
|||
ompt_frame_t *parent_task_frame, /* frame data of parent task */
|
||||
ompt_parallel_id_t parallel_id, /* id of parallel region */
|
||||
uint32_t requested_team_size, /* number of threads in team */
|
||||
void *parallel_function /* pointer to outlined function */
|
||||
void *parallel_function, /* pointer to outlined function */
|
||||
ompt_invoker_t invoker /* who invokes master task? */
|
||||
);
|
||||
|
||||
typedef void (*ompt_end_parallel_callback_t) (
|
||||
ompt_parallel_id_t parallel_id, /* id of parallel region */
|
||||
ompt_task_id_t task_id, /* id of task */
|
||||
ompt_invoker_t invoker /* who invokes master task? */
|
||||
);
|
||||
|
||||
/* tasks */
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
\
|
||||
/*--- Mandatory Events ---*/ \
|
||||
macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
|
||||
macro (ompt_event_parallel_end, ompt_parallel_callback_t, 2) /* parallel end */ \
|
||||
macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
|
||||
\
|
||||
macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
|
||||
macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
|
||||
|
@ -267,6 +267,11 @@ typedef enum {
|
|||
ompt_thread_other = 3
|
||||
} ompt_thread_type_t;
|
||||
|
||||
typedef enum {
|
||||
ompt_invoker_program = 0, /* program invokes master task */
|
||||
ompt_invoker_runtime = 1 /* runtime invokes master task */
|
||||
} ompt_invoker_t;
|
||||
|
||||
typedef void (*ompt_thread_type_callback_t) (
|
||||
ompt_thread_type_t thread_type, /* type of thread */
|
||||
ompt_thread_id_t thread_id /* ID of thread */
|
||||
|
@ -293,7 +298,14 @@ typedef void (*ompt_new_parallel_callback_t) (
|
|||
ompt_frame_t *parent_task_frame, /* frame data of parent task */
|
||||
ompt_parallel_id_t parallel_id, /* id of parallel region */
|
||||
uint32_t requested_team_size, /* number of threads in team */
|
||||
void *parallel_function /* pointer to outlined function */
|
||||
void *parallel_function, /* pointer to outlined function */
|
||||
ompt_invoker_t invoker /* who invokes master task? */
|
||||
);
|
||||
|
||||
typedef void (*ompt_end_parallel_callback_t) (
|
||||
ompt_parallel_id_t parallel_id, /* id of parallel region */
|
||||
ompt_task_id_t task_id, /* id of task */
|
||||
ompt_invoker_t invoker /* who invokes master task? */
|
||||
);
|
||||
|
||||
/* tasks */
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
\
|
||||
/*--- Mandatory Events ---*/ \
|
||||
macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
|
||||
macro (ompt_event_parallel_end, ompt_parallel_callback_t, 2) /* parallel end */ \
|
||||
macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
|
||||
\
|
||||
macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
|
||||
macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
|
||||
|
@ -267,6 +267,11 @@ typedef enum {
|
|||
ompt_thread_other = 3
|
||||
} ompt_thread_type_t;
|
||||
|
||||
typedef enum {
|
||||
ompt_invoker_program = 0, /* program invokes master task */
|
||||
ompt_invoker_runtime = 1 /* runtime invokes master task */
|
||||
} ompt_invoker_t;
|
||||
|
||||
typedef void (*ompt_thread_type_callback_t) (
|
||||
ompt_thread_type_t thread_type, /* type of thread */
|
||||
ompt_thread_id_t thread_id /* ID of thread */
|
||||
|
@ -293,7 +298,14 @@ typedef void (*ompt_new_parallel_callback_t) (
|
|||
ompt_frame_t *parent_task_frame, /* frame data of parent task */
|
||||
ompt_parallel_id_t parallel_id, /* id of parallel region */
|
||||
uint32_t requested_team_size, /* number of threads in team */
|
||||
void *parallel_function /* pointer to outlined function */
|
||||
void *parallel_function, /* pointer to outlined function */
|
||||
ompt_invoker_t invoker /* who invokes master task? */
|
||||
);
|
||||
|
||||
typedef void (*ompt_end_parallel_callback_t) (
|
||||
ompt_parallel_id_t parallel_id, /* id of parallel region */
|
||||
ompt_task_id_t task_id, /* id of task */
|
||||
ompt_invoker_t invoker /* who invokes master task? */
|
||||
);
|
||||
|
||||
/* tasks */
|
||||
|
|
|
@ -3108,7 +3108,7 @@ extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_con
|
|||
#endif
|
||||
);
|
||||
|
||||
extern void __kmp_join_call( ident_t *loc, int gtid
|
||||
extern void __kmp_join_call( ident_t *loc, int gtid, enum fork_context_e fork_context
|
||||
#if OMP_40_ENABLED
|
||||
, int exit_teams = 0
|
||||
#endif
|
||||
|
|
|
@ -289,11 +289,13 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
|
|||
va_start( ap, microtask );
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
int tid = __kmp_tid_from_gtid( gtid );
|
||||
kmp_info_t *master_th = __kmp_threads[ gtid ];
|
||||
kmp_team_t *parent_team = master_th->th.th_team;
|
||||
int tid = __kmp_tid_from_gtid( gtid );
|
||||
parent_team->t.t_implicit_task_taskdata[tid].
|
||||
ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
|
||||
if (ompt_status & ompt_status_track) {
|
||||
parent_team->t.t_implicit_task_taskdata[tid].
|
||||
ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_SSC_MARKS
|
||||
|
@ -316,7 +318,7 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
|
|||
#if INCLUDE_SSC_MARKS
|
||||
SSC_MARK_JOINING();
|
||||
#endif
|
||||
__kmp_join_call( loc, gtid );
|
||||
__kmp_join_call( loc, gtid, fork_context_intel );
|
||||
|
||||
va_end( ap );
|
||||
|
||||
|
@ -372,6 +374,15 @@ __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
|
|||
this_thr->th.th_teams_microtask = microtask;
|
||||
this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
kmp_team_t *parent_team = this_thr->th.th_team;
|
||||
int tid = __kmp_tid_from_gtid( gtid );
|
||||
if (ompt_status & ompt_status_track) {
|
||||
parent_team->t.t_implicit_task_taskdata[tid].
|
||||
ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
// check if __kmpc_push_num_teams called, set default number of teams otherwise
|
||||
if ( this_thr->th.th_teams_size.nteams == 0 ) {
|
||||
__kmp_push_num_teams( loc, gtid, 0, 0 );
|
||||
|
@ -393,7 +404,15 @@ __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
|
|||
ap
|
||||
#endif
|
||||
);
|
||||
__kmp_join_call( loc, gtid );
|
||||
__kmp_join_call( loc, gtid, fork_context_intel );
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_status & ompt_status_track) {
|
||||
parent_team->t.t_implicit_task_taskdata[tid].
|
||||
ompt_task_info.frame.reenter_runtime_frame = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
this_thr->th.th_teams_microtask = NULL;
|
||||
this_thr->th.th_teams_level = 0;
|
||||
*(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
|
||||
|
|
|
@ -407,7 +407,8 @@ __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *
|
|||
int team_size = 1;
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
|
||||
ompt_task_id, ompt_frame, ompt_parallel_id,
|
||||
team_size, (void *) task);
|
||||
team_size, (void *) task,
|
||||
OMPT_INVOKER(fork_context_gnu));
|
||||
}
|
||||
|
||||
// set up lightweight task
|
||||
|
@ -488,8 +489,10 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
|
|||
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
|
||||
parallel_id = team_info->parallel_id;
|
||||
|
||||
// Record that we re-entered the runtime system in the implicit
|
||||
// task frame representing the parallel region.
|
||||
ompt_frame = __ompt_get_task_frame_internal(0);
|
||||
ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
|
||||
ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
|
||||
|
||||
#if OMPT_TRACE
|
||||
if ((ompt_status == ompt_status_track_callback) &&
|
||||
|
@ -503,7 +506,19 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
|
|||
// unlink if necessary. no-op if there is not a lightweight task.
|
||||
ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr);
|
||||
// GOMP allocates/frees lwt since it can't be kept on the stack
|
||||
if (lwt) __kmp_free(lwt);
|
||||
if (lwt) {
|
||||
__kmp_free(lwt);
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_status & ompt_status_track) {
|
||||
// Since a lightweight task was destroyed, make sure that the
|
||||
// remaining deepest task knows the stack frame where the runtime
|
||||
// was reentered.
|
||||
ompt_frame = __ompt_get_task_frame_internal(0);
|
||||
ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -511,26 +526,48 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
|
|||
kmp_info_t *thr = __kmp_threads[gtid];
|
||||
__kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
|
||||
thr->th.th_team);
|
||||
__kmp_join_call(&loc, gtid);
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_status & ompt_status_track) {
|
||||
// Set reenter frame in parent task, which will become current task
|
||||
// in the midst of join. This is needed before the end_parallel callback.
|
||||
ompt_frame = __ompt_get_task_frame_internal(1);
|
||||
ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
__kmp_join_call(&loc, gtid, fork_context_gnu);
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_status & ompt_status_track) {
|
||||
ompt_frame->reenter_runtime_frame = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
__kmpc_end_serialized_parallel(&loc, gtid);
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_status & ompt_status_track) {
|
||||
// Record that we re-entered the runtime system in the frame that
|
||||
// created the parallel region.
|
||||
ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
|
||||
|
||||
if ((ompt_status == ompt_status_track_callback) &&
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
|
||||
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
|
||||
parallel_id, task_info->task_id);
|
||||
parallel_id, task_info->task_id,
|
||||
OMPT_INVOKER(fork_context_gnu));
|
||||
}
|
||||
|
||||
ompt_frame->reenter_runtime_frame = NULL;
|
||||
|
||||
thr->th.ompt_thread_info.state =
|
||||
(((thr->th.th_team)->t.t_serialized) ?
|
||||
ompt_state_work_serial : ompt_state_work_parallel);
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -817,7 +854,7 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
|
|||
// There are no ull versions (yet).
|
||||
//
|
||||
|
||||
#define PARALLEL_LOOP_START(func, schedule) \
|
||||
#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \
|
||||
void func (void (*task) (void *), void *data, unsigned num_threads, \
|
||||
long lb, long ub, long str, long chunk_sz) \
|
||||
{ \
|
||||
|
@ -826,6 +863,8 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
|
|||
KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
|
||||
gtid, lb, ub, str, chunk_sz )); \
|
||||
\
|
||||
ompt_pre(); \
|
||||
\
|
||||
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
|
||||
if (num_threads != 0) { \
|
||||
__kmp_push_num_threads(&loc, gtid, num_threads); \
|
||||
|
@ -843,14 +882,45 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
|
|||
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
|
||||
(schedule) != kmp_sch_static); \
|
||||
\
|
||||
ompt_post(); \
|
||||
\
|
||||
KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \
|
||||
}
|
||||
|
||||
|
||||
PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), kmp_sch_static)
|
||||
PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked)
|
||||
PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), kmp_sch_guided_chunked)
|
||||
PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), kmp_sch_runtime)
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
|
||||
#define OMPT_LOOP_PRE() \
|
||||
ompt_frame_t *parent_frame; \
|
||||
if (ompt_status & ompt_status_track) { \
|
||||
parent_frame = __ompt_get_task_frame_internal(0); \
|
||||
parent_frame->reenter_runtime_frame = __builtin_frame_address(0); \
|
||||
}
|
||||
|
||||
|
||||
#define OMPT_LOOP_POST() \
|
||||
if (ompt_status & ompt_status_track) { \
|
||||
parent_frame->reenter_runtime_frame = NULL; \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define OMPT_LOOP_PRE()
|
||||
|
||||
#define OMPT_LOOP_POST()
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START),
|
||||
kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START),
|
||||
kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START),
|
||||
kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START),
|
||||
kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
|
||||
|
||||
//
|
||||
|
|
|
@ -1558,7 +1558,7 @@ __kmp_fork_call(
|
|||
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
|
||||
ompt_task_id, ompt_frame, ompt_parallel_id,
|
||||
team_size, unwrapped_task);
|
||||
team_size, unwrapped_task, OMPT_INVOKER(call_context));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1646,7 +1646,8 @@ __kmp_fork_call(
|
|||
if ((ompt_status == ompt_status_track_callback) &&
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
|
||||
ompt_parallel_id, ompt_task_id);
|
||||
ompt_parallel_id, ompt_task_id,
|
||||
OMPT_INVOKER(call_context));
|
||||
}
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
|
@ -1821,7 +1822,8 @@ __kmp_fork_call(
|
|||
if ((ompt_status == ompt_status_track_callback) &&
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
|
||||
ompt_parallel_id, ompt_task_id);
|
||||
ompt_parallel_id, ompt_task_id,
|
||||
OMPT_INVOKER(call_context));
|
||||
}
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
|
@ -1927,7 +1929,8 @@ __kmp_fork_call(
|
|||
if ((ompt_status == ompt_status_track_callback) &&
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
|
||||
ompt_parallel_id, ompt_task_id);
|
||||
ompt_parallel_id, ompt_task_id,
|
||||
OMPT_INVOKER(call_context));
|
||||
}
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
|
@ -2253,12 +2256,13 @@ static inline void
|
|||
__kmp_join_ompt(
|
||||
kmp_info_t *thread,
|
||||
kmp_team_t *team,
|
||||
ompt_parallel_id_t parallel_id)
|
||||
ompt_parallel_id_t parallel_id,
|
||||
fork_context_e fork_context)
|
||||
{
|
||||
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
|
||||
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
|
||||
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
|
||||
parallel_id, task_info->task_id);
|
||||
parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
|
||||
}
|
||||
|
||||
__kmp_join_restore_state(thread,team);
|
||||
|
@ -2266,7 +2270,7 @@ __kmp_join_ompt(
|
|||
#endif
|
||||
|
||||
void
|
||||
__kmp_join_call(ident_t *loc, int gtid
|
||||
__kmp_join_call(ident_t *loc, int gtid, enum fork_context_e fork_context
|
||||
#if OMP_40_ENABLED
|
||||
, int exit_teams
|
||||
#endif /* OMP_40_ENABLED */
|
||||
|
@ -2424,7 +2428,7 @@ __kmp_join_call(ident_t *loc, int gtid
|
|||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_status == ompt_status_track_callback) {
|
||||
__kmp_join_ompt(master_th, parent_team, parallel_id);
|
||||
__kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -2515,7 +2519,7 @@ __kmp_join_call(ident_t *loc, int gtid
|
|||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_status == ompt_status_track_callback) {
|
||||
__kmp_join_ompt(master_th, parent_team, parallel_id);
|
||||
__kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -6960,8 +6964,10 @@ __kmp_teams_master( int gtid )
|
|||
#if INCLUDE_SSC_MARKS
|
||||
SSC_MARK_JOINING();
|
||||
#endif
|
||||
__kmp_join_call( loc, gtid, 1 ); // AC: last parameter "1" eliminates join barrier which won't work because
|
||||
// worker threads are in a fork barrier waiting for more parallel regions
|
||||
|
||||
// AC: last parameter "1" eliminates join barrier which won't work because
|
||||
// worker threads are in a fork barrier waiting for more parallel regions
|
||||
__kmp_join_call( loc, gtid, fork_context_intel, 1 );
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#define _OMP_EXTERN extern "C"
|
||||
|
||||
/*
 * Map a fork context value to the OMPT invoker kind reported to tools:
 * fork_context_gnu yields ompt_invoker_program, and every other value
 * yields ompt_invoker_runtime.
 *
 * The argument is parenthesized so that an expression argument (for
 * example a conditional expression) is compared as a whole instead of
 * being split by the higher precedence of `==`.
 */
#define OMPT_INVOKER(x) \
  (((x) == fork_context_gnu) ? ompt_invoker_program : ompt_invoker_runtime)
|
||||
|
||||
|
||||
#define ompt_callback(e) e ## _callback
|
||||
|
|
Loading…
Reference in New Issue