[openmp] libomp: added itt notifications for task, taskwait, taskgroup

Add releasing->acquire edges for child task->taskwait and
child task->end of taskgroup.

Differential Revision: https://reviews.llvm.org/D83804
This commit is contained in:
AndreyChurbanov 2020-07-16 14:28:09 +03:00
parent ff2f5c3e58
commit ffd8f00931
2 changed files with 57 additions and 9 deletions

View File

@ -4301,6 +4301,39 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
// Suppress race condition detection on synchronization flags in debug mode;
// this helps analyze library internals by eliminating false positives.
__itt_suppress_mark_range(
__itt_suppress_range, __itt_suppress_threading_errors,
&new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
__itt_suppress_mark_range(
__itt_suppress_range, __itt_suppress_threading_errors,
&new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
#if KMP_OS_WINDOWS
__itt_suppress_mark_range(
__itt_suppress_range, __itt_suppress_threading_errors,
&new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
#else
__itt_suppress_mark_range(__itt_suppress_range,
__itt_suppress_threading_errors,
&new_thr->th.th_suspend_init_count,
sizeof(new_thr->th.th_suspend_init_count));
#endif
// TODO: check if we need to also suppress b_arrived flags
__itt_suppress_mark_range(__itt_suppress_range,
__itt_suppress_threading_errors,
CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
sizeof(new_thr->th.th_bar[0].bb.b_go));
__itt_suppress_mark_range(__itt_suppress_range,
__itt_suppress_threading_errors,
CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
sizeof(new_thr->th.th_bar[1].bb.b_go));
__itt_suppress_mark_range(__itt_suppress_range,
__itt_suppress_threading_errors,
CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
sizeof(new_thr->th.th_bar[2].bb.b_go));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
if (__kmp_storage_map) {
__kmp_print_thread_storage_map(new_thr, new_gtid);
}

View File

@ -420,7 +420,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
(thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
TCW_4(thread_data->td.td_deque_ntasks,
TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
KMP_FSYNC_RELEASING(taskdata); // releasing child
KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
"task=%p ntasks=%d head=%u tail=%u\n",
gtid, taskdata, thread_data->td.td_deque_ntasks,
@ -1560,6 +1561,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
else
kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
}
KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
#endif
#ifdef KMP_GOMP_COMPAT
@ -1577,11 +1579,12 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
// Barrier imbalance - adjust arrive time with the task duration
thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
}
KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
#endif
}
// Proxy tasks are not handled by the runtime
if (taskdata->td_flags.proxy != TASK_PROXY) {
ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
@ -1883,6 +1886,7 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
#if USE_ITT_BUILD
if (itt_sync_obj != NULL)
__kmp_itt_taskwait_finished(gtid, itt_sync_obj);
KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children
#endif /* USE_ITT_BUILD */
// Debugger: The taskwait is completed. Location remains, but thread is
@ -2521,6 +2525,7 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
#if USE_ITT_BUILD
if (itt_sync_obj != NULL)
__kmp_itt_taskwait_finished(gtid, itt_sync_obj);
KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with descendants
#endif /* USE_ITT_BUILD */
}
KMP_DEBUG_ASSERT(taskgroup->count == 0);
@ -3341,15 +3346,25 @@ static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
"task team for team %p\n",
__kmp_gtid_from_thread(thread), team));
// Allocate a new task team if one is not available.
// Cannot use __kmp_thread_malloc() because threads not around for
// kmp_reap_task_team( ).
// Allocate a new task team if one is not available. Cannot use
// __kmp_thread_malloc because threads not around for kmp_reap_task_team.
task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
__kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
// AC: __kmp_allocate zeroes returned memory
// task_team -> tt.tt_threads_data = NULL;
// task_team -> tt.tt_max_threads = 0;
// task_team -> tt.tt_next = NULL;
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
// Suppress race condition detection on synchronization flags in debug mode;
// this helps analyze library internals by eliminating false positives.
__itt_suppress_mark_range(
__itt_suppress_range, __itt_suppress_threading_errors,
&task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
__itt_suppress_mark_range(__itt_suppress_range,
__itt_suppress_threading_errors,
CCAST(kmp_uint32 *, &task_team->tt.tt_active),
sizeof(task_team->tt.tt_active));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
// Note: __kmp_allocate zeroes returned memory, otherwise we would need:
// task_team->tt.tt_threads_data = NULL;
// task_team->tt.tt_max_threads = 0;
// task_team->tt.tt_next = NULL;
}
TCW_4(task_team->tt.tt_found_tasks, FALSE);