D9302.partial2: cleanup of ittnotify checks that eliminates redundant notifications in case of nested regions.

llvm-svn: 236631
This commit is contained in:
Andrey Churbanov 2015-05-06 19:22:36 +00:00
parent 036181471c
commit 51aecb82cd
5 changed files with 126 additions and 114 deletions

View File

@ -57,7 +57,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier imbalance - save arrive time to the thread
if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
}
#endif
@ -97,7 +97,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
USE_ITT_BUILD_ARG(itt_sync_obj) );
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier imbalance - write min of the thread time and the other thread time to the thread.
if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
if (__kmp_forkjoin_frames_mode == 2) {
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
other_threads[i]->th.th_bar_min_time);
}
@ -234,7 +234,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier imbalance - save arrive time to the thread
if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
}
#endif
@ -262,7 +262,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
USE_ITT_BUILD_ARG(itt_sync_obj) );
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier imbalance - write min of the thread time and a child time to the thread.
if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
if (__kmp_forkjoin_frames_mode == 2) {
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
child_thr->th.th_bar_min_time);
}
@ -432,7 +432,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier imbalance - save arrive time to the thread
if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
}
#endif
@ -485,7 +485,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
USE_ITT_BUILD_ARG(itt_sync_obj) );
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier imbalance - write min of the thread time and a child time to the thread.
if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
if (__kmp_forkjoin_frames_mode == 2) {
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
child_thr->th.th_bar_min_time);
}
@ -1147,24 +1147,29 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
__kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier - report frame end
if (__itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode) {
// Barrier - report frame end (only if active_level == 1)
if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
this_thr->th.th_teams_microtask == NULL &&
#endif
team->t.t_active_level == 1)
{
kmp_uint64 cur_time = __itt_get_timestamp();
kmp_info_t **other_threads = this_thr->th.th_team->t.t_threads;
kmp_info_t **other_threads = team->t.t_threads;
int nproc = this_thr->th.th_team_nproc;
int i;
// Initialize with master's wait time
kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
switch(__kmp_forkjoin_frames_mode) {
case 1:
__kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
this_thr->th.th_frame_time = cur_time;
break;
case 2:
case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed)
__kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
break;
case 3:
if( __itt_metadata_add_ptr ) {
// Initialize with master's wait time
kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
for (i=1; i<nproc; ++i) {
delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
}
@ -1413,14 +1418,17 @@ __kmp_join_barrier(int gtid)
# if USE_ITT_BUILD && USE_ITT_NOTIFY
// Join barrier - report frame end
if (__itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode) {
if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
this_thr->th.th_teams_microtask == NULL &&
#endif
team->t.t_active_level == 1)
{
kmp_uint64 cur_time = __itt_get_timestamp();
ident_t * loc = team->t.t_ident;
kmp_info_t **other_threads = this_thr->th.th_team->t.t_threads;
kmp_info_t **other_threads = team->t.t_threads;
int nproc = this_thr->th.th_team_nproc;
int i;
// Initialize with master's wait time
kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
switch(__kmp_forkjoin_frames_mode) {
case 1:
__kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
@ -1430,6 +1438,8 @@ __kmp_join_barrier(int gtid)
break;
case 3:
if( __itt_metadata_add_ptr ) {
// Initialize with master's wait time
kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
for (i=1; i<nproc; ++i) {
delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
}

View File

@ -535,27 +535,30 @@ __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
#if USE_ITT_BUILD
kmp_uint64 cur_time = 0;
#if USE_ITT_NOTIFY
if( __itt_get_timestamp_ptr ) {
if ( __itt_get_timestamp_ptr ) {
cur_time = __itt_get_timestamp();
}
#endif /* USE_ITT_NOTIFY */
// Report the barrier
if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr ) {
if( this_thr->th.th_team->t.t_level == 0 ) {
__kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
}
}
// Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
{
if ( this_thr->th.th_team->t.t_level == 0
#if OMP_40_ENABLED
&& this_thr->th.th_teams_microtask == NULL
#endif
) {
// Report the barrier
this_thr->th.th_ident = loc;
__kmp_itt_region_joined( global_tid, 1 );
}
if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG )
{
this_thr->th.th_ident = loc;
// Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
__kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time, cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
{
__kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
if ( __kmp_forkjoin_frames_mode == 3 )
// Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
__kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
} else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
// Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
__kmp_itt_region_joined( global_tid, 1 );
}
#endif /* USE_ITT_BUILD */

View File

@ -633,6 +633,12 @@ __kmp_dispatch_init(
#if USE_ITT_BUILD
kmp_uint64 cur_chunk = chunk;
int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
th->th.th_teams_microtask == NULL &&
#endif
team->t.t_active_level == 1;
#endif
if ( ! active ) {
pr = reinterpret_cast< dispatch_private_info_template< T >* >
@ -869,9 +875,8 @@ __kmp_dispatch_init(
}
#if USE_ITT_BUILD
// Calculate chunk for metadata report
if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
if ( itt_need_metadata_reporting )
cur_chunk = limit - init + 1;
}
#endif
if ( st == 1 ) {
pr->u.p.lb = lb + init;
@ -1124,16 +1129,10 @@ __kmp_dispatch_init(
if ( pr->ordered ) {
__kmp_itt_ordered_init( gtid );
}; // if
#endif /* USE_ITT_BUILD */
}; // if
#if USE_ITT_BUILD
// Report loop metadata
if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
kmp_uint32 tid = __kmp_tid_from_gtid( gtid );
if (KMP_MASTER_TID(tid)) {
// Report loop metadata
if ( itt_need_metadata_reporting ) {
// Only report metadata by master of active team at level 1
kmp_uint64 schedtype = 0;
switch ( schedule ) {
case kmp_sch_static_chunked:
case kmp_sch_static_balanced:// Chunk is calculated in the switch above
@ -1156,8 +1155,8 @@ __kmp_dispatch_init(
}
__kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
}
}
#endif /* USE_ITT_BUILD */
}; // if
#ifdef KMP_DEBUG
{

View File

@ -814,6 +814,16 @@ __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
/* TODO: Should this be acquire or release? */
status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
th->th.th_local.this_construct);
#if USE_ITT_BUILD
if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
th->th.th_teams_microtask == NULL &&
#endif
team->t.t_active_level == 1 )
{ // Only report metadata by master of active team at level 1
__kmp_itt_metadata_single( id_ref );
}
#endif /* USE_ITT_BUILD */
}
if( __kmp_env_consistency_check ) {
@ -827,10 +837,6 @@ __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
if ( status ) {
__kmp_itt_single_start( gtid );
}
if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid)) {
__kmp_itt_metadata_single( id_ref );
}
#endif /* USE_ITT_BUILD */
return status;
}
@ -1420,22 +1426,26 @@ __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
#if USE_ITT_BUILD
// Mark the start of the "parallel" region for VTune. Only use one of frame notification scheme at the moment
if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
{
this_thr->th.th_ident = loc;
// 0 - no barriers; 1 - serialized parallel
__kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
}
// Save the start of the "parallel" region for VTune. This is the join barrier begin at the same time.
if( ( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) &&
__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr ) || KMP_ITT_DEBUG )
{
this_thr->th.th_ident = loc;
#if USE_ITT_NOTIFY
if( this_thr->th.th_team->t.t_level == 1 ) {
serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
}
if ( serial_team->t.t_level == 1
#if OMP_40_ENABLED
&& this_thr->th.th_teams_microtask == NULL
#endif
) {
#if USE_ITT_NOTIFY
// Save the start of the "parallel" region for VTune. This is the frame begin at the same time.
if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) &&
( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
{
serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
} else // only one notification scheme (either "submit" or "forking/joined", not both)
#endif
if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
__kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode )
{
this_thr->th.th_ident = loc;
// 0 - no barriers; 1 - serialized parallel
__kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
}
}
#endif /* USE_ITT_BUILD */
}
@ -2137,36 +2147,30 @@ __kmp_fork_call(
#if USE_ITT_BUILD
// Mark start of "parallel" region for VTune. Only use one of frame notification scheme at the moment.
if ((__itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) || KMP_ITT_DEBUG) {
if ( team->t.t_active_level == 1 // only report frames at level 1
# if OMP_40_ENABLED
if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master)
// Either not in teams or the outer fork of the teams construct
&& !master_th->th.th_teams_microtask // not in teams construct
# endif /* OMP_40_ENABLED */
) {
#if USE_ITT_NOTIFY
if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
{
kmp_uint64 tmp_time = 0;
if ( __itt_get_timestamp_ptr )
tmp_time = __itt_get_timestamp();
// Internal fork - report frame begin
master_th->th.th_frame_time = tmp_time;
if ( __kmp_forkjoin_frames_mode == 3 )
team->t.t_region_time = tmp_time;
} else // only one notification scheme (either "submit" or "forking/joined", not both)
#endif /* USE_ITT_NOTIFY */
if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
__kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
{ // Mark start of "parallel" region for VTune.
__kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
}
}
#if USE_ITT_NOTIFY
kmp_uint64 tmp_time = 0;
if (((__kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3) && __itt_frame_submit_v3_ptr) || KMP_ITT_DEBUG) {
if (!(team->t.t_active_level > 1)) {
# if OMP_40_ENABLED
if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master) {
// Either not in teams or the outer fork of the teams construct
# endif /* OMP_40_ENABLED */
if ( __itt_get_timestamp_ptr )
tmp_time = __itt_get_timestamp();
// Internal fork - report frame begin
master_th->th.th_frame_time = tmp_time;
if ( __kmp_forkjoin_frames_mode==3 )
team->t.t_region_time = tmp_time;
# if OMP_40_ENABLED
}
# endif /* OMP_40_ENABLED */
}
}
#endif /* USE_ITT_NOTIFY */
#endif /* USE_ITT_BUILD */
/* now go on and do the work */
@ -2342,31 +2346,21 @@ __kmp_join_call(ident_t *loc, int gtid
__kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
}
// Mark end of "parallel" region for VTune. Only use one of frame notification scheme at the moment.
if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) {
// Mark end of "parallel" region for VTune.
if ( team->t.t_active_level == 1
# if OMP_40_ENABLED
if ( !master_th->th.th_teams_microtask /* not in teams */ ||
( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )
// Either not in teams or exiting teams region
// (teams is a frame and no other frames inside the teams)
&& !master_th->th.th_teams_microtask /* not in teams construct */
# endif /* OMP_40_ENABLED */
{
) {
master_th->th.th_ident = loc;
__kmp_itt_region_joined( gtid );
}
}
if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG ) {
# if OMP_40_ENABLED
if ( !master_th->th.th_teams_microtask /* not in teams */ ||
( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )
// Either not in teams or exiting teams region
// (teams is a frame and no other frames inside the teams)
# endif /* OMP_40_ENABLED */
{
master_th->th.th_ident = loc;
__kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, 0, loc, master_th->th.th_team_nproc, 1 );
}
}
// only one notification scheme (either "submit" or "forking/joined", not both)
if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
__kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
0, loc, master_th->th.th_team_nproc, 1 );
else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
__kmp_itt_region_joined( gtid );
} // active_level == 1
#endif /* USE_ITT_BUILD */
#if OMP_40_ENABLED

View File

@ -92,6 +92,7 @@ __kmp_for_static_init(
register kmp_uint32 nth;
register UT trip_count;
register kmp_team_t *team;
register kmp_info_t *th = __kmp_threads[ gtid ];
#if OMPT_SUPPORT && OMPT_TRACE
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
@ -157,13 +158,13 @@ __kmp_for_static_init(
if ( schedtype > kmp_ord_upper ) {
// we are in DISTRIBUTE construct
schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
tid = __kmp_threads[ gtid ]->th.th_team->t.t_master_tid;
team = __kmp_threads[ gtid ]->th.th_team->t.t_parent;
tid = th->th.th_team->t.t_master_tid;
team = th->th.th_team->t.t_parent;
} else
#endif
{
tid = __kmp_tid_from_gtid( global_tid );
team = __kmp_threads[ gtid ]->th.th_team;
team = th->th.th_team;
}
/* determine if "for" loop is an active worksharing construct */
@ -318,7 +319,12 @@ __kmp_for_static_init(
#if USE_ITT_BUILD
// Report loop metadata
if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
th->th.th_teams_microtask == NULL &&
#endif
team->t.t_active_level == 1 )
{
kmp_uint64 cur_chunk = chunk;
// Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
if ( schedtype == kmp_sch_static ) {