forked from OSchip/llvm-project
D9302.partial2: cleanup of ittnotify checks that eliminates redundant notifications in case of nested regions.
llvm-svn: 236631
This commit is contained in:
parent
036181471c
commit
51aecb82cd
|
@ -57,7 +57,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
|
||||||
|
|
||||||
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
||||||
// Barrier imbalance - save arrive time to the thread
|
// Barrier imbalance - save arrive time to the thread
|
||||||
if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
|
if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
|
||||||
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
|
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -97,7 +97,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
|
||||||
USE_ITT_BUILD_ARG(itt_sync_obj) );
|
USE_ITT_BUILD_ARG(itt_sync_obj) );
|
||||||
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
||||||
// Barrier imbalance - write min of the thread time and the other thread time to the thread.
|
// Barrier imbalance - write min of the thread time and the other thread time to the thread.
|
||||||
if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
|
if (__kmp_forkjoin_frames_mode == 2) {
|
||||||
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
|
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
|
||||||
other_threads[i]->th.th_bar_min_time);
|
other_threads[i]->th.th_bar_min_time);
|
||||||
}
|
}
|
||||||
|
@ -234,7 +234,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
|
||||||
|
|
||||||
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
||||||
// Barrier imbalance - save arrive time to the thread
|
// Barrier imbalance - save arrive time to the thread
|
||||||
if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
|
if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
|
||||||
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
|
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -262,7 +262,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
|
||||||
USE_ITT_BUILD_ARG(itt_sync_obj) );
|
USE_ITT_BUILD_ARG(itt_sync_obj) );
|
||||||
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
||||||
// Barrier imbalance - write min of the thread time and a child time to the thread.
|
// Barrier imbalance - write min of the thread time and a child time to the thread.
|
||||||
if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
|
if (__kmp_forkjoin_frames_mode == 2) {
|
||||||
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
|
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
|
||||||
child_thr->th.th_bar_min_time);
|
child_thr->th.th_bar_min_time);
|
||||||
}
|
}
|
||||||
|
@ -432,7 +432,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
|
||||||
|
|
||||||
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
||||||
// Barrier imbalance - save arrive time to the thread
|
// Barrier imbalance - save arrive time to the thread
|
||||||
if(__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
|
if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
|
||||||
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
|
this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -485,7 +485,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
|
||||||
USE_ITT_BUILD_ARG(itt_sync_obj) );
|
USE_ITT_BUILD_ARG(itt_sync_obj) );
|
||||||
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
||||||
// Barrier imbalance - write min of the thread time and a child time to the thread.
|
// Barrier imbalance - write min of the thread time and a child time to the thread.
|
||||||
if (__kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3) {
|
if (__kmp_forkjoin_frames_mode == 2) {
|
||||||
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
|
this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
|
||||||
child_thr->th.th_bar_min_time);
|
child_thr->th.th_bar_min_time);
|
||||||
}
|
}
|
||||||
|
@ -1147,24 +1147,29 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
|
||||||
__kmp_itt_barrier_middle(gtid, itt_sync_obj);
|
__kmp_itt_barrier_middle(gtid, itt_sync_obj);
|
||||||
#endif /* USE_ITT_BUILD */
|
#endif /* USE_ITT_BUILD */
|
||||||
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
#if USE_ITT_BUILD && USE_ITT_NOTIFY
|
||||||
// Barrier - report frame end
|
// Barrier - report frame end (only if active_level == 1)
|
||||||
if (__itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode) {
|
if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
|
||||||
|
#if OMP_40_ENABLED
|
||||||
|
this_thr->th.th_teams_microtask == NULL &&
|
||||||
|
#endif
|
||||||
|
team->t.t_active_level == 1)
|
||||||
|
{
|
||||||
kmp_uint64 cur_time = __itt_get_timestamp();
|
kmp_uint64 cur_time = __itt_get_timestamp();
|
||||||
kmp_info_t **other_threads = this_thr->th.th_team->t.t_threads;
|
kmp_info_t **other_threads = team->t.t_threads;
|
||||||
int nproc = this_thr->th.th_team_nproc;
|
int nproc = this_thr->th.th_team_nproc;
|
||||||
int i;
|
int i;
|
||||||
// Initialize with master's wait time
|
|
||||||
kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
|
|
||||||
switch(__kmp_forkjoin_frames_mode) {
|
switch(__kmp_forkjoin_frames_mode) {
|
||||||
case 1:
|
case 1:
|
||||||
__kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
|
__kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
|
||||||
this_thr->th.th_frame_time = cur_time;
|
this_thr->th.th_frame_time = cur_time;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed)
|
||||||
__kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
|
__kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
if( __itt_metadata_add_ptr ) {
|
if( __itt_metadata_add_ptr ) {
|
||||||
|
// Initialize with master's wait time
|
||||||
|
kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
|
||||||
for (i=1; i<nproc; ++i) {
|
for (i=1; i<nproc; ++i) {
|
||||||
delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
|
delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
|
||||||
}
|
}
|
||||||
|
@ -1413,14 +1418,17 @@ __kmp_join_barrier(int gtid)
|
||||||
|
|
||||||
# if USE_ITT_BUILD && USE_ITT_NOTIFY
|
# if USE_ITT_BUILD && USE_ITT_NOTIFY
|
||||||
// Join barrier - report frame end
|
// Join barrier - report frame end
|
||||||
if (__itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode) {
|
if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
|
||||||
|
#if OMP_40_ENABLED
|
||||||
|
this_thr->th.th_teams_microtask == NULL &&
|
||||||
|
#endif
|
||||||
|
team->t.t_active_level == 1)
|
||||||
|
{
|
||||||
kmp_uint64 cur_time = __itt_get_timestamp();
|
kmp_uint64 cur_time = __itt_get_timestamp();
|
||||||
ident_t * loc = team->t.t_ident;
|
ident_t * loc = team->t.t_ident;
|
||||||
kmp_info_t **other_threads = this_thr->th.th_team->t.t_threads;
|
kmp_info_t **other_threads = team->t.t_threads;
|
||||||
int nproc = this_thr->th.th_team_nproc;
|
int nproc = this_thr->th.th_team_nproc;
|
||||||
int i;
|
int i;
|
||||||
// Initialize with master's wait time
|
|
||||||
kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
|
|
||||||
switch(__kmp_forkjoin_frames_mode) {
|
switch(__kmp_forkjoin_frames_mode) {
|
||||||
case 1:
|
case 1:
|
||||||
__kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
|
__kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
|
||||||
|
@ -1430,6 +1438,8 @@ __kmp_join_barrier(int gtid)
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
if( __itt_metadata_add_ptr ) {
|
if( __itt_metadata_add_ptr ) {
|
||||||
|
// Initialize with master's wait time
|
||||||
|
kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
|
||||||
for (i=1; i<nproc; ++i) {
|
for (i=1; i<nproc; ++i) {
|
||||||
delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
|
delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
|
||||||
}
|
}
|
||||||
|
|
|
@ -535,27 +535,30 @@ __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
|
||||||
#if USE_ITT_BUILD
|
#if USE_ITT_BUILD
|
||||||
kmp_uint64 cur_time = 0;
|
kmp_uint64 cur_time = 0;
|
||||||
#if USE_ITT_NOTIFY
|
#if USE_ITT_NOTIFY
|
||||||
if( __itt_get_timestamp_ptr ) {
|
if ( __itt_get_timestamp_ptr ) {
|
||||||
cur_time = __itt_get_timestamp();
|
cur_time = __itt_get_timestamp();
|
||||||
}
|
}
|
||||||
#endif /* USE_ITT_NOTIFY */
|
#endif /* USE_ITT_NOTIFY */
|
||||||
// Report the barrier
|
if ( this_thr->th.th_team->t.t_level == 0
|
||||||
if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr ) {
|
#if OMP_40_ENABLED
|
||||||
if( this_thr->th.th_team->t.t_level == 0 ) {
|
&& this_thr->th.th_teams_microtask == NULL
|
||||||
__kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
|
#endif
|
||||||
}
|
) {
|
||||||
}
|
// Report the barrier
|
||||||
// Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
|
|
||||||
if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
|
|
||||||
{
|
|
||||||
this_thr->th.th_ident = loc;
|
this_thr->th.th_ident = loc;
|
||||||
__kmp_itt_region_joined( global_tid, 1 );
|
if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
|
||||||
}
|
( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
|
||||||
if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG )
|
{
|
||||||
{
|
__kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
|
||||||
this_thr->th.th_ident = loc;
|
cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
|
||||||
// Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
|
if ( __kmp_forkjoin_frames_mode == 3 )
|
||||||
__kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time, cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
|
// Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
|
||||||
|
__kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
|
||||||
|
cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
|
||||||
|
} else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
|
||||||
|
! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
|
||||||
|
// Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
|
||||||
|
__kmp_itt_region_joined( global_tid, 1 );
|
||||||
}
|
}
|
||||||
#endif /* USE_ITT_BUILD */
|
#endif /* USE_ITT_BUILD */
|
||||||
|
|
||||||
|
|
|
@ -633,6 +633,12 @@ __kmp_dispatch_init(
|
||||||
|
|
||||||
#if USE_ITT_BUILD
|
#if USE_ITT_BUILD
|
||||||
kmp_uint64 cur_chunk = chunk;
|
kmp_uint64 cur_chunk = chunk;
|
||||||
|
int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
|
||||||
|
KMP_MASTER_GTID(gtid) &&
|
||||||
|
#if OMP_40_ENABLED
|
||||||
|
th->th.th_teams_microtask == NULL &&
|
||||||
|
#endif
|
||||||
|
team->t.t_active_level == 1;
|
||||||
#endif
|
#endif
|
||||||
if ( ! active ) {
|
if ( ! active ) {
|
||||||
pr = reinterpret_cast< dispatch_private_info_template< T >* >
|
pr = reinterpret_cast< dispatch_private_info_template< T >* >
|
||||||
|
@ -869,9 +875,8 @@ __kmp_dispatch_init(
|
||||||
}
|
}
|
||||||
#if USE_ITT_BUILD
|
#if USE_ITT_BUILD
|
||||||
// Calculate chunk for metadata report
|
// Calculate chunk for metadata report
|
||||||
if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
|
if ( itt_need_metadata_reporting )
|
||||||
cur_chunk = limit - init + 1;
|
cur_chunk = limit - init + 1;
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
if ( st == 1 ) {
|
if ( st == 1 ) {
|
||||||
pr->u.p.lb = lb + init;
|
pr->u.p.lb = lb + init;
|
||||||
|
@ -1124,16 +1129,10 @@ __kmp_dispatch_init(
|
||||||
if ( pr->ordered ) {
|
if ( pr->ordered ) {
|
||||||
__kmp_itt_ordered_init( gtid );
|
__kmp_itt_ordered_init( gtid );
|
||||||
}; // if
|
}; // if
|
||||||
#endif /* USE_ITT_BUILD */
|
// Report loop metadata
|
||||||
}; // if
|
if ( itt_need_metadata_reporting ) {
|
||||||
|
// Only report metadata by master of active team at level 1
|
||||||
#if USE_ITT_BUILD
|
|
||||||
// Report loop metadata
|
|
||||||
if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
|
|
||||||
kmp_uint32 tid = __kmp_tid_from_gtid( gtid );
|
|
||||||
if (KMP_MASTER_TID(tid)) {
|
|
||||||
kmp_uint64 schedtype = 0;
|
kmp_uint64 schedtype = 0;
|
||||||
|
|
||||||
switch ( schedule ) {
|
switch ( schedule ) {
|
||||||
case kmp_sch_static_chunked:
|
case kmp_sch_static_chunked:
|
||||||
case kmp_sch_static_balanced:// Chunk is calculated in the switch above
|
case kmp_sch_static_balanced:// Chunk is calculated in the switch above
|
||||||
|
@ -1156,8 +1155,8 @@ __kmp_dispatch_init(
|
||||||
}
|
}
|
||||||
__kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
|
__kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif /* USE_ITT_BUILD */
|
#endif /* USE_ITT_BUILD */
|
||||||
|
}; // if
|
||||||
|
|
||||||
#ifdef KMP_DEBUG
|
#ifdef KMP_DEBUG
|
||||||
{
|
{
|
||||||
|
|
|
@ -814,6 +814,16 @@ __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
|
||||||
/* TODO: Should this be acquire or release? */
|
/* TODO: Should this be acquire or release? */
|
||||||
status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
|
status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
|
||||||
th->th.th_local.this_construct);
|
th->th.th_local.this_construct);
|
||||||
|
#if USE_ITT_BUILD
|
||||||
|
if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
|
||||||
|
#if OMP_40_ENABLED
|
||||||
|
th->th.th_teams_microtask == NULL &&
|
||||||
|
#endif
|
||||||
|
team->t.t_active_level == 1 )
|
||||||
|
{ // Only report metadata by master of active team at level 1
|
||||||
|
__kmp_itt_metadata_single( id_ref );
|
||||||
|
}
|
||||||
|
#endif /* USE_ITT_BUILD */
|
||||||
}
|
}
|
||||||
|
|
||||||
if( __kmp_env_consistency_check ) {
|
if( __kmp_env_consistency_check ) {
|
||||||
|
@ -827,10 +837,6 @@ __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
|
||||||
if ( status ) {
|
if ( status ) {
|
||||||
__kmp_itt_single_start( gtid );
|
__kmp_itt_single_start( gtid );
|
||||||
}
|
}
|
||||||
if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid)) {
|
|
||||||
__kmp_itt_metadata_single( id_ref );
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* USE_ITT_BUILD */
|
#endif /* USE_ITT_BUILD */
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
@ -1420,22 +1426,26 @@ __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
|
||||||
|
|
||||||
#if USE_ITT_BUILD
|
#if USE_ITT_BUILD
|
||||||
// Mark the start of the "parallel" region for VTune. Only use one of frame notification scheme at the moment
|
// Mark the start of the "parallel" region for VTune. Only use one of frame notification scheme at the moment
|
||||||
if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
|
if ( serial_team->t.t_level == 1
|
||||||
{
|
#if OMP_40_ENABLED
|
||||||
this_thr->th.th_ident = loc;
|
&& this_thr->th.th_teams_microtask == NULL
|
||||||
// 0 - no barriers; 1 - serialized parallel
|
|
||||||
__kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
|
|
||||||
}
|
|
||||||
// Save the start of the "parallel" region for VTune. This is the join barrier begin at the same time.
|
|
||||||
if( ( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) &&
|
|
||||||
__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr ) || KMP_ITT_DEBUG )
|
|
||||||
{
|
|
||||||
this_thr->th.th_ident = loc;
|
|
||||||
#if USE_ITT_NOTIFY
|
|
||||||
if( this_thr->th.th_team->t.t_level == 1 ) {
|
|
||||||
serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
) {
|
||||||
|
#if USE_ITT_NOTIFY
|
||||||
|
// Save the start of the "parallel" region for VTune. This is the frame begin at the same time.
|
||||||
|
if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) &&
|
||||||
|
( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
|
||||||
|
{
|
||||||
|
serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
|
||||||
|
} else // only one notification scheme (either "submit" or "forking/joined", not both)
|
||||||
|
#endif
|
||||||
|
if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
|
||||||
|
__kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode )
|
||||||
|
{
|
||||||
|
this_thr->th.th_ident = loc;
|
||||||
|
// 0 - no barriers; 1 - serialized parallel
|
||||||
|
__kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif /* USE_ITT_BUILD */
|
#endif /* USE_ITT_BUILD */
|
||||||
}
|
}
|
||||||
|
@ -2137,36 +2147,30 @@ __kmp_fork_call(
|
||||||
|
|
||||||
|
|
||||||
#if USE_ITT_BUILD
|
#if USE_ITT_BUILD
|
||||||
// Mark start of "parallel" region for VTune. Only use one of frame notification scheme at the moment.
|
if ( team->t.t_active_level == 1 // only report frames at level 1
|
||||||
if ((__itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) || KMP_ITT_DEBUG) {
|
|
||||||
# if OMP_40_ENABLED
|
# if OMP_40_ENABLED
|
||||||
if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master)
|
&& !master_th->th.th_teams_microtask // not in teams construct
|
||||||
// Either not in teams or the outer fork of the teams construct
|
|
||||||
# endif /* OMP_40_ENABLED */
|
# endif /* OMP_40_ENABLED */
|
||||||
|
) {
|
||||||
|
#if USE_ITT_NOTIFY
|
||||||
|
if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
|
||||||
|
( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
|
||||||
{
|
{
|
||||||
|
kmp_uint64 tmp_time = 0;
|
||||||
|
if ( __itt_get_timestamp_ptr )
|
||||||
|
tmp_time = __itt_get_timestamp();
|
||||||
|
// Internal fork - report frame begin
|
||||||
|
master_th->th.th_frame_time = tmp_time;
|
||||||
|
if ( __kmp_forkjoin_frames_mode == 3 )
|
||||||
|
team->t.t_region_time = tmp_time;
|
||||||
|
} else // only one notification scheme (either "submit" or "forking/joined", not both)
|
||||||
|
#endif /* USE_ITT_NOTIFY */
|
||||||
|
if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
|
||||||
|
__kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
|
||||||
|
{ // Mark start of "parallel" region for VTune.
|
||||||
__kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
|
__kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if USE_ITT_NOTIFY
|
|
||||||
kmp_uint64 tmp_time = 0;
|
|
||||||
if (((__kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3) && __itt_frame_submit_v3_ptr) || KMP_ITT_DEBUG) {
|
|
||||||
if (!(team->t.t_active_level > 1)) {
|
|
||||||
# if OMP_40_ENABLED
|
|
||||||
if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master) {
|
|
||||||
// Either not in teams or the outer fork of the teams construct
|
|
||||||
# endif /* OMP_40_ENABLED */
|
|
||||||
if ( __itt_get_timestamp_ptr )
|
|
||||||
tmp_time = __itt_get_timestamp();
|
|
||||||
// Internal fork - report frame begin
|
|
||||||
master_th->th.th_frame_time = tmp_time;
|
|
||||||
if ( __kmp_forkjoin_frames_mode==3 )
|
|
||||||
team->t.t_region_time = tmp_time;
|
|
||||||
# if OMP_40_ENABLED
|
|
||||||
}
|
|
||||||
# endif /* OMP_40_ENABLED */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* USE_ITT_NOTIFY */
|
|
||||||
#endif /* USE_ITT_BUILD */
|
#endif /* USE_ITT_BUILD */
|
||||||
|
|
||||||
/* now go on and do the work */
|
/* now go on and do the work */
|
||||||
|
@ -2342,31 +2346,21 @@ __kmp_join_call(ident_t *loc, int gtid
|
||||||
__kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
|
__kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark end of "parallel" region for VTune. Only use one of frame notification scheme at the moment.
|
// Mark end of "parallel" region for VTune.
|
||||||
if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) {
|
if ( team->t.t_active_level == 1
|
||||||
# if OMP_40_ENABLED
|
# if OMP_40_ENABLED
|
||||||
if ( !master_th->th.th_teams_microtask /* not in teams */ ||
|
&& !master_th->th.th_teams_microtask /* not in teams construct */
|
||||||
( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )
|
|
||||||
// Either not in teams or exiting teams region
|
|
||||||
// (teams is a frame and no other frames inside the teams)
|
|
||||||
# endif /* OMP_40_ENABLED */
|
# endif /* OMP_40_ENABLED */
|
||||||
{
|
) {
|
||||||
master_th->th.th_ident = loc;
|
master_th->th.th_ident = loc;
|
||||||
__kmp_itt_region_joined( gtid );
|
// only one notification scheme (either "submit" or "forking/joined", not both)
|
||||||
}
|
if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
|
||||||
}
|
__kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
|
||||||
if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG ) {
|
0, loc, master_th->th.th_team_nproc, 1 );
|
||||||
# if OMP_40_ENABLED
|
else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
|
||||||
if ( !master_th->th.th_teams_microtask /* not in teams */ ||
|
! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
|
||||||
( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )
|
__kmp_itt_region_joined( gtid );
|
||||||
// Either not in teams or exiting teams region
|
} // active_level == 1
|
||||||
// (teams is a frame and no other frames inside the teams)
|
|
||||||
# endif /* OMP_40_ENABLED */
|
|
||||||
{
|
|
||||||
master_th->th.th_ident = loc;
|
|
||||||
__kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, 0, loc, master_th->th.th_team_nproc, 1 );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* USE_ITT_BUILD */
|
#endif /* USE_ITT_BUILD */
|
||||||
|
|
||||||
#if OMP_40_ENABLED
|
#if OMP_40_ENABLED
|
||||||
|
|
|
@ -92,6 +92,7 @@ __kmp_for_static_init(
|
||||||
register kmp_uint32 nth;
|
register kmp_uint32 nth;
|
||||||
register UT trip_count;
|
register UT trip_count;
|
||||||
register kmp_team_t *team;
|
register kmp_team_t *team;
|
||||||
|
register kmp_info_t *th = __kmp_threads[ gtid ];
|
||||||
|
|
||||||
#if OMPT_SUPPORT && OMPT_TRACE
|
#if OMPT_SUPPORT && OMPT_TRACE
|
||||||
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
|
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
|
||||||
|
@ -157,13 +158,13 @@ __kmp_for_static_init(
|
||||||
if ( schedtype > kmp_ord_upper ) {
|
if ( schedtype > kmp_ord_upper ) {
|
||||||
// we are in DISTRIBUTE construct
|
// we are in DISTRIBUTE construct
|
||||||
schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
|
schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
|
||||||
tid = __kmp_threads[ gtid ]->th.th_team->t.t_master_tid;
|
tid = th->th.th_team->t.t_master_tid;
|
||||||
team = __kmp_threads[ gtid ]->th.th_team->t.t_parent;
|
team = th->th.th_team->t.t_parent;
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
tid = __kmp_tid_from_gtid( global_tid );
|
tid = __kmp_tid_from_gtid( global_tid );
|
||||||
team = __kmp_threads[ gtid ]->th.th_team;
|
team = th->th.th_team;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* determine if "for" loop is an active worksharing construct */
|
/* determine if "for" loop is an active worksharing construct */
|
||||||
|
@ -318,7 +319,12 @@ __kmp_for_static_init(
|
||||||
|
|
||||||
#if USE_ITT_BUILD
|
#if USE_ITT_BUILD
|
||||||
// Report loop metadata
|
// Report loop metadata
|
||||||
if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
|
if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
|
||||||
|
#if OMP_40_ENABLED
|
||||||
|
th->th.th_teams_microtask == NULL &&
|
||||||
|
#endif
|
||||||
|
team->t.t_active_level == 1 )
|
||||||
|
{
|
||||||
kmp_uint64 cur_chunk = chunk;
|
kmp_uint64 cur_chunk = chunk;
|
||||||
// Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
|
// Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
|
||||||
if ( schedtype == kmp_sch_static ) {
|
if ( schedtype == kmp_sch_static ) {
|
||||||
|
|
Loading…
Reference in New Issue