Refactor of task_team code.

This is a refactoring of the task_team code that handles the two-task_team case
more elegantly. Two task_teams per team are kept in use for the lifetime of the
team, so no reference counting is needed.
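As an illustration of the scheme, here is a minimal, hypothetical C sketch (simplified
stand-in types, not the real kmp_* structures): each team owns two task_team slots for
its whole lifetime, and each thread carries a parity bit (th_task_state in the real
code) that selects the slot for the current region. Crossing a barrier flips the
parity instead of taking or releasing references.

/* Minimal sketch of the two-slot scheme; all types are simplified stand-ins. */
#include <stdio.h>

typedef struct { int id; } task_team_t;

typedef struct {
    task_team_t *task_team[2];  /* both slots live as long as the team */
} team_t;

typedef struct {
    int task_state;             /* parity bit selecting the active slot */
    task_team_t *task_team;     /* thread's cached pointer */
} thread_t;

/* Crossing a barrier: toggle parity and re-point at the other slot. */
static void task_team_sync(thread_t *thr, team_t *team) {
    thr->task_state = 1 - thr->task_state;
    thr->task_team = team->task_team[thr->task_state];
}

int main(void) {
    task_team_t a = {0}, b = {1};
    team_t team = { { &a, &b } };
    thread_t thr = { 0, &a };
    task_team_sync(&thr, &team);
    printf("now using task_team %d\n", thr.task_team->id);  /* prints 1 */
    return 0;
}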

Differential Revision: http://reviews.llvm.org/D13993

llvm-svn: 252082
Jonathan Peyton 2015-11-04 21:37:48 +00:00
parent d4304d2f9c
commit 54127981be
5 changed files with 131 additions and 242 deletions

@@ -2100,14 +2100,6 @@ typedef struct kmp_base_task_team {
KMP_ALIGN_CACHE
volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */
KMP_ALIGN_CACHE
#if KMP_USE_INTERNODE_ALIGNMENT
kmp_int32 tt_padme[INTERNODE_CACHE_LINE/sizeof(kmp_int32)];
#endif
volatile kmp_uint32 tt_ref_ct; /* #threads accessing struct */
/* (not incl. master) */
} kmp_base_task_team_t;
union KMP_ALIGN_CACHE kmp_task_team {
@@ -3172,15 +3164,16 @@ int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_onco
#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);
extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team );
extern void __kmp_reap_task_teams( void );
extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
extern void __kmp_wait_to_unref_task_teams( void );
extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int both, int always );
extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always );
extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team );
extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team
#if USE_ITT_BUILD
, void * itt_sync_obj
#endif /* USE_ITT_BUILD */
, int wait=1
);
extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );

@@ -1153,7 +1153,7 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
if (__kmp_tasking_mode != tskm_immediate_exec) {
__kmp_task_team_wait(this_thr, team
USE_ITT_BUILD_ARG(itt_sync_obj) );
__kmp_task_team_setup(this_thr, team, 0, 0); // use 0,0 to only setup the current team if nthreads > 1
__kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team if nthreads > 1
}
#if USE_DEBUGGER
// Let the debugger know: all threads have arrived and are starting to leave the barrier.
@@ -1261,7 +1261,7 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
__kmp_task_team_wait(this_thr, team
USE_ITT_BUILD_ARG(itt_sync_obj));
__kmp_task_team_setup(this_thr, team, 0, 0);
__kmp_task_team_setup(this_thr, team, 0);
#if USE_ITT_BUILD
if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
@@ -1575,7 +1575,7 @@ __kmp_fork_barrier(int gtid, int tid)
#endif
if (__kmp_tasking_mode != tskm_immediate_exec) {
__kmp_task_team_setup(this_thr, team, 1, 0); // 1,0 indicates setup both task teams if nthreads > 1
__kmp_task_team_setup(this_thr, team, 0); // 0 indicates setup current task team if nthreads > 1
}
/* The master thread may have changed its blocktime between the join barrier and the
@@ -1614,14 +1614,7 @@ __kmp_fork_barrier(int gtid, int tid)
// Early exit for reaping threads releasing forkjoin barrier
if (TCR_4(__kmp_global.g.g_done)) {
if (this_thr->th.th_task_team != NULL) {
if (KMP_MASTER_TID(tid)) {
TCW_PTR(this_thr->th.th_task_team, NULL);
}
else {
__kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
}
}
this_thr->th.th_task_team = NULL;
#if USE_ITT_BUILD && USE_ITT_NOTIFY
if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {

@@ -2104,23 +2104,31 @@ __kmp_fork_call(
// Take a memo of master's task_state
KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz );
kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
kmp_uint8 *old_stack, *new_stack;
kmp_uint32 i;
new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
new_stack[i] = master_th->th.th_task_state_memo_stack[i];
}
for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
new_stack[i] = 0;
}
old_stack = master_th->th.th_task_state_memo_stack;
master_th->th.th_task_state_memo_stack = new_stack;
master_th->th.th_task_state_stack_sz *= 2;
master_th->th.th_task_state_stack_sz = new_size;
__kmp_free(old_stack);
}
// Store master's task_state on stack
master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
master_th->th.th_task_state_top++;
master_th->th.th_task_state = 0;
if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team
master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
}
else {
master_th->th.th_task_state = 0;
}
}
master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state];
#if !KMP_NESTED_HOT_TEAMS
KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
#endif
@@ -2410,12 +2418,7 @@ __kmp_join_call(ident_t *loc, int gtid
int old_num = master_th->th.th_team_nproc;
int new_num = master_th->th.th_teams_size.nth;
kmp_info_t **other_threads = team->t.t_threads;
kmp_task_team_t * task_team = master_th->th.th_task_team;
team->t.t_nproc = new_num;
if ( task_team ) { // task team might have lesser value of counters
task_team->tt.tt_ref_ct = new_num - 1;
task_team->tt.tt_unfinished_threads = new_num;
}
for ( i = 0; i < old_num; ++i ) {
other_threads[i]->th.th_team_nproc = new_num;
}
@@ -2509,18 +2512,18 @@ __kmp_join_call(ident_t *loc, int gtid
}
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// Restore task state from memo stack
KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
if (master_th->th.th_task_state_top > 0) {
if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
// Remember master's state if we re-use this nested hot team
master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
--master_th->th.th_task_state_top; // pop
// Now restore state at this level
master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
}
// Copy the first task team from the new child / old parent team to the thread and reset state flag.
// Copy the task team from the parent team to the master thread
master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
__kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
parent_team ) );
__kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
}
// TODO: GEH - cannot do this assertion because root thread not set up as executing
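Read together, the fork side above pushes the master's task state onto a small memo
stack (doubling the buffer when it fills) and the join side pops it back, remembering
the inner state in case a nested hot team is re-used. A hedged, self-contained sketch
of that push/pop discipline (simplified fields, not the actual kmp code):

/* Hypothetical sketch of the th_task_state memo stack used across fork/join. */
#include <stdlib.h>
#include <string.h>

typedef struct {
    unsigned char *stack;
    unsigned top, sz;
} memo_t;

/* Fork: save the current state at this nesting level. */
static void push_state(memo_t *m, unsigned char state) {
    if (m->top >= m->sz) {               /* full: double and zero the tail */
        unsigned char *ns = calloc(2 * m->sz, 1);
        memcpy(ns, m->stack, m->sz);
        free(m->stack);
        m->stack = ns;
        m->sz *= 2;
    }
    m->stack[m->top++] = state;
}

/* Join: remember the inner state for hot-team reuse, restore the outer one. */
static unsigned char pop_state(memo_t *m, unsigned char inner_state) {
    if (m->top < m->sz)
        m->stack[m->top] = inner_state;
    return m->stack[--m->top];
}

int main(void) {
    memo_t m = { calloc(4, 1), 0, 4 };
    push_state(&m, 1);                       /* entering a nested region */
    unsigned char outer = pop_state(&m, 0);  /* leaving it again */
    free(m.stack);
    return outer == 1 ? 0 : 1;
}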
@@ -2615,31 +2618,13 @@ __kmp_set_num_threads( int new_nth, int gtid )
__kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
int tt_idx;
for (tt_idx=0; tt_idx<2; ++tt_idx) {
kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx];
if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
// Signal worker threads (esp. the extra ones) to stop looking for tasks while spin waiting.
// The task teams are reference counted and will be deallocated by the last worker thread.
KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
TCW_SYNC_4( task_team->tt.tt_active, FALSE );
KMP_MB();
KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
&hot_team->t.t_task_team[tt_idx] ) );
hot_team->t.t_task_team[tt_idx] = NULL;
}
else {
KMP_DEBUG_ASSERT( task_team == NULL );
}
}
}
//
// Release the extra threads we don't need any more.
//
for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
if ( __kmp_tasking_mode != tskm_immediate_exec) {
// When decreasing team size, threads no longer in the team should unref task team.
hot_team->t.t_threads[f]->th.th_task_team = NULL;
}
__kmp_free_thread( hot_team->t.t_threads[f] );
hot_team->t.t_threads[f] = NULL;
}
@@ -4081,7 +4066,6 @@ __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid
TCW_PTR(this_thr->th.th_sleep_loc, NULL);
KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
this_thr->th.th_task_state = 0;
KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
tid, gtid, this_thr, this_thr->th.th_current_task ) );
@@ -4151,9 +4135,12 @@ __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid
this_thr->th.th_next_pool = NULL;
if (!this_thr->th.th_task_state_memo_stack) {
size_t i;
this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
this_thr->th.th_task_state_top = 0;
this_thr->th.th_task_state_stack_sz = 4;
for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
this_thr->th.th_task_state_memo_stack[i] = 0;
}
KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
@@ -4211,6 +4198,7 @@ __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
TCW_4(__kmp_nth, __kmp_nth + 1);
new_thr->th.th_task_state = 0;
new_thr->th.th_task_state_top = 0;
new_thr->th.th_task_state_stack_sz = 4;
@@ -4896,26 +4884,6 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
team->t.t_size_changed = 1;
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// Signal the worker threads (esp. extra ones) to stop looking for tasks while spin waiting.
// The task teams are reference counted and will be deallocated by the last worker thread.
int tt_idx;
for (tt_idx=0; tt_idx<2; ++tt_idx) {
// We don't know which of the two task teams workers are waiting on, so deactivate both.
kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
TCW_SYNC_4( task_team->tt.tt_active, FALSE );
KMP_MB();
KA_TRACE(20, ("__kmp_allocate_team: setting task_team %p to NULL\n",
&team->t.t_task_team[tt_idx]));
team->t.t_task_team[tt_idx] = NULL;
}
else {
KMP_DEBUG_ASSERT( task_team == NULL );
}
}
}
#if KMP_NESTED_HOT_TEAMS
if( __kmp_hot_teams_mode == 0 ) {
// AC: saved number of threads should correspond to team's value in this mode,
@@ -4926,6 +4894,10 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
/* release the extra threads we don't need any more */
for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
if ( __kmp_tasking_mode != tskm_immediate_exec) {
// When decreasing team size, threads no longer in the team should unref task team.
team->t.t_threads[f]->th.th_task_team = NULL;
}
__kmp_free_thread( team->t.t_threads[ f ] );
team->t.t_threads[ f ] = NULL;
}
@@ -4937,32 +4909,9 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
team->t.t_sched = new_icvs->sched;
__kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// Init both task teams
int tt_idx;
for (tt_idx=0; tt_idx<2; ++tt_idx) {
kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
if ( task_team != NULL ) {
KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
task_team->tt.tt_nproc = new_nproc;
task_team->tt.tt_unfinished_threads = new_nproc;
task_team->tt.tt_ref_ct = new_nproc - 1;
}
}
}
/* update the remaining threads */
if (level) {
team->t.t_threads[0]->th.th_team_nproc = new_nproc;
for(f = 1; f < new_nproc; ++f) {
team->t.t_threads[f]->th.th_team_nproc = new_nproc;
team->t.t_threads[f]->th.th_task_state = 0;
}
}
else {
for(f = 0; f < new_nproc; ++f) {
team->t.t_threads[f]->th.th_team_nproc = new_nproc;
}
for(f = 0; f < new_nproc; ++f) {
team->t.t_threads[f]->th.th_team_nproc = new_nproc;
}
// restore the current task state of the master thread: should be the implicit task
KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
@@ -5076,39 +5025,24 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
} // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
/* make sure everyone is synchronized */
int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
__kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// Signal the worker threads to stop looking for tasks while spin waiting.
// The task teams are reference counted and will be deallocated by the last worker thread.
int tt_idx;
for (tt_idx=0; tt_idx<2; ++tt_idx) {
// We don't know which of the two task teams workers are waiting on, so deactivate both.
kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
if ( (task_team != NULL) && TCR_SYNC_4(task_team->tt.tt_active) ) {
TCW_SYNC_4( task_team->tt.tt_active, FALSE );
team->t.t_task_team[tt_idx] = NULL;
}
}
}
/* reinitialize the threads */
KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
if (level) {
int old_state = team->t.t_threads[0]->th.th_task_state;
for (f=0; f < team->t.t_nproc; ++f)
__kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
// th_task_state for master thread will be put in stack of states in __kmp_fork_call()
// before zeroing, for workers it was just zeroed in __kmp_initialize_info()
team->t.t_threads[0]->th.th_task_state = old_state;
for (f=0; f < team->t.t_nproc; ++f)
__kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
if (level) { // set th_task_state for new threads in nested hot team
// __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
// th_task_state for the new threads. th_task_state for master thread will not be accurate until
// after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
for (f=old_nproc; f < team->t.t_nproc; ++f)
team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
}
else {
int old_state = team->t.t_threads[0]->th.th_task_state;
for (f=0; f<team->t.t_nproc; ++f) {
__kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
else { // set th_task_state for new threads in non-nested hot team
int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
for (f=old_nproc; f < team->t.t_nproc; ++f)
team->t.t_threads[f]->th.th_task_state = old_state;
team->t.t_threads[f]->th.th_task_team = team->t.t_task_team[old_state];
}
}
#ifdef KMP_DEBUG
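The rule for a resized hot team, then: threads added beyond the old size inherit
their parity from the master, either from the memo-stack entry for this nesting
level (nested case) or from the master's current state (outer case). A small
hypothetical sketch, with simplified fields:

/* Hypothetical sketch: seeding th_task_state for threads added to a
   resized hot team (simplified fields). */
typedef struct { unsigned char task_state; } thr_t;

static void seed_new_threads(thr_t **t, int old_n, int new_n,
                             const unsigned char *master_memo, int level) {
    /* level > 0: nested hot team, use the state recorded for this level;
       level == 0: copy the master's current parity. */
    unsigned char state = level ? master_memo[level] : t[0]->task_state;
    for (int f = old_n; f < new_n; ++f)
        t[f]->task_state = state;
}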
@@ -5342,18 +5276,17 @@ __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info
/* if we are non-hot team, release our threads */
if( ! use_hot_team ) {
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// Delete task teams
int tt_idx;
for (tt_idx=0; tt_idx<2; ++tt_idx) {
// We don't know which of the two task teams workers are waiting on, so deactivate both.
kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
if ( task_team != NULL ) {
// Signal the worker threads to stop looking for tasks while spin waiting. The task
// teams are reference counted and will be deallocated by the last worker thread via the
// thread's pointer to the task team.
KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
team->t.t_threads[f]->th.th_task_team = NULL;
}
KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
TCW_SYNC_4( task_team->tt.tt_active, FALSE );
KMP_MB();
__kmp_free_task_team( master, task_team );
team->t.t_task_team[tt_idx] = NULL;
}
}
@@ -5452,6 +5385,7 @@ __kmp_free_thread( kmp_info_t *this_th )
balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
balign[b].bb.team = NULL;
}
this_th->th.th_task_state = 0;
/* put thread back on the free pool */
@@ -5622,9 +5556,7 @@ __kmp_launch_thread( kmp_info_t *this_thr )
}
#endif
if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
__kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
}
this_thr->th.th_task_team = NULL;
/* run the destructors for the threadprivate data for this thread */
__kmp_common_destroy_gtid( gtid );
@@ -6120,10 +6052,7 @@ __kmp_internal_end_thread( int gtid_req )
KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
if ( gtid >= 0 ) {
kmp_info_t *this_thr = __kmp_threads[ gtid ];
if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
__kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
}
__kmp_threads[gtid]->th.th_task_team = NULL;
}
KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));

@@ -895,7 +895,7 @@ __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
*/
KMP_DEBUG_ASSERT(team->t.t_serialized);
KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
__kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team regardless of nthreads
__kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
}
kmp_task_team_t * task_team = thread->th.th_task_team;
@@ -1297,8 +1297,7 @@ __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
kmp_info_t * thread;
int thread_finished = FALSE;
KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
gtid, loc_ref) );
KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
@@ -1688,7 +1687,7 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
task_team = thread -> th.th_task_team;
KMP_DEBUG_ASSERT( task_team != NULL );
if (task_team == NULL) return FALSE;
KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
gtid, final_spin, *thread_finished) );
@@ -1732,6 +1731,7 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
return TRUE;
}
if (thread->th.th_task_team == NULL) break;
KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
}
@@ -1767,6 +1767,7 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
}
}
if (thread->th.th_task_team == NULL) return FALSE;
#if OMP_41_ENABLED
// check if there are other threads to steal from, otherwise go back
if ( nthreads == 1 )
@@ -1805,6 +1806,7 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
return TRUE;
}
if (thread->th.th_task_team == NULL) break;
KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
// If the execution of the stolen task resulted in more tasks being
// placed on our run queue, then restart the whole process.
@@ -1851,6 +1853,7 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
return TRUE;
}
}
if (thread->th.th_task_team == NULL) return FALSE;
}
// Find a different thread to steal work from. Pick a random thread.
@@ -1919,6 +1922,7 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
gtid) );
return TRUE;
}
if (thread->th.th_task_team == NULL) break;
KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
// If the execution of the stolen task resulted in more tasks being
@@ -1966,6 +1970,7 @@ static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gti
return TRUE;
}
}
if (thread->th.th_task_team == NULL) return FALSE;
}
KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
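These added NULL checks are what replaces the reference count on the worker side:
after every executed or stolen task, the worker re-reads its cached th_task_team
pointer and bails out of the stealing loop if the master has detached it. A hedged
sketch of that loop shape (stubbed helper, simplified types):

/* Hypothetical sketch of the bail-out pattern in the task-stealing loop. */
typedef struct task_team task_team;               /* opaque in this sketch */
typedef struct { task_team *volatile task_team_ptr; } worker_t;

/* Stub standing in for "pop or steal and run one task"; returns 1 if a
   task was executed. */
static int run_one_task(worker_t *w) { (void)w; return 0; }

static int drain_tasks(worker_t *w) {
    while (run_one_task(w)) {
        if (w->task_team_ptr == NULL)             /* master detached the team */
            return 0;                             /* stop looking for work */
        /* yield, then loop back for more work */
    }
    return 1;                                     /* ran out of tasks normally */
}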
@@ -2350,10 +2355,9 @@ __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
TCW_4( task_team -> tt.tt_active, TRUE );
TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
(thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
(thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
return task_team;
}
@@ -2362,16 +2366,13 @@ __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
// __kmp_free_task_team:
// Frees the task team associated with a specific thread, and adds it
// to the global task team free list.
//
static void
void
__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
{
KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
// Put task team back on free list
__kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
@@ -2412,32 +2413,6 @@ __kmp_reap_task_teams( void )
}
}
//------------------------------------------------------------------------------
// __kmp_unref_task_team:
// Remove one thread from referencing the task team structure by
// decreasing the reference count and deallocate task team if no more
// references to it.
//
void
__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
{
kmp_uint ref_ct;
ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
__kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
if ( ref_ct == 0 ) {
__kmp_free_task_team( thread, task_team );
}
TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
}
//------------------------------------------------------------------------------
// __kmp_wait_to_unref_task_teams:
// Some threads could still be in the fork barrier release code, possibly
@@ -2475,9 +2450,7 @@ __kmp_wait_to_unref_task_teams(void)
#if KMP_OS_WINDOWS
// TODO: GEH - add this check for Linux* OS / OS X* as well?
if (!__kmp_is_thread_alive(thread, &exit_val)) {
if (TCR_PTR(thread->th.th_task_team) != NULL) {
__kmp_unref_task_team( thread->th.th_task_team, thread );
}
thread->th.th_task_team = NULL;
continue;
}
#endif
@@ -2517,34 +2490,46 @@ __kmp_wait_to_unref_task_teams(void)
// an already created, unused one if it already exists.
// This may be called by any thread, but only for teams with # threads >1.
void
__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
{
KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
// Allocate a new task team, which will be propagated to
// all of the worker threads after the barrier. As they
// spin in the barrier release phase, they will continue
// to use the previous task team struct, until they receive
// the signal to stop checking for tasks (they can't safely
// reference the kmp_team_t struct, which could be reallocated
// by the master thread).
// If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
// If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
__kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
// else: Either all threads have reported in, and no tasks were spawned for this release->gather region
// Leave the old task team struct in place for the upcoming region.
// No task teams are formed for serialized teams.
if (both) {
int other_team = 1 - this_thr->th.th_task_state;
if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
__kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
((team != NULL) ? team->t.t_id : -1), other_team ));
// After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
// allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
// previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
// reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
// serialized teams.
int other_team = 1 - this_thr->th.th_task_state;
if (team->t.t_task_team[other_team] == NULL && team->t.t_nproc > 1) { // setup other team as well
team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
__kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
((team != NULL) ? team->t.t_id : -1), other_team ));
}
else { // Leave the old task team struct in place for the upcoming region; adjust as needed
kmp_task_team_t *task_team = team->t.t_task_team[other_team];
if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
TCW_4(task_team->tt.tt_found_tasks, FALSE);
#if OMP_41_ENABLED
TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#endif
TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
TCW_4(task_team->tt.tt_active, TRUE );
}
// if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
__kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
((team != NULL) ? team->t.t_id : -1), other_team ));
}
}
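The else branch above is the heart of the change: the idle slot is never freed, only
re-armed for the next region when it was deactivated or the team was resized. A
minimal hypothetical sketch of that re-arm step (simplified fields):

/* Hypothetical sketch of re-arming the idle task_team slot in place of
   the old free/reallocate path (simplified fields). */
typedef struct {
    volatile int nproc, found_tasks, unfinished_threads, active;
} tt_t;

static void rearm_idle_slot(tt_t *tt, int team_nproc) {
    if (!tt->active || tt->nproc != team_nproc) {
        tt->nproc = team_nproc;              /* follow a resized team */
        tt->found_tasks = 0;
        tt->unfinished_threads = team_nproc; /* reset the barrier counter */
        tt->active = 1;                      /* ready for the next region */
    }
}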
@@ -2559,26 +2544,11 @@ __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
{
KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
// In case this thread never saw that the task team was no longer active, unref/deallocate it now.
if ( this_thr->th.th_task_team != NULL ) {
if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team (%p) is not active, unrefing\n",
__kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team));
__kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
}
#if KMP_DEBUG
else { // We are re-using a task team that was never enabled.
KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
}
#endif
}
// Toggle the th_task_state field, to switch which task_team this thread refers to
this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
// It is now safe to propagate the task team pointer from the team struct to the current thread.
TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to %p from Team #%d task team (parity=%d)\n",
KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
__kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
@@ -2586,11 +2556,14 @@ __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
//--------------------------------------------------------------------------------------------
// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created
// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
// optionally as the last argument. When wait is zero, master thread does not wait for
// unfinished_threads to reach 0.
void
__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
USE_ITT_BUILD_ARG(void * itt_sync_obj)
)
, int wait)
{
kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
@@ -2598,18 +2571,18 @@ __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
__kmp_gtid_from_thread(this_thr), task_team));
// Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
// here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
flag.wait(this_thr, TRUE
USE_ITT_BUILD_ARG(itt_sync_obj));
// Kill the old task team, so that the worker threads will stop referencing it while spinning.
// They will deallocate it when the reference count reaches zero.
// The master thread is not included in the ref count.
KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: setting active to false, setting local and team's pointer to NULL\n",
if (wait) {
KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
__kmp_gtid_from_thread(this_thr), task_team));
// Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
// here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
flag.wait(this_thr, TRUE
USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
"setting active to false, setting local and team's pointer to NULL\n",
__kmp_gtid_from_thread(this_thr), task_team));
#if OMP_41_ENABLED
KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
@@ -2621,7 +2594,6 @@ __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
KMP_MB();
TCW_PTR(this_thr->th.th_task_team, NULL);
team->t.t_task_team[this_thr->th.th_task_state] = NULL;
}
}
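The new wait parameter (declared with a default of 1 in kmp.h, per the comment above)
lets a caller skip the blocking phase: with wait == 1 the master spins until
tt_unfinished_threads reaches 0, with wait == 0 it goes straight to deactivating and
detaching the task_team. A simplified, hypothetical sketch of that logic:

/* Hypothetical sketch of the wait flag (the real code spin-waits via
   kmp_flag_32 on tt_unfinished_threads). */
typedef struct {
    volatile int unfinished_threads;
    volatile int active;
} ttw_t;

static void task_team_wait_sketch(ttw_t *tt, int wait) {
    if (wait) {
        while (tt->unfinished_threads > 0)
            ;                        /* block until all tasks are drained */
    }
    tt->active = 0;                  /* deactivate the slot either way */
}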

@@ -178,12 +178,14 @@ static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_
if (__kmp_tasking_mode != tskm_immediate_exec) {
task_team = this_thr->th.th_task_team;
if (task_team != NULL) {
if (!TCR_SYNC_4(task_team->tt.tt_active)) {
if (TCR_SYNC_4(task_team->tt.tt_active)) {
if (KMP_TASKING_ENABLED(task_team))
flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
USE_ITT_BUILD_ARG(itt_sync_obj), 0);
}
else {
KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
__kmp_unref_task_team(task_team, this_thr);
} else if (KMP_TASKING_ENABLED(task_team)) {
flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
USE_ITT_BUILD_ARG(itt_sync_obj), 0);
this_thr->th.th_task_team = NULL;
}
} // if
} // if
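For a spinning worker, the net effect of this last hunk is: while the task_team is
active, keep executing tasks if tasking is enabled; once the master deactivates it,
just drop the cached pointer instead of decrementing a reference count. A hedged
sketch (simplified types and a stubbed task executor):

/* Hypothetical sketch of the spin-wait check above (simplified types). */
typedef struct {
    volatile int active;        /* stands in for tt_active */
    volatile int enabled;       /* stands in for KMP_TASKING_ENABLED(...) */
} tts_t;

typedef struct { tts_t *task_team; } wt_t;

static void execute_tasks_stub(wt_t *w) { (void)w; /* run queued tasks */ }

static void spin_wait_step(wt_t *w) {
    tts_t *tt = w->task_team;
    if (tt != NULL) {
        if (tt->active) {
            if (tt->enabled)
                execute_tasks_stub(w);   /* still active: help with tasks */
        } else {
            w->task_team = NULL;         /* deactivated: detach, no unref */
        }
    }
}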