forked from OSchip/llvm-project
Tidy statistics collection
This removes some statistics counters and timers which were not used, adds new counters and timers for some language features that were not monitored previously and separates the counters and timers into those which are of interest for investigating user code and those which are only of interest to the developer of the runtime itself. The runtime developer statistics are now only collected if the additional #define KMP_DEVELOPER_STATS is set. Additional user statistics which are now collected include: * Count of nested parallelism (omp parallel inside a parallel region) * Count of omp distribute occurrences * Count of omp teams occurrences * Counts of task related statistics (taskyield, task execution, task cancellation, task steal) * Values passed to omp_set_numthreads * Time spent in omp single and omp master None of this affects code compiled without stats gathering enabled, which is the normal library build mode. This also fixes the CMake build by linking to the standard c++ library when building the stats library as it is a requirement. The normal library does not have this requirement and its link phase is left alone. Differential Revision: http://reviews.llvm.org/D11759 llvm-svn: 244677
This commit is contained in:
parent
827529e7a0
commit
45be450070
|
@ -254,6 +254,10 @@ set(LIBOMP_STATS FALSE CACHE BOOL
|
|||
if(LIBOMP_STATS AND (NOT LIBOMP_HAVE_STATS))
|
||||
libomp_error_say("Stats-gathering functionality requested but not available")
|
||||
endif()
|
||||
# The stats functionality requires the std c++ library
|
||||
if(LIBOMP_STATS)
|
||||
set(LIBOMP_USE_STDCPPLIB TRUE)
|
||||
endif()
|
||||
|
||||
# OMPT-support
|
||||
# TODO: Make this a real feature check
|
||||
|
|
|
@ -149,7 +149,10 @@ endif()
|
|||
# Remove any cmake-automatic linking of the standard C++ library.
|
||||
# We neither need (nor want) the standard C++ library dependency even though we compile c++ files.
|
||||
if(NOT ${LIBOMP_USE_STDCPPLIB})
|
||||
set(LIBOMP_LINKER_LANGUAGE C)
|
||||
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES)
|
||||
else()
|
||||
set(LIBOMP_LINKER_LANGUAGE CXX)
|
||||
endif()
|
||||
|
||||
# Add the OpenMP library
|
||||
|
@ -158,7 +161,7 @@ add_library(omp SHARED ${LIBOMP_SOURCE_FILES})
|
|||
set_target_properties(omp PROPERTIES
|
||||
PREFIX "" SUFFIX "" OUTPUT_NAME "${LIBOMP_LIB_FILE}"
|
||||
LINK_FLAGS "${LIBOMP_CONFIGURED_LDFLAGS}"
|
||||
LINKER_LANGUAGE C # use C Compiler for linking step
|
||||
LINKER_LANGUAGE ${LIBOMP_LINKER_LANGUAGE}
|
||||
SKIP_BUILD_RPATH true # have Mac linker -install_name just be "-install_name libomp.dylib"
|
||||
)
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
|
|||
void (*reduce)(void *, void *)
|
||||
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_linear_gather);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
|
||||
register kmp_team_t *team = this_thr->th.th_team;
|
||||
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
|
||||
register kmp_info_t **other_threads = team->t.t_threads;
|
||||
|
@ -123,7 +123,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
|
|||
int propagate_icvs
|
||||
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_linear_release);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
|
||||
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
|
||||
register kmp_team_t *team;
|
||||
|
||||
|
@ -141,17 +141,18 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
|
|||
|
||||
if (nproc > 1) {
|
||||
#if KMP_BARRIER_ICV_PUSH
|
||||
KMP_START_EXPLICIT_TIMER(USER_icv_copy);
|
||||
if (propagate_icvs) {
|
||||
ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
|
||||
for (i=1; i<nproc; ++i) {
|
||||
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
|
||||
ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
|
||||
&team->t.t_implicit_task_taskdata[0].td_icvs);
|
||||
{
|
||||
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
|
||||
if (propagate_icvs) {
|
||||
ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
|
||||
for (i=1; i<nproc; ++i) {
|
||||
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
|
||||
ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
|
||||
&team->t.t_implicit_task_taskdata[0].td_icvs);
|
||||
}
|
||||
ngo_sync();
|
||||
}
|
||||
ngo_sync();
|
||||
}
|
||||
KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
|
||||
#endif // KMP_BARRIER_ICV_PUSH
|
||||
|
||||
// Now, release all of the worker threads
|
||||
|
@ -217,7 +218,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
|
|||
void (*reduce)(void *, void *)
|
||||
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_tree_gather);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
|
||||
register kmp_team_t *team = this_thr->th.th_team;
|
||||
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
|
||||
register kmp_info_t **other_threads = team->t.t_threads;
|
||||
|
@ -312,7 +313,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
|
|||
int propagate_icvs
|
||||
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_tree_release);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
|
||||
register kmp_team_t *team;
|
||||
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
|
||||
register kmp_uint32 nproc;
|
||||
|
@ -381,14 +382,15 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
|
|||
#endif /* KMP_CACHE_MANAGE */
|
||||
|
||||
#if KMP_BARRIER_ICV_PUSH
|
||||
KMP_START_EXPLICIT_TIMER(USER_icv_copy);
|
||||
if (propagate_icvs) {
|
||||
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
|
||||
team, child_tid, FALSE);
|
||||
copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
|
||||
&team->t.t_implicit_task_taskdata[0].td_icvs);
|
||||
{
|
||||
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
|
||||
if (propagate_icvs) {
|
||||
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
|
||||
team, child_tid, FALSE);
|
||||
copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
|
||||
&team->t.t_implicit_task_taskdata[0].td_icvs);
|
||||
}
|
||||
}
|
||||
KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
|
||||
#endif // KMP_BARRIER_ICV_PUSH
|
||||
KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
|
||||
"go(%p): %u => %u\n", gtid, team->t.t_id, tid,
|
||||
|
@ -414,7 +416,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
|
|||
void (*reduce)(void *, void *)
|
||||
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_hyper_gather);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
|
||||
register kmp_team_t *team = this_thr->th.th_team;
|
||||
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
|
||||
register kmp_info_t **other_threads = team->t.t_threads;
|
||||
|
@ -520,7 +522,7 @@ __kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid
|
|||
int propagate_icvs
|
||||
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_hyper_release);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
|
||||
register kmp_team_t *team;
|
||||
register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
|
||||
register kmp_info_t **other_threads;
|
||||
|
@ -725,7 +727,7 @@ __kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
|
|||
int gtid, int tid, void (*reduce) (void *, void *)
|
||||
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_hier_gather);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
|
||||
register kmp_team_t *team = this_thr->th.th_team;
|
||||
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
|
||||
register kmp_uint32 nproc = this_thr->th.th_team_nproc;
|
||||
|
@ -853,7 +855,7 @@ __kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, i
|
|||
int propagate_icvs
|
||||
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_hier_release);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
|
||||
register kmp_team_t *team;
|
||||
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
|
||||
register kmp_uint32 nproc;
|
||||
|
@ -1035,7 +1037,7 @@ int
|
|||
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
|
||||
void *reduce_data, void (*reduce)(void *, void *))
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_barrier);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
|
||||
register int tid = __kmp_tid_from_gtid(gtid);
|
||||
register kmp_info_t *this_thr = __kmp_threads[gtid];
|
||||
register kmp_team_t *team = this_thr->th.th_team;
|
||||
|
@ -1294,7 +1296,7 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
|
|||
void
|
||||
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_end_split_barrier);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
|
||||
int tid = __kmp_tid_from_gtid(gtid);
|
||||
kmp_info_t *this_thr = __kmp_threads[gtid];
|
||||
kmp_team_t *team = this_thr->th.th_team;
|
||||
|
@ -1335,7 +1337,7 @@ __kmp_end_split_barrier(enum barrier_type bt, int gtid)
|
|||
void
|
||||
__kmp_join_barrier(int gtid)
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_join_barrier);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
|
||||
register kmp_info_t *this_thr = __kmp_threads[gtid];
|
||||
register kmp_team_t *team;
|
||||
register kmp_uint nproc;
|
||||
|
@ -1533,7 +1535,7 @@ __kmp_join_barrier(int gtid)
|
|||
void
|
||||
__kmp_fork_barrier(int gtid, int tid)
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_fork_barrier);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
|
||||
kmp_info_t *this_thr = __kmp_threads[gtid];
|
||||
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
|
||||
#if USE_ITT_BUILD
|
||||
|
@ -1648,15 +1650,16 @@ __kmp_fork_barrier(int gtid, int tid)
|
|||
this data before this function is called. We cannot modify __kmp_fork_call() to look at
|
||||
the fixed ICVs in the master's thread struct, because it is not always the case that the
|
||||
threads arrays have been allocated when __kmp_fork_call() is executed. */
|
||||
KMP_START_EXPLICIT_TIMER(USER_icv_copy);
|
||||
if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
|
||||
// Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
|
||||
KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
|
||||
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
|
||||
copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
|
||||
&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
|
||||
{
|
||||
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
|
||||
if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
|
||||
// Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
|
||||
KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
|
||||
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
|
||||
copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
|
||||
&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
|
||||
}
|
||||
}
|
||||
KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
|
||||
#endif // KMP_BARRIER_ICV_PULL
|
||||
|
||||
if (__kmp_tasking_mode != tskm_immediate_exec) {
|
||||
|
@ -1702,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid)
|
|||
void
|
||||
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_setup_icv_copy);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
|
||||
|
||||
KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
|
||||
KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
|
||||
|
|
|
@ -58,7 +58,7 @@ kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
|
|||
break;
|
||||
}
|
||||
case cancel_taskgroup:
|
||||
// cancellation requests for parallel and worksharing constructs
|
||||
// cancellation requests for a task group
|
||||
// are handled through the taskgroup structure
|
||||
{
|
||||
kmp_taskdata_t* task;
|
||||
|
@ -141,7 +141,7 @@ kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 c
|
|||
break;
|
||||
}
|
||||
case cancel_taskgroup:
|
||||
// cancellation requests for parallel and worksharing constructs
|
||||
// cancellation requests for a task group
|
||||
// are handled through the taskgroup structure
|
||||
{
|
||||
kmp_taskdata_t* task;
|
||||
|
|
|
@ -280,9 +280,21 @@ Do the actual fork and call the microtask in the relevant number of threads.
|
|||
void
|
||||
__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
|
||||
{
|
||||
KMP_STOP_EXPLICIT_TIMER(OMP_serial);
|
||||
KMP_COUNT_BLOCK(OMP_PARALLEL);
|
||||
int gtid = __kmp_entry_gtid();
|
||||
|
||||
#if (KMP_STATS_ENABLED)
|
||||
int inParallel = __kmpc_in_parallel(loc);
|
||||
if (inParallel)
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
|
||||
}
|
||||
else
|
||||
{
|
||||
KMP_STOP_EXPLICIT_TIMER(OMP_serial);
|
||||
KMP_COUNT_BLOCK(OMP_PARALLEL);
|
||||
}
|
||||
#endif
|
||||
|
||||
// maybe to save thr_state is enough here
|
||||
{
|
||||
va_list ap;
|
||||
|
@ -329,7 +341,10 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
|
|||
}
|
||||
#endif
|
||||
}
|
||||
KMP_START_EXPLICIT_TIMER(OMP_serial);
|
||||
#if (KMP_STATS_ENABLED)
|
||||
if (!inParallel)
|
||||
KMP_START_EXPLICIT_TIMER(OMP_serial);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if OMP_40_ENABLED
|
||||
|
@ -370,6 +385,8 @@ __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
|
|||
va_list ap;
|
||||
va_start( ap, microtask );
|
||||
|
||||
KMP_COUNT_BLOCK(OMP_TEAMS);
|
||||
|
||||
// remember teams entry point and nesting level
|
||||
this_thr->th.th_teams_microtask = microtask;
|
||||
this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
|
||||
|
@ -715,8 +732,10 @@ __kmpc_master(ident_t *loc, kmp_int32 global_tid)
|
|||
if( ! TCR_4( __kmp_init_parallel ) )
|
||||
__kmp_parallel_initialize();
|
||||
|
||||
if( KMP_MASTER_GTID( global_tid ))
|
||||
if( KMP_MASTER_GTID( global_tid )) {
|
||||
KMP_START_EXPLICIT_TIMER(OMP_master);
|
||||
status = 1;
|
||||
}
|
||||
|
||||
#if OMPT_SUPPORT && OMPT_TRACE
|
||||
if (status) {
|
||||
|
@ -764,6 +783,7 @@ __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
|
|||
KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
|
||||
|
||||
KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
|
||||
KMP_STOP_EXPLICIT_TIMER(OMP_master);
|
||||
|
||||
#if OMPT_SUPPORT && OMPT_TRACE
|
||||
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
|
||||
|
@ -1386,6 +1406,9 @@ __kmpc_single(ident_t *loc, kmp_int32 global_tid)
|
|||
{
|
||||
KMP_COUNT_BLOCK(OMP_SINGLE);
|
||||
kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
|
||||
if(rc == TRUE) {
|
||||
KMP_START_EXPLICIT_TIMER(OMP_single);
|
||||
}
|
||||
|
||||
#if OMPT_SUPPORT && OMPT_TRACE
|
||||
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
|
||||
|
@ -1427,6 +1450,7 @@ void
|
|||
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
|
||||
{
|
||||
__kmp_exit_single( global_tid );
|
||||
KMP_STOP_EXPLICIT_TIMER(OMP_single);
|
||||
|
||||
#if OMPT_SUPPORT && OMPT_TRACE
|
||||
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
|
||||
|
@ -2191,7 +2215,6 @@ int
|
|||
__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_test_lock);
|
||||
KMP_TIME_BLOCK(OMP_test_lock);
|
||||
|
||||
#if KMP_USE_DYNAMIC_LOCK
|
||||
int rc;
|
||||
|
|
|
@ -670,6 +670,7 @@ __kmp_dispatch_init(
|
|||
} else {
|
||||
pr->ordered = FALSE;
|
||||
}
|
||||
|
||||
if ( schedule == kmp_sch_static ) {
|
||||
schedule = __kmp_static;
|
||||
} else {
|
||||
|
@ -761,6 +762,19 @@ __kmp_dispatch_init(
|
|||
tc = 0; // zero-trip
|
||||
}
|
||||
|
||||
// Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
|
||||
// when statistics are disabled.
|
||||
if (schedule == __kmp_static)
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_static);
|
||||
KMP_COUNT_VALUE(FOR_static_iterations, tc);
|
||||
}
|
||||
else
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
|
||||
}
|
||||
|
||||
pr->u.p.lb = lb;
|
||||
pr->u.p.ub = ub;
|
||||
pr->u.p.st = st;
|
||||
|
@ -1384,6 +1398,11 @@ __kmp_dispatch_next(
|
|||
static const int ___kmp_size_type = sizeof( UT );
|
||||
#endif
|
||||
|
||||
// This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtme schedule
|
||||
// is static. (Which points out a disadavantage of schedule(runtime): even when static scheduling is used it costs
|
||||
// more than a compile time choice to use static scheduling would.)
|
||||
KMP_TIME_BLOCK(FOR_dynamic_scheduling);
|
||||
|
||||
int status;
|
||||
dispatch_private_info_template< T > * pr;
|
||||
kmp_info_t * th = __kmp_threads[ gtid ];
|
||||
|
@ -2164,7 +2183,6 @@ __kmp_dist_get_bounds(
|
|||
T *pupper,
|
||||
typename traits_t< T >::signed_t incr
|
||||
) {
|
||||
KMP_COUNT_BLOCK(OMP_DISTR_FOR_dynamic);
|
||||
typedef typename traits_t< T >::unsigned_t UT;
|
||||
typedef typename traits_t< T >::signed_t ST;
|
||||
register kmp_uint32 team_id;
|
||||
|
@ -2222,6 +2240,7 @@ __kmp_dist_get_bounds(
|
|||
} else {
|
||||
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
|
||||
}
|
||||
|
||||
if( trip_count <= nteams ) {
|
||||
KMP_DEBUG_ASSERT(
|
||||
__kmp_static == kmp_sch_static_greedy || \
|
||||
|
@ -2297,7 +2316,6 @@ void
|
|||
__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
|
||||
kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_DEBUG_ASSERT( __kmp_init_serial );
|
||||
__kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
|
||||
}
|
||||
|
@ -2308,7 +2326,6 @@ void
|
|||
__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
|
||||
kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_DEBUG_ASSERT( __kmp_init_serial );
|
||||
__kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
|
||||
}
|
||||
|
@ -2321,7 +2338,6 @@ __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
|
|||
kmp_int64 lb, kmp_int64 ub,
|
||||
kmp_int64 st, kmp_int64 chunk )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_DEBUG_ASSERT( __kmp_init_serial );
|
||||
__kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
|
||||
}
|
||||
|
@ -2334,7 +2350,6 @@ __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
|
|||
kmp_uint64 lb, kmp_uint64 ub,
|
||||
kmp_int64 st, kmp_int64 chunk )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_DEBUG_ASSERT( __kmp_init_serial );
|
||||
__kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
|
||||
}
|
||||
|
@ -2352,7 +2367,6 @@ void
|
|||
__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
|
||||
kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_DEBUG_ASSERT( __kmp_init_serial );
|
||||
__kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
|
||||
__kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
|
||||
|
@ -2362,7 +2376,6 @@ void
|
|||
__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
|
||||
kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_DEBUG_ASSERT( __kmp_init_serial );
|
||||
__kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
|
||||
__kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
|
||||
|
@ -2372,7 +2385,6 @@ void
|
|||
__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
|
||||
kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_DEBUG_ASSERT( __kmp_init_serial );
|
||||
__kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
|
||||
__kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
|
||||
|
@ -2382,7 +2394,6 @@ void
|
|||
__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
|
||||
kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
|
||||
{
|
||||
KMP_COUNT_BLOCK(OMP_FOR_dynamic);
|
||||
KMP_DEBUG_ASSERT( __kmp_init_serial );
|
||||
__kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
|
||||
__kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
|
||||
|
|
|
@ -1495,7 +1495,8 @@ __kmp_fork_call(
|
|||
kmp_hot_team_ptr_t **p_hot_teams;
|
||||
#endif
|
||||
{ // KMP_TIME_BLOCK
|
||||
KMP_TIME_BLOCK(KMP_fork_call);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
|
||||
KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
|
||||
|
||||
KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
|
||||
if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
|
||||
|
@ -1620,12 +1621,14 @@ __kmp_fork_call(
|
|||
}
|
||||
#endif
|
||||
|
||||
KMP_TIME_BLOCK(OMP_work);
|
||||
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
|
||||
{
|
||||
KMP_TIME_BLOCK(OMP_work);
|
||||
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
|
||||
#if OMPT_SUPPORT
|
||||
, exit_runtime_p
|
||||
, exit_runtime_p
|
||||
#endif
|
||||
);
|
||||
);
|
||||
}
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_status & ompt_status_track) {
|
||||
|
@ -2224,8 +2227,8 @@ __kmp_fork_call(
|
|||
} // END of timer KMP_fork_call block
|
||||
|
||||
{
|
||||
//KMP_TIME_BLOCK(OMP_work);
|
||||
KMP_TIME_BLOCK(USER_master_invoke);
|
||||
KMP_TIME_BLOCK(OMP_work);
|
||||
// KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
|
||||
if (! team->t.t_invoke( gtid )) {
|
||||
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
|
||||
}
|
||||
|
@ -2280,7 +2283,7 @@ __kmp_join_call(ident_t *loc, int gtid, enum fork_context_e fork_context
|
|||
#endif /* OMP_40_ENABLED */
|
||||
)
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_join_call);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
|
||||
kmp_team_t *team;
|
||||
kmp_team_t *parent_team;
|
||||
kmp_info_t *master_th;
|
||||
|
@ -2582,6 +2585,7 @@ __kmp_set_num_threads( int new_nth, int gtid )
|
|||
else if (new_nth > __kmp_max_nth)
|
||||
new_nth = __kmp_max_nth;
|
||||
|
||||
KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
|
||||
thread = __kmp_threads[gtid];
|
||||
|
||||
__kmp_save_internal_controls( thread );
|
||||
|
@ -4790,7 +4794,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
|
|||
kmp_internal_control_t *new_icvs,
|
||||
int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
|
||||
{
|
||||
KMP_TIME_BLOCK(KMP_allocate_team);
|
||||
KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
|
||||
int f;
|
||||
kmp_team_t *team;
|
||||
int use_hot_team = ! root->r.r_active;
|
||||
|
@ -5577,12 +5581,12 @@ __kmp_launch_thread( kmp_info_t *this_thr )
|
|||
}
|
||||
#endif
|
||||
|
||||
KMP_STOP_EXPLICIT_TIMER(USER_launch_thread_loop);
|
||||
KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
|
||||
{
|
||||
KMP_TIME_BLOCK(USER_worker_invoke);
|
||||
KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
|
||||
rc = (*pteam)->t.t_invoke( gtid );
|
||||
}
|
||||
KMP_START_EXPLICIT_TIMER(USER_launch_thread_loop);
|
||||
KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
|
||||
KMP_ASSERT( rc );
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
|
@ -6910,12 +6914,15 @@ __kmp_invoke_task_func( int gtid )
|
|||
#endif
|
||||
#endif
|
||||
|
||||
rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
|
||||
gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
|
||||
{
|
||||
KMP_TIME_BLOCK(OMP_work);
|
||||
rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
|
||||
gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
|
||||
#if OMPT_SUPPORT
|
||||
, exit_runtime_p
|
||||
, exit_runtime_p
|
||||
#endif
|
||||
);
|
||||
);
|
||||
}
|
||||
|
||||
#if OMPT_SUPPORT && OMPT_TRACE
|
||||
if (ompt_status & ompt_status_track) {
|
||||
|
|
|
@ -84,6 +84,8 @@ __kmp_for_static_init(
|
|||
typename traits_t< T >::signed_t chunk
|
||||
) {
|
||||
KMP_COUNT_BLOCK(OMP_FOR_static);
|
||||
KMP_TIME_BLOCK (FOR_static_scheduling);
|
||||
|
||||
typedef typename traits_t< T >::unsigned_t UT;
|
||||
typedef typename traits_t< T >::signed_t ST;
|
||||
/* this all has to be changed back to TID and such.. */
|
||||
|
@ -151,6 +153,7 @@ __kmp_for_static_init(
|
|||
team_info->microtask);
|
||||
}
|
||||
#endif
|
||||
KMP_COUNT_VALUE (FOR_static_iterations, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -246,6 +249,7 @@ __kmp_for_static_init(
|
|||
__kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
|
||||
}
|
||||
}
|
||||
KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
|
||||
|
||||
/* compute remaining parameters */
|
||||
switch ( schedtype ) {
|
||||
|
@ -372,7 +376,7 @@ __kmp_dist_for_static_init(
|
|||
typename traits_t< T >::signed_t incr,
|
||||
typename traits_t< T >::signed_t chunk
|
||||
) {
|
||||
KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
|
||||
KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
|
||||
typedef typename traits_t< T >::unsigned_t UT;
|
||||
typedef typename traits_t< T >::signed_t ST;
|
||||
register kmp_uint32 tid;
|
||||
|
@ -437,6 +441,7 @@ __kmp_dist_for_static_init(
|
|||
} else {
|
||||
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
|
||||
}
|
||||
|
||||
*pstride = *pupper - *plower; // just in case (can be unused)
|
||||
if( trip_count <= nteams ) {
|
||||
KMP_DEBUG_ASSERT(
|
||||
|
|
|
@ -521,16 +521,14 @@ void kmp_stats_output_module::outputStats(const char* heading)
|
|||
|
||||
// Special handling for synthesized statistics.
|
||||
// These just have to be coded specially here for now.
|
||||
// At present we only have one: the total parallel work done in each thread.
|
||||
// At present we only have a few:
|
||||
// The total parallel work done in each thread.
|
||||
// The variance here makes it easy to see load imbalance over the whole program (though, of course,
|
||||
// it's possible to have a code with awful load balance in every parallel region but perfect load
|
||||
// balance oever the whole program.)
|
||||
// The time spent in barriers in each thread.
|
||||
allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());
|
||||
|
||||
// Time waiting for work (synthesized)
|
||||
if ((t != 0) || !timeStat::workerOnly(timer_e(TIMER_OMP_await_work)))
|
||||
allStats[TIMER_Total_await_work].addSample ((*it)->getTimer(TIMER_OMP_await_work)->getTotal());
|
||||
|
||||
// Time in explicit barriers.
|
||||
allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());
|
||||
|
||||
|
|
|
@ -31,6 +31,11 @@
|
|||
#include <new> // placement new
|
||||
#include "kmp_stats_timing.h"
|
||||
|
||||
/*
|
||||
* Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
|
||||
* are intended for the runtime library developer.
|
||||
*/
|
||||
// #define KMP_DEVELOPER_STATS 1
|
||||
|
||||
/*!
|
||||
* @ingroup STATS_GATHERING
|
||||
|
@ -56,7 +61,7 @@ class stats_flags_e {
|
|||
* Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread
|
||||
* as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement).
|
||||
* The min,mean,max are therefore the values for the threads.
|
||||
* Adding the counter here and then putting in a KMP_BLOCK_COUNTER(name) is all you need to do.
|
||||
* Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do.
|
||||
* All of the tables and printing is generated from this macro.
|
||||
* Format is "macro(name, flags, arg)"
|
||||
*
|
||||
|
@ -64,21 +69,30 @@ class stats_flags_e {
|
|||
*/
|
||||
#define KMP_FOREACH_COUNTER(macro, arg) \
|
||||
macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \
|
||||
macro (OMP_NESTED_PARALLEL, 0, arg) \
|
||||
macro (OMP_FOR_static, 0, arg) \
|
||||
macro (OMP_FOR_dynamic, 0, arg) \
|
||||
macro (OMP_DISTR_FOR_static, 0, arg) \
|
||||
macro (OMP_DISTR_FOR_dynamic, 0, arg) \
|
||||
macro (OMP_DISTRIBUTE, 0, arg) \
|
||||
macro (OMP_BARRIER, 0, arg) \
|
||||
macro (OMP_CRITICAL,0, arg) \
|
||||
macro (OMP_SINGLE, 0, arg) \
|
||||
macro (OMP_MASTER, 0, arg) \
|
||||
macro (OMP_TEAMS, 0, arg) \
|
||||
macro (OMP_set_lock, 0, arg) \
|
||||
macro (OMP_test_lock, 0, arg) \
|
||||
macro (OMP_test_lock_failure, 0, arg) \
|
||||
macro (REDUCE_wait, 0, arg) \
|
||||
macro (REDUCE_nowait, 0, arg) \
|
||||
macro (OMP_TASKYIELD, 0, arg) \
|
||||
macro (TASK_executed, 0, arg) \
|
||||
macro (TASK_cancelled, 0, arg) \
|
||||
macro (TASK_stolen, 0, arg) \
|
||||
macro (LAST,0,arg)
|
||||
|
||||
// OMP_PARALLEL_args -- the number of arguments passed to a fork
|
||||
// FOR_static_iterations -- Number of available parallel chunks of work in a static for
|
||||
// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
|
||||
// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
|
||||
|
||||
/*!
|
||||
* \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
|
||||
*
|
||||
|
@ -87,72 +101,45 @@ class stats_flags_e {
|
|||
*
|
||||
* \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads.
|
||||
* The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork"
|
||||
* as well, or we could collect "loop iteration count" if we wanted to).
|
||||
* as well).
|
||||
* For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level.
|
||||
* Format is "macro(name, flags, arg)"
|
||||
*
|
||||
* @ingroup STATS_GATHERING
|
||||
* @ingroup STATS_GATHERING2
|
||||
*/
|
||||
#define KMP_FOREACH_TIMER(macro, arg) \
|
||||
macro (OMP_PARALLEL_args, stats_flags_e::onlyInMaster | stats_flags_e::noUnits, arg) \
|
||||
macro (FOR_static_iterations, stats_flags_e::onlyInMaster | stats_flags_e::noUnits, arg) \
|
||||
macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \
|
||||
#define KMP_FOREACH_TIMER(macro, arg) \
|
||||
macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \
|
||||
macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \
|
||||
macro (OMP_work, 0, arg) \
|
||||
macro (Total_work, stats_flags_e::synthesized, arg) \
|
||||
macro (OMP_await_work, stats_flags_e::notInMaster, arg) \
|
||||
macro (Total_await_work, stats_flags_e::synthesized, arg) \
|
||||
macro (OMP_barrier, 0, arg) \
|
||||
macro (Total_barrier, stats_flags_e::synthesized, arg) \
|
||||
macro (OMP_test_lock, 0, arg) \
|
||||
macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \
|
||||
macro (FOR_static_scheduling, 0, arg) \
|
||||
macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \
|
||||
macro (FOR_dynamic_scheduling, 0, arg) \
|
||||
macro (KMP_fork_call, 0, arg) \
|
||||
macro (KMP_join_call, 0, arg) \
|
||||
macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_barrier, 0, arg) \
|
||||
macro (KMP_end_split_barrier, 0, arg) \
|
||||
macro (KMP_wait_sleep, 0, arg) \
|
||||
macro (KMP_release, 0, arg) \
|
||||
macro (KMP_hier_gather, 0, arg) \
|
||||
macro (KMP_hier_release, 0, arg) \
|
||||
macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_linear_gather, 0, arg) \
|
||||
macro (KMP_linear_release, 0, arg) \
|
||||
macro (KMP_tree_gather, 0, arg) \
|
||||
macro (KMP_tree_release, 0, arg) \
|
||||
macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
|
||||
macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
|
||||
macro (USER_resume, stats_flags_e::logEvent, arg) \
|
||||
macro (USER_suspend, stats_flags_e::logEvent, arg) \
|
||||
macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_allocate_team, 0, arg) \
|
||||
macro (KMP_setup_icv_copy, 0, arg) \
|
||||
macro (USER_icv_copy, 0, arg) \
|
||||
macro (TASK_execution, 0, arg) \
|
||||
macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \
|
||||
macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \
|
||||
macro (OMP_single, 0, arg) \
|
||||
macro (OMP_master, 0, arg) \
|
||||
KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
|
||||
macro (LAST,0, arg)
|
||||
|
||||
|
||||
|
||||
// OMP_PARALLEL_args -- the number of arguments passed to a fork
|
||||
// FOR_static_iterations -- Number of available parallel chunks of work in a static for
|
||||
// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
|
||||
// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
|
||||
// OMP_serial -- thread zero time executing serial code
|
||||
// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit
|
||||
// OMP_serial -- thread zero time executing serial code
|
||||
// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread)
|
||||
// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed.
|
||||
// OMP_barrier -- time at "real" barriers
|
||||
// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread
|
||||
// OMP_set_lock -- time in lock setting
|
||||
// OMP_test_lock -- time in testing a lock
|
||||
// LOCK_WAIT -- time waiting for a lock
|
||||
// FOR_static_scheduling -- time spent doing scheduling for a static "for"
|
||||
// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for"
|
||||
// KMP_wait_sleep -- time in __kmp_wait_sleep
|
||||
// KMP_release -- time in __kmp_release
|
||||
|
||||
#if (KMP_DEVELOPER_STATS)
|
||||
// Timers which are of interest tio runtime library developers, not end users.
|
||||
// THese have to be explicitly enabled in addition to the other stats.
|
||||
|
||||
// KMP_fork_barrier -- time in __kmp_fork_barrier
|
||||
// KMP_join_barrier -- time in __kmp_join_barrier
|
||||
// KMP_barrier -- time in __kmp_barrier
|
||||
|
@ -165,6 +152,32 @@ class stats_flags_e {
|
|||
// KMP_tree_release -- time in __kmp_tree_barrier_release
|
||||
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
|
||||
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
|
||||
# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
|
||||
macro (KMP_fork_call, 0, arg) \
|
||||
macro (KMP_join_call, 0, arg) \
|
||||
macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_barrier, 0, arg) \
|
||||
macro (KMP_end_split_barrier, 0, arg) \
|
||||
macro (KMP_hier_gather, 0, arg) \
|
||||
macro (KMP_hier_release, 0, arg) \
|
||||
macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_linear_gather, 0, arg) \
|
||||
macro (KMP_linear_release, 0, arg) \
|
||||
macro (KMP_tree_gather, 0, arg) \
|
||||
macro (KMP_tree_release, 0, arg) \
|
||||
macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
|
||||
macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
|
||||
macro (USER_resume, stats_flags_e::logEvent, arg) \
|
||||
macro (USER_suspend, stats_flags_e::logEvent, arg) \
|
||||
macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
|
||||
macro (KMP_allocate_team, 0, arg) \
|
||||
macro (KMP_setup_icv_copy, 0, arg) \
|
||||
macro (USER_icv_copy, 0, arg)
|
||||
#else
|
||||
# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
|
||||
|
@ -182,13 +195,21 @@ class stats_flags_e {
|
|||
*
|
||||
* @ingroup STATS_GATHERING
|
||||
*/
|
||||
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
|
||||
macro(OMP_serial, 0, arg) \
|
||||
macro(OMP_start_end, 0, arg) \
|
||||
macro(USER_icv_copy, 0, arg) \
|
||||
macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
|
||||
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
|
||||
macro(OMP_serial, 0, arg) \
|
||||
macro(OMP_start_end, 0, arg) \
|
||||
macro(OMP_single, 0, arg) \
|
||||
macro(OMP_master, 0, arg) \
|
||||
KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
|
||||
macro(LAST, 0, arg)
|
||||
|
||||
#if (KMP_DEVELOPER_STATS)
|
||||
# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \
|
||||
macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg)
|
||||
#else
|
||||
# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)
|
||||
#endif
|
||||
|
||||
#define ENUMERATE(name,ignore,prefix) prefix##name,
|
||||
enum timer_e {
|
||||
KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
|
||||
|
@ -689,6 +710,21 @@ extern kmp_stats_output_module __kmp_stats_output;
|
|||
*/
|
||||
#define KMP_RESET_STATS() __kmp_reset_stats()
|
||||
|
||||
#if (KMP_DEVELOPER_STATS)
|
||||
# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
|
||||
# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v)
|
||||
# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
|
||||
# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
|
||||
# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
|
||||
#else
|
||||
// Null definitions
|
||||
# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
|
||||
# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
|
||||
# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
|
||||
# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
|
||||
# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
|
||||
#endif
|
||||
|
||||
#else // KMP_STATS_ENABLED
|
||||
|
||||
// Null definitions
|
||||
|
@ -701,6 +737,11 @@ extern kmp_stats_output_module __kmp_stats_output;
|
|||
#define KMP_OUTPUT_STATS(heading_string) ((void)0)
|
||||
#define KMP_RESET_STATS() ((void)0)
|
||||
|
||||
#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
|
||||
#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
|
||||
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
|
||||
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
|
||||
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
|
||||
#endif // KMP_STATS_ENABLED
|
||||
|
||||
#endif // KMP_STATS_H
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "kmp_i18n.h"
|
||||
#include "kmp_itt.h"
|
||||
#include "kmp_wait_release.h"
|
||||
#include "kmp_stats.h"
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
#include "ompt-specific.h"
|
||||
|
@ -1136,6 +1137,7 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_ta
|
|||
kmp_team_t * this_team = this_thr->th.th_team;
|
||||
kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
|
||||
if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
|
||||
KMP_COUNT_BLOCK(TASK_cancelled);
|
||||
// this task belongs to a task group and we need to cancel it
|
||||
discard = 1 /* true */;
|
||||
}
|
||||
|
@ -1146,6 +1148,8 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_ta
|
|||
// Thunks generated by gcc take a different argument list.
|
||||
//
|
||||
if (!discard) {
|
||||
KMP_COUNT_BLOCK(TASK_executed);
|
||||
KMP_TIME_BLOCK (TASK_execution);
|
||||
#endif // OMP_40_ENABLED
|
||||
#ifdef KMP_GOMP_COMPAT
|
||||
if (taskdata->td_flags.native) {
|
||||
|
@ -1356,6 +1360,8 @@ __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
|
|||
kmp_info_t * thread;
|
||||
int thread_finished = FALSE;
|
||||
|
||||
KMP_COUNT_BLOCK(OMP_TASKYIELD);
|
||||
|
||||
KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
|
||||
gtid, loc_ref, end_part) );
|
||||
|
||||
|
@ -1648,6 +1654,7 @@ __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team
|
|||
|
||||
__kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
|
||||
|
||||
KMP_COUNT_BLOCK(TASK_stolen);
|
||||
KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
|
||||
"ntasks=%d head=%u tail=%u\n",
|
||||
gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
|
||||
|
|
|
@ -1688,7 +1688,7 @@ __kmp_suspend_uninitialize_thread( kmp_info_t *th )
|
|||
template <class C>
|
||||
static inline void __kmp_suspend_template( int th_gtid, C *flag )
|
||||
{
|
||||
KMP_TIME_BLOCK(USER_suspend);
|
||||
KMP_TIME_DEVELOPER_BLOCK(USER_suspend);
|
||||
kmp_info_t *th = __kmp_threads[th_gtid];
|
||||
int status;
|
||||
typename C::flag_t old_spin;
|
||||
|
@ -1826,6 +1826,7 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
|
|||
template <class C>
|
||||
static inline void __kmp_resume_template( int target_gtid, C *flag )
|
||||
{
|
||||
KMP_TIME_DEVELOPER_BLOCK(USER_resume);
|
||||
kmp_info_t *th = __kmp_threads[target_gtid];
|
||||
int status;
|
||||
|
||||
|
@ -1900,7 +1901,6 @@ void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
|
|||
void
|
||||
__kmp_resume_monitor()
|
||||
{
|
||||
KMP_TIME_BLOCK(USER_resume);
|
||||
int status;
|
||||
#ifdef KMP_DEBUG
|
||||
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
|
||||
|
|
Loading…
Reference in New Issue