[OpenMP] Add strict mode in num_tasks and grainsize
This patch adds a new API, __kmpc_taskloop_5, to accommodate the strict modifier (introduced in OpenMP 5.1) in the num_tasks and grainsize clauses.
Differential Revision: https://reviews.llvm.org/D92352
Parent: c3ff9939bf
Commit: 540007b427
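For context, a minimal user-level sketch (not part of the patch) of what this runtime change supports: the OpenMP 5.1 strict modifier on grainsize/num_tasks, assuming a compiler with OpenMP 5.1 taskloop support that lowers the clause to __kmpc_taskloop_5 (the exact lowering is the compiler's choice).

// Hedged example: with strict, every generated task except possibly the
// last one runs exactly 7 iterations; for 40 iterations that is
// ceil(40/7) = 6 tasks, the last one running 5 iterations.
#include <stdio.h>
int main(void) {
  int a[40];
#pragma omp parallel
#pragma omp single
#pragma omp taskloop grainsize(strict : 7)
  for (int i = 0; i < 40; i++)
    a[i] = i;
  printf("a[39] = %d\n", a[39]);
  return 0;
}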
@@ -371,6 +371,7 @@ kmpc_set_defaults 224
     __kmpc_doacross_fini 264
     __kmpc_taskloop 266
     __kmpc_critical_with_hint 270
+    __kmpc_taskloop_5 285
 %endif
     kmpc_aligned_malloc 265
     kmpc_set_disp_num_buffers 267
@@ -3783,6 +3783,12 @@ KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
                                 kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
                                 kmp_int32 sched, kmp_uint64 grainsize,
                                 void *task_dup);
+KMP_EXPORT void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,
+                                  kmp_task_t *task, kmp_int32 if_val,
+                                  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                                  kmp_int32 nogroup, kmp_int32 sched,
+                                  kmp_uint64 grainsize, kmp_int32 modifier,
+                                  void *task_dup);
 KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
 KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
 KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
@@ -4142,6 +4142,7 @@ public:
 // num_tasks Number of tasks to execute
 // grainsize Number of loop iterations per task
 // extras Number of chunks with grainsize+1 iterations
+// last_chunk Reduction of grainsize for last task
 // tc Iterations count
 // task_dup Tasks duplication routine
 // codeptr_ra Return address for OMPT events
@@ -4149,7 +4150,7 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                            kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                            kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                            kmp_uint64 grainsize, kmp_uint64 extras,
-                           kmp_uint64 tc,
+                           kmp_int64 last_chunk, kmp_uint64 tc,
 #if OMPT_SUPPORT
                            void *codeptr_ra,
 #endif
@@ -4167,13 +4168,14 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
   kmp_task_t *next_task;
   kmp_int32 lastpriv = 0;
 
-  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+  KMP_DEBUG_ASSERT(
+      tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
   KMP_DEBUG_ASSERT(num_tasks > extras);
   KMP_DEBUG_ASSERT(num_tasks > 0);
   KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
-                "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
-                gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
-                task_dup));
+                "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
+                gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
+                ub_glob, st, task_dup));
 
   // Launch num_tasks tasks, assign grainsize iterations each task
   for (i = 0; i < num_tasks; ++i) {
@@ -4185,6 +4187,9 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
       --extras; // first extras iterations get bigger chunk (grainsize+1)
     }
     upper = lower + st * chunk_minus_1;
+    if (upper > *ub) {
+      upper = *ub;
+    }
     if (i == num_tasks - 1) {
       // schedule the last task, set lastprivate flag if needed
       if (st == 1) { // most common case
@@ -4248,6 +4253,7 @@ typedef struct __taskloop_params {
   kmp_uint64 num_tasks;
   kmp_uint64 grainsize;
   kmp_uint64 extras;
+  kmp_int64 last_chunk;
   kmp_uint64 tc;
   kmp_uint64 num_t_min;
 #if OMPT_SUPPORT
@@ -4257,7 +4263,8 @@ typedef struct __taskloop_params {
 
 void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                           kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
-                          kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
+                          kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
+                          kmp_uint64,
 #if OMPT_SUPPORT
                           void *,
 #endif
@@ -4277,6 +4284,7 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
   kmp_uint64 num_tasks = p->num_tasks;
   kmp_uint64 grainsize = p->grainsize;
   kmp_uint64 extras = p->extras;
+  kmp_int64 last_chunk = p->last_chunk;
   kmp_uint64 tc = p->tc;
   kmp_uint64 num_t_min = p->num_t_min;
 #if OMPT_SUPPORT
@@ -4285,22 +4293,23 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
 #if KMP_DEBUG
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
-  KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
-                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
-                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
-                task_dup));
+  KA_TRACE(20,
+           ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
+            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
+            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
+            st, task_dup));
 #endif
   KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
   if (num_tasks > num_t_min)
     __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                         grainsize, extras, tc, num_t_min,
+                         grainsize, extras, last_chunk, tc, num_t_min,
 #if OMPT_SUPPORT
                          codeptr_ra,
 #endif
                          task_dup);
   else
     __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc,
+                          grainsize, extras, last_chunk, tc,
 #if OMPT_SUPPORT
                           codeptr_ra,
 #endif
@@ -4323,6 +4332,7 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
 // num_tasks Number of tasks to execute
 // grainsize Number of loop iterations per task
 // extras Number of chunks with grainsize+1 iterations
+// last_chunk Reduction of grainsize for last task
 // tc Iterations count
 // num_t_min Threshold to launch tasks recursively
 // task_dup Tasks duplication routine
@@ -4331,7 +4341,8 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
-                          kmp_uint64 tc, kmp_uint64 num_t_min,
+                          kmp_int64 last_chunk, kmp_uint64 tc,
+                          kmp_uint64 num_t_min,
 #if OMPT_SUPPORT
                           void *codeptr_ra,
 #endif
@@ -4339,10 +4350,11 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
   KMP_DEBUG_ASSERT(num_tasks > num_t_min);
-  KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
-                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
-                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
-                task_dup));
+  KA_TRACE(20,
+           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
+            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
+            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
+            st, task_dup));
   p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
   kmp_uint64 lower = *lb;
   kmp_info_t *thread = __kmp_threads[gtid];
@@ -4353,16 +4365,23 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   size_t upper_offset =
       (char *)ub - (char *)task; // remember offset of ub in the task structure
 
-  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+  KMP_DEBUG_ASSERT(
+      tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
   KMP_DEBUG_ASSERT(num_tasks > extras);
   KMP_DEBUG_ASSERT(num_tasks > 0);
 
   // split the loop in two halves
   kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
+  kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
   kmp_uint64 gr_size0 = grainsize;
   kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
   kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
-  if (n_tsk0 <= extras) {
+  if (last_chunk < 0) {
+    ext0 = ext1 = 0;
+    last_chunk1 = last_chunk;
+    tc0 = grainsize * n_tsk0;
+    tc1 = tc - tc0;
+  } else if (n_tsk0 <= extras) {
     gr_size0++; // integrate extras into grainsize
     ext0 = 0; // no extra iters in 1st half
     ext1 = extras - n_tsk0; // remaining extras
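A standalone sketch (not part of the patch) of the split above, under assumed example values (40 iterations with strict grainsize 6, so num_tasks = 7 and last_chunk = 40 - 7*6 = -2): when last_chunk is negative there are no extras, the first half gets exactly grainsize * n_tsk0 iterations, and the short last chunk stays with the deferred second half.

#include <assert.h>
int main(void) {
  unsigned long long tc = 40, grainsize = 6, num_tasks = 7;
  long long last_chunk = -2; // tc - num_tasks * grainsize
  unsigned long long n_tsk0 = num_tasks >> 1;     // 3 tasks executed now
  unsigned long long n_tsk1 = num_tasks - n_tsk0; // 4 tasks deferred
  // last_chunk < 0 branch: no extras, short chunk goes to the second half
  unsigned long long tc0 = grainsize * n_tsk0;    // 18 iterations
  unsigned long long tc1 = tc - tc0;              // 22 iterations
  long long last_chunk1 = last_chunk;
  assert((long long)tc1 == (long long)(n_tsk1 * grainsize) + last_chunk1);
  assert(tc0 + tc1 == tc);
  return 0;
}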
@@ -4404,6 +4423,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   p->num_tasks = n_tsk1;
   p->grainsize = grainsize;
   p->extras = ext1;
+  p->last_chunk = last_chunk1;
   p->tc = tc1;
   p->num_t_min = num_t_min;
 #if OMPT_SUPPORT
@@ -4420,44 +4440,28 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   // execute the 1st half of current subrange
   if (n_tsk0 > num_t_min)
     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
-                         ext0, tc0, num_t_min,
+                         ext0, last_chunk0, tc0, num_t_min,
 #if OMPT_SUPPORT
                          codeptr_ra,
 #endif
                          task_dup);
   else
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
-                          gr_size0, ext0, tc0,
+                          gr_size0, ext0, last_chunk0, tc0,
 #if OMPT_SUPPORT
                           codeptr_ra,
 #endif
                           task_dup);
 
-  KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
+  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
 }
 
-/*!
-@ingroup TASKING
-@param loc Source location information
-@param gtid Global thread ID
-@param task Task structure
-@param if_val Value of the if clause
-@param lb Pointer to loop lower bound in task structure
-@param ub Pointer to loop upper bound in task structure
-@param st Loop stride
-@param nogroup Flag, 1 if no taskgroup needs to be added, 0 otherwise
-@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
-@param grainsize Schedule value if specified
-@param task_dup Tasks duplication routine
-
-Execute the taskloop construct.
-*/
-void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
-                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
-                     int sched, kmp_uint64 grainsize, void *task_dup) {
+static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                           int nogroup, int sched, kmp_uint64 grainsize,
+                           int modifier, void *task_dup) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
   __kmp_assert_valid_gtid(gtid);
   if (nogroup == 0) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -4474,13 +4478,16 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
   kmp_uint64 upper = task_bounds.get_ub();
   kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
   kmp_uint64 num_tasks = 0, extras = 0;
+  kmp_int64 last_chunk =
+      0; // reduce grainsize of last task by last_chunk in strict mode
   kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *current_task = thread->th.th_current_task;
 
-  KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
-                "grain %llu(%d), dup %p\n",
-                gtid, taskdata, lower, upper, st, grainsize, sched, task_dup));
+  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
+                "grain %llu(%d, %d), dup %p\n",
+                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
+                task_dup));
 
   // compute trip count
   if (st == 1) { // most common case
@@ -4491,7 +4498,7 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     tc = (upper - lower) / st + 1;
   }
   if (tc == 0) {
-    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
+    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
     // free the pattern task and exit
     __kmp_task_start(gtid, task, current_task);
     // do not execute anything for zero-trip loop
@@ -4533,20 +4540,28 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     break;
   case 1: // grainsize provided
     if (grainsize > tc) {
-      num_tasks = 1; // too big grainsize requested, adjust values
-      grainsize = tc;
+      num_tasks = 1;
+      grainsize = tc; // too big grainsize requested, adjust values
       extras = 0;
     } else {
-      num_tasks = tc / grainsize;
-      // adjust grainsize for balanced distribution of iterations
-      grainsize = tc / num_tasks;
-      extras = tc % num_tasks;
+      if (modifier) {
+        num_tasks = (tc + grainsize - 1) / grainsize;
+        last_chunk = tc - (num_tasks * grainsize);
+        extras = 0;
+      } else {
+        num_tasks = tc / grainsize;
+        // adjust grainsize for balanced distribution of iterations
+        grainsize = tc / num_tasks;
+        extras = tc % num_tasks;
+      }
     }
     break;
   default:
     KMP_ASSERT2(0, "unknown scheduling of taskloop");
   }
-  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+
+  KMP_DEBUG_ASSERT(
+      tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
   KMP_DEBUG_ASSERT(num_tasks > extras);
   KMP_DEBUG_ASSERT(num_tasks > 0);
   // =========================================================================
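A quick worked check of the strict-grainsize arithmetic above (a standalone sketch, not part of the patch), using the same numbers as the test added below (tc = 40 iterations, grainsize = 6):

#include <assert.h>
int main(void) {
  unsigned long long tc = 40, grainsize = 6;
  // strict mode: ceil(tc / grainsize) tasks, last one shortened by last_chunk
  unsigned long long num_tasks = (tc + grainsize - 1) / grainsize;             // 7
  long long last_chunk = (long long)tc - (long long)(num_tasks * grainsize);   // -2
  assert(num_tasks == 7 && last_chunk == -2);
  // matches the runtime invariant when extras == 0 and last_chunk < 0
  assert((long long)tc == (long long)(num_tasks * grainsize) + last_chunk);
  return 0; // last task runs grainsize + last_chunk = 4 iterations
}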
@@ -4558,7 +4573,7 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
     // always start serial tasks linearly
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc,
+                          grainsize, extras, last_chunk, tc,
 #if OMPT_SUPPORT
                           OMPT_GET_RETURN_ADDRESS(0),
 #endif
@@ -4566,21 +4581,23 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     // !taskdata->td_flags.native => currently force linear spawning of tasks
     // for GOMP_taskloop
   } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
-    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
-                  "(%lld), grain %llu, extras %llu\n",
-                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
+    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
+                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
+                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
+                  last_chunk));
     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                         grainsize, extras, tc, num_tasks_min,
+                         grainsize, extras, last_chunk, tc, num_tasks_min,
 #if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
 #endif
                          task_dup);
   } else {
-    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
-                  "(%lld), grain %llu, extras %llu\n",
-                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
+    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
+                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
+                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
+                  last_chunk));
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc,
+                          grainsize, extras, last_chunk, tc,
 #if OMPT_SUPPORT
                           OMPT_GET_RETURN_ADDRESS(0),
 #endif
@@ -4601,5 +4618,59 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
 #endif
     __kmpc_end_taskgroup(loc, gtid);
   }
-  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
+  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
 }
+
+/*!
+@ingroup TASKING
+@param loc Source location information
+@param gtid Global thread ID
+@param task Task structure
+@param if_val Value of the if clause
+@param lb Pointer to loop lower bound in task structure
+@param ub Pointer to loop upper bound in task structure
+@param st Loop stride
+@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
+@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
+@param grainsize Schedule value if specified
+@param task_dup Tasks duplication routine
+
+Execute the taskloop construct.
+*/
+void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
+                     int sched, kmp_uint64 grainsize, void *task_dup) {
+  __kmp_assert_valid_gtid(gtid);
+  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
+  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
+                 0, task_dup);
+  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
+}
+
+/*!
+@ingroup TASKING
+@param loc Source location information
+@param gtid Global thread ID
+@param task Task structure
+@param if_val Value of the if clause
+@param lb Pointer to loop lower bound in task structure
+@param ub Pointer to loop upper bound in task structure
+@param st Loop stride
+@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
+@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
+@param grainsize Schedule value if specified
+@param modifier Modifier 'strict' for sched, 1 if present, 0 otherwise
+@param task_dup Tasks duplication routine
+
+Execute the taskloop construct.
+*/
+void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                       int nogroup, int sched, kmp_uint64 grainsize,
+                       int modifier, void *task_dup) {
+  __kmp_assert_valid_gtid(gtid);
+  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
+  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
+                 modifier, task_dup);
+  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
+}
@@ -0,0 +1,167 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+
+#include <stdio.h>
+#include <omp.h>
+#include "omp_my_sleep.h"
+
+#define N 4
+#define ST 3
+#define UB 118
+#define LB 0
+
+// globals
+int counter;
+int task_count;
+
+// Compiler-generated code (emulation)
+typedef struct ident {
+  void* dummy;
+} ident_t;
+
+typedef struct shar {
+  int *pcounter;
+  int *pj;
+  int *ptask_count;
+} *pshareds;
+
+typedef struct task {
+  pshareds shareds;
+  int(* routine)(int,struct task*);
+  int part_id;
+  unsigned long long lb; // library always uses ULONG
+  unsigned long long ub;
+  int st;
+  int last;
+  int i;
+  int j;
+  int th;
+} *ptask, kmp_task_t;
+
+typedef int(* task_entry_t)( int, ptask );
+
+void
+__task_dup_entry(ptask task_dst, ptask task_src, int lastpriv)
+{
+  // setup lastprivate flag
+  task_dst->last = lastpriv;
+  // could be constructor calls here...
+}
+
+// OpenMP RTL interfaces
+typedef unsigned long long kmp_uint64;
+typedef long long kmp_int64;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void
+__kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                  int nogroup, int sched, kmp_int64 grainsize, int modifier,
+                  void *task_dup);
+ptask
+__kmpc_omp_task_alloc(ident_t *loc, int gtid, int flags,
+                      size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+                      task_entry_t task_entry);
+void __kmpc_atomic_fixed4_add(void *id_ref, int gtid, int * lhs, int rhs);
+int __kmpc_global_thread_num(void *id_ref);
+#ifdef __cplusplus
+}
+#endif
+
+// User's code
+int task_entry(int gtid, ptask task)
+{
+  pshareds pshar = task->shareds;
+  __kmpc_atomic_fixed4_add(NULL, gtid, pshar->ptask_count, 1);
+
+  for (task->i = task->lb; task->i <= (int)task->ub; task->i += task->st) {
+    task->th = omp_get_thread_num();
+    __kmpc_atomic_fixed4_add(NULL,gtid,pshar->pcounter,1);
+    task->j = task->i;
+  }
+  my_sleep( 0.1 ); // sleep 100 ms in order to allow other threads to steal tasks
+  if (task->last) {
+    *(pshar->pj) = task->j; // lastprivate
+  }
+  return 0;
+}
+
+void task_loop(int sched_type, int sched_val, int modifier)
+{
+  int i, j, gtid = __kmpc_global_thread_num(NULL);
+  ptask task;
+  pshareds psh;
+  omp_set_dynamic(0);
+  counter = 0;
+  task_count = 0;
+  #pragma omp parallel num_threads(N)
+  {
+    #pragma omp master
+    {
+      int gtid = __kmpc_global_thread_num(NULL);
+      task = __kmpc_omp_task_alloc(NULL, gtid, 1, sizeof(struct task),
+                                   sizeof(struct shar), &task_entry);
+      psh = task->shareds;
+      psh->pcounter = &counter;
+      psh->ptask_count = &task_count;
+      psh->pj = &j;
+      task->lb = LB;
+      task->ub = UB;
+      task->st = ST;
+
+      __kmpc_taskloop_5(
+          NULL,                    // location
+          gtid,                    // gtid
+          task,                    // task structure
+          1,                       // if clause value
+          &task->lb,               // lower bound
+          &task->ub,               // upper bound
+          ST,                      // loop increment
+          0,                       // 1 if nogroup specified
+          sched_type,              // schedule type: 0-none, 1-grainsize, 2-num_tasks
+          sched_val,               // schedule value (ignored for type 0)
+          modifier,                // strict modifier
+          (void*)&__task_dup_entry // tasks duplication routine
+          );
+    } // end master
+  } // end parallel
+  // check results
+  int tc;
+  if (ST == 1) { // most common case
+    tc = UB - LB + 1;
+  } else if (ST < 0) {
+    tc = (LB - UB) / (-ST) + 1;
+  } else { // ST > 0
+    tc = (UB - LB) / ST + 1;
+  }
+  int count;
+  if (sched_type == 1) {
+    count = (sched_val > tc) ? 1 : (tc + sched_val - 1) / sched_val;
+  } else {
+    count = (sched_val > tc) ? tc : sched_val;
+  }
+  if (j != LB + (tc - 1) * ST) {
+    printf("Error in lastprivate, %d != %d\n", j, LB + (tc - 1) * ST);
+    exit(1);
+  }
+  if (counter != tc) {
+    printf("Error, counter %d != %d\n", counter, tc);
+    exit(1);
+  }
+  if (task_count != count) {
+    printf("Error, task count %d != %d\n", task_count, count);
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  task_loop(1, 6, 1); // create 7 tasks
+  task_loop(2, 6, 1); // create 6 tasks
+  task_loop(1, 50, 1); // create 1 task
+  task_loop(2, 50, 1); // create 40 tasks
+
+  printf("Test passed\n");
+  return 0;
+}