[OpenMP] Add strict mode in num_tasks and grainsize

This patch adds a new API, __kmpc_taskloop_5, to accommodate the strict
modifier (introduced in OpenMP 5.1) in the num_tasks and grainsize
clauses.
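
For reference, a minimal sketch of user code that exercises the new
modifier (assuming an OpenMP 5.1 compiler that lowers the strict form to
this entry point):

  #include <omp.h>
  void work(int i);
  void f(void) {
  #pragma omp parallel
  #pragma omp single
  // strict: every task runs exactly 10 iterations; only the last may run fewer
  #pragma omp taskloop grainsize(strict: 10)
    for (int i = 0; i < 95; ++i)
      work(i);
  }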

Differential Revision: https://reviews.llvm.org/D92352
Nawrin Sultana 2020-11-30 17:41:53 -06:00
parent c3ff9939bf
commit 540007b427
4 changed files with 307 additions and 62 deletions


@@ -371,6 +371,7 @@ kmpc_set_defaults 224
__kmpc_doacross_fini 264
__kmpc_taskloop 266
__kmpc_critical_with_hint 270
+ __kmpc_taskloop_5 285
%endif
kmpc_aligned_malloc 265
kmpc_set_disp_num_buffers 267


@@ -3783,6 +3783,12 @@ KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
kmp_int32 sched, kmp_uint64 grainsize,
void *task_dup);
+ KMP_EXPORT void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,
+ kmp_task_t *task, kmp_int32 if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+ kmp_int32 nogroup, kmp_int32 sched,
+ kmp_uint64 grainsize, kmp_int32 modifier,
+ void *task_dup);
KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
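
As an illustration only (not part of the patch): for a directive such as
  #pragma omp taskloop num_tasks(strict: 8)
a compiler targeting this interface would be expected to emit a call along
these lines, with sched = 2 selecting num_tasks, the grainsize argument
carrying the value 8, and modifier = 1 for strict (loc, gtid, task, and
task_dup come from the usual taskloop codegen; the lb/ub fields follow the
emulated task layout used in the test added by this patch):

  __kmpc_taskloop_5(loc, gtid, task, /*if_val=*/1, &task->lb, &task->ub,
                    /*st=*/1, /*nogroup=*/0, /*sched=*/2, /*grainsize=*/8,
                    /*modifier=*/1, (void *)task_dup);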


@@ -4142,6 +4142,7 @@ public:
// num_tasks Number of tasks to execute
// grainsize Number of loop iterations per task
// extras Number of chunks with grainsize+1 iterations
+ // last_chunk Reduction of grainsize for last task
// tc Iterations count
// task_dup Tasks duplication routine
// codeptr_ra Return address for OMPT events
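// Note (editorial comment, not part of the patch): extras and last_chunk
// are mutually exclusive. Non-strict distribution gives the first extras
// tasks one iteration more than grainsize; strict mode keeps every chunk
// at exactly grainsize and instead shortens only the final task, which
// runs grainsize + last_chunk iterations (last_chunk <= 0).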
@@ -4149,7 +4150,7 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
kmp_uint64 ub_glob, kmp_uint64 num_tasks,
kmp_uint64 grainsize, kmp_uint64 extras,
- kmp_uint64 tc,
+ kmp_int64 last_chunk, kmp_uint64 tc,
#if OMPT_SUPPORT
void *codeptr_ra,
#endif
@@ -4167,13 +4168,14 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
kmp_task_t *next_task;
kmp_int32 lastpriv = 0;
- KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+ KMP_DEBUG_ASSERT(
+ tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
KMP_DEBUG_ASSERT(num_tasks > extras);
KMP_DEBUG_ASSERT(num_tasks > 0);
KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
"extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
task_dup));
"extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
ub_glob, st, task_dup));
// Launch num_tasks tasks, assign grainsize iterations each task
for (i = 0; i < num_tasks; ++i) {
@@ -4185,6 +4187,9 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
--extras; // first extras iterations get bigger chunk (grainsize+1)
}
upper = lower + st * chunk_minus_1;
+ if (upper > *ub) {
+ upper = *ub;
+ }
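// Note (editorial comment, not part of the patch): in strict mode the
// loop above still advances lower by full grainsize chunks, so for the
// final task upper could overshoot *ub; the clamp is what actually
// shortens the last chunk to grainsize + last_chunk iterations.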
if (i == num_tasks - 1) {
// schedule the last task, set lastprivate flag if needed
if (st == 1) { // most common case
@@ -4248,6 +4253,7 @@ typedef struct __taskloop_params {
kmp_uint64 num_tasks;
kmp_uint64 grainsize;
kmp_uint64 extras;
+ kmp_int64 last_chunk;
kmp_uint64 tc;
kmp_uint64 num_t_min;
#if OMPT_SUPPORT
@@ -4257,7 +4263,8 @@ typedef struct __taskloop_params {
void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
- kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
+ kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
+ kmp_uint64,
#if OMPT_SUPPORT
void *,
#endif
@@ -4277,6 +4284,7 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
kmp_uint64 num_tasks = p->num_tasks;
kmp_uint64 grainsize = p->grainsize;
kmp_uint64 extras = p->extras;
+ kmp_int64 last_chunk = p->last_chunk;
kmp_uint64 tc = p->tc;
kmp_uint64 num_t_min = p->num_t_min;
#if OMPT_SUPPORT
@@ -4285,22 +4293,23 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
#if KMP_DEBUG
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
" %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
task_dup));
KA_TRACE(20,
("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
" %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
st, task_dup));
#endif
KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
if (num_tasks > num_t_min)
__kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc, num_t_min,
+ grainsize, extras, last_chunk, tc, num_t_min,
#if OMPT_SUPPORT
codeptr_ra,
#endif
task_dup);
else
__kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc,
+ grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
codeptr_ra,
#endif
@@ -4323,6 +4332,7 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
// num_tasks Number of tasks to execute
// grainsize Number of loop iterations per task
// extras Number of chunks with grainsize+1 iterations
+ // last_chunk Reduction of grainsize for last task
// tc Iterations count
// num_t_min Threshold to launch tasks recursively
// task_dup Tasks duplication routine
@@ -4331,7 +4341,8 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
kmp_uint64 ub_glob, kmp_uint64 num_tasks,
kmp_uint64 grainsize, kmp_uint64 extras,
- kmp_uint64 tc, kmp_uint64 num_t_min,
+ kmp_int64 last_chunk, kmp_uint64 tc,
+ kmp_uint64 num_t_min,
#if OMPT_SUPPORT
void *codeptr_ra,
#endif
@@ -4339,10 +4350,11 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
KMP_DEBUG_ASSERT(num_tasks > num_t_min);
KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
" %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
task_dup));
KA_TRACE(20,
("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
" %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
st, task_dup));
p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
kmp_uint64 lower = *lb;
kmp_info_t *thread = __kmp_threads[gtid];
@@ -4353,16 +4365,23 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
size_t upper_offset =
(char *)ub - (char *)task; // remember offset of ub in the task structure
- KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+ KMP_DEBUG_ASSERT(
+ tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
KMP_DEBUG_ASSERT(num_tasks > extras);
KMP_DEBUG_ASSERT(num_tasks > 0);
// split the loop in two halves
kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
+ kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
kmp_uint64 gr_size0 = grainsize;
kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
- if (n_tsk0 <= extras) {
+ if (last_chunk < 0) {
+ ext0 = ext1 = 0;
+ last_chunk1 = last_chunk;
+ tc0 = grainsize * n_tsk0;
+ tc1 = tc - tc0;
+ } else if (n_tsk0 <= extras) {
gr_size0++; // integrate extras into grainsize
ext0 = 0; // no extra iters in 1st half
ext1 = extras - n_tsk0; // remaining extras
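// Note (editorial comment, not part of the patch): in the strict branch
// extras is zero by construction, so the first half gets n_tsk0 full
// grainsize chunks and last_chunk travels with the second half, which
// contains the loop's final task.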
@@ -4404,6 +4423,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
p->num_tasks = n_tsk1;
p->grainsize = grainsize;
p->extras = ext1;
+ p->last_chunk = last_chunk1;
p->tc = tc1;
p->num_t_min = num_t_min;
#if OMPT_SUPPORT
@@ -4420,44 +4440,28 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
// execute the 1st half of current subrange
if (n_tsk0 > num_t_min)
__kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
- ext0, tc0, num_t_min,
+ ext0, last_chunk0, tc0, num_t_min,
#if OMPT_SUPPORT
codeptr_ra,
#endif
task_dup);
else
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
- gr_size0, ext0, tc0,
+ gr_size0, ext0, last_chunk0, tc0,
#if OMPT_SUPPORT
codeptr_ra,
#endif
task_dup);
KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
}
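// Note (editorial comment, not part of the patch): as the hunks above
// suggest, __kmp_taskloop_recur halves the range, packages the second
// half into a __taskloop_params task that re-enters this logic via
// __kmp_taskloop_task, and processes the first half itself, switching to
// __kmp_taskloop_linear once num_t_min is reached.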
- /*!
- @ingroup TASKING
- @param loc Source location information
- @param gtid Global thread ID
- @param task Task structure
- @param if_val Value of the if clause
- @param lb Pointer to loop lower bound in task structure
- @param ub Pointer to loop upper bound in task structure
- @param st Loop stride
- @param nogroup Flag, 1 if no taskgroup needs to be added, 0 otherwise
- @param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
- @param grainsize Schedule value if specified
- @param task_dup Tasks duplication routine
- Execute the taskloop construct.
- */
- void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
- kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
- int sched, kmp_uint64 grainsize, void *task_dup) {
+ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+ int nogroup, int sched, kmp_uint64 grainsize,
+ int modifier, void *task_dup) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
- __kmp_assert_valid_gtid(gtid);
if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -4474,13 +4478,16 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 upper = task_bounds.get_ub();
kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
kmp_uint64 num_tasks = 0, extras = 0;
+ kmp_int64 last_chunk =
+ 0; // reduce grainsize of last task by last_chunk in strict mode
kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *current_task = thread->th.th_current_task;
KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
"grain %llu(%d), dup %p\n",
gtid, taskdata, lower, upper, st, grainsize, sched, task_dup));
KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
"grain %llu(%d, %d), dup %p\n",
gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
task_dup));
// compute trip count
if (st == 1) { // most common case
@@ -4491,7 +4498,7 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
tc = (upper - lower) / st + 1;
}
if (tc == 0) {
KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
// free the pattern task and exit
__kmp_task_start(gtid, task, current_task);
// do not execute anything for zero-trip loop
@@ -4533,20 +4540,28 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
break;
case 1: // grainsize provided
if (grainsize > tc) {
- num_tasks = 1; // too big grainsize requested, adjust values
- grainsize = tc;
+ num_tasks = 1;
+ grainsize = tc; // too big grainsize requested, adjust values
extras = 0;
} else {
- num_tasks = tc / grainsize;
- // adjust grainsize for balanced distribution of iterations
- grainsize = tc / num_tasks;
- extras = tc % num_tasks;
+ if (modifier) {
+ num_tasks = (tc + grainsize - 1) / grainsize;
+ last_chunk = tc - (num_tasks * grainsize);
+ extras = 0;
+ } else {
+ num_tasks = tc / grainsize;
+ // adjust grainsize for balanced distribution of iterations
+ grainsize = tc / num_tasks;
+ extras = tc % num_tasks;
+ }
}
break;
default:
KMP_ASSERT2(0, "unknown scheduling of taskloop");
}
- KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+ KMP_DEBUG_ASSERT(
+ tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
KMP_DEBUG_ASSERT(num_tasks > extras);
KMP_DEBUG_ASSERT(num_tasks > 0);
// =========================================================================
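// Worked example (editorial comment, not part of the patch): with tc = 40
// and grainsize = 6, non-strict mode yields num_tasks = 6, grainsize = 6,
// extras = 4 (four tasks run 7 iterations, two run 6), while strict mode
// yields num_tasks = ceil(40 / 6) = 7 and last_chunk = 40 - 7 * 6 = -2,
// so six tasks run exactly 6 iterations and the last runs 4.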
@@ -4558,7 +4573,7 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
// always start serial tasks linearly
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc,
+ grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
OMPT_GET_RETURN_ADDRESS(0),
#endif
@@ -4566,21 +4581,23 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
// !taskdata->td_flags.native => currently force linear spawning of tasks
// for GOMP_taskloop
} else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
last_chunk));
__kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc, num_tasks_min,
+ grainsize, extras, last_chunk, tc, num_tasks_min,
#if OMPT_SUPPORT
OMPT_GET_RETURN_ADDRESS(0),
#endif
task_dup);
} else {
KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
last_chunk));
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc,
+ grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
OMPT_GET_RETURN_ADDRESS(0),
#endif
@@ -4601,5 +4618,59 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
#endif
__kmpc_end_taskgroup(loc, gtid);
}
KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
}
+ /*!
+ @ingroup TASKING
+ @param loc Source location information
+ @param gtid Global thread ID
+ @param task Task structure
+ @param if_val Value of the if clause
+ @param lb Pointer to loop lower bound in task structure
+ @param ub Pointer to loop upper bound in task structure
+ @param st Loop stride
+ @param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
+ @param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
+ @param grainsize Schedule value if specified
+ @param task_dup Tasks duplication routine
+ Execute the taskloop construct.
+ */
+ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
+ int sched, kmp_uint64 grainsize, void *task_dup) {
+ __kmp_assert_valid_gtid(gtid);
+ KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
+ __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
+ 0, task_dup);
+ KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
+ }
+ /*!
+ @ingroup TASKING
+ @param loc Source location information
+ @param gtid Global thread ID
+ @param task Task structure
+ @param if_val Value of the if clause
+ @param lb Pointer to loop lower bound in task structure
+ @param ub Pointer to loop upper bound in task structure
+ @param st Loop stride
+ @param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
+ @param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
+ @param grainsize Schedule value if specified
+ @param modifier Modifier 'strict' for sched, 1 if present, 0 otherwise
+ @param task_dup Tasks duplication routine
+ Execute the taskloop construct.
+ */
+ void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+ int nogroup, int sched, kmp_uint64 grainsize,
+ int modifier, void *task_dup) {
+ __kmp_assert_valid_gtid(gtid);
+ KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
+ __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
+ modifier, task_dup);
+ KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
+ }


@@ -0,0 +1,167 @@
// RUN: %libomp-compile-and-run
// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
#include <stdio.h>
#include <stdlib.h> // for exit()
#include <omp.h>
#include "omp_my_sleep.h"
#define N 4
#define ST 3
#define UB 118
#define LB 0
// globals
int counter;
int task_count;
// Compiler-generated code (emulation)
typedef struct ident {
void* dummy;
} ident_t;
typedef struct shar {
int *pcounter;
int *pj;
int *ptask_count;
} *pshareds;
typedef struct task {
pshareds shareds;
int(* routine)(int,struct task*);
int part_id;
unsigned long long lb; // library always uses ULONG
unsigned long long ub;
int st;
int last;
int i;
int j;
int th;
} *ptask, kmp_task_t;
typedef int(* task_entry_t)( int, ptask );
void
__task_dup_entry(ptask task_dst, ptask task_src, int lastpriv)
{
// setup lastprivate flag
task_dst->last = lastpriv;
// could be constructor calls here...
}
// OpenMP RTL interfaces
typedef unsigned long long kmp_uint64;
typedef long long kmp_int64;
#ifdef __cplusplus
extern "C" {
#endif
void
__kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
int nogroup, int sched, kmp_uint64 grainsize, int modifier,
void *task_dup);
ptask
__kmpc_omp_task_alloc(ident_t *loc, int gtid, int flags,
size_t sizeof_kmp_task_t, size_t sizeof_shareds,
task_entry_t task_entry);
void __kmpc_atomic_fixed4_add(void *id_ref, int gtid, int * lhs, int rhs);
int __kmpc_global_thread_num(void *id_ref);
#ifdef __cplusplus
}
#endif
// User's code
int task_entry(int gtid, ptask task)
{
pshareds pshar = task->shareds;
__kmpc_atomic_fixed4_add(NULL, gtid, pshar->ptask_count, 1);
for (task->i = task->lb; task->i <= (int)task->ub; task->i += task->st) {
task->th = omp_get_thread_num();
__kmpc_atomic_fixed4_add(NULL,gtid,pshar->pcounter,1);
task->j = task->i;
}
my_sleep( 0.1 ); // sleep 100 ms in order to allow other threads to steal tasks
if (task->last) {
*(pshar->pj) = task->j; // lastprivate
}
return 0;
}
void task_loop(int sched_type, int sched_val, int modifier)
{
int i, j, gtid = __kmpc_global_thread_num(NULL);
ptask task;
pshareds psh;
omp_set_dynamic(0);
counter = 0;
task_count = 0;
#pragma omp parallel num_threads(N)
{
#pragma omp master
{
int gtid = __kmpc_global_thread_num(NULL);
task = __kmpc_omp_task_alloc(NULL, gtid, 1, sizeof(struct task),
sizeof(struct shar), &task_entry);
psh = task->shareds;
psh->pcounter = &counter;
psh->ptask_count = &task_count;
psh->pj = &j;
task->lb = LB;
task->ub = UB;
task->st = ST;
__kmpc_taskloop_5(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
ST, // loop increment
0, // 1 if nogroup specified
sched_type, // schedule type: 0-none, 1-grainsize, 2-num_tasks
sched_val, // schedule value (ignored for type 0)
modifier, // strict modifier
(void*)&__task_dup_entry // tasks duplication routine
);
} // end master
} // end parallel
// check results
int tc;
if (ST == 1) { // most common case
tc = UB - LB + 1;
} else if (ST < 0) {
tc = (LB - UB) / (-ST) + 1;
} else { // ST > 0
tc = (UB - LB) / ST + 1;
}
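// Editorial note: with LB = 0, UB = 118, ST = 3 this gives
// tc = (118 - 0) / 3 + 1 = 40 iterations.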
int count;
if (sched_type == 1) {
count = (sched_val > tc) ? 1 : (tc + sched_val - 1) / sched_val;
} else {
count = (sched_val > tc) ? tc : sched_val;
}
if (j != LB + (tc - 1) * ST) {
printf("Error in lastprivate, %d != %d\n", j, LB + (tc - 1) * ST);
exit(1);
}
if (counter != tc) {
printf("Error, counter %d != %d\n", counter, tc);
exit(1);
}
if (task_count != count) {
printf("Error, task count %d != %d\n", task_count, count);
exit(1);
}
}
int main(int argc, char *argv[]) {
task_loop(1, 6, 1); // create 7 tasks
task_loop(2, 6, 1); // create 6 tasks
task_loop(1, 50, 1); // create 1 task
task_loop(2, 50, 1); // create 40 tasks
printf("Test passed\n");
return 0;
}
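
As a complementary check (not part of this commit), the same loop can be
driven through the OpenMP 5.1 pragma form once a compiler that lowers the
strict modifier to __kmpc_taskloop_5 is available; this sketch verifies
only the trip count, not the task count:

  #include <stdio.h>
  #include <omp.h>

  int main(void) {
    int iters = 0;
  #pragma omp parallel num_threads(4)
  #pragma omp single
  #pragma omp taskloop grainsize(strict: 6)
    for (int i = 0; i <= 118; i += 3) { // same bounds as above: tc = 40
  #pragma omp atomic
      iters++;
    }
    printf("iterations executed: %d\n", iters); // expect 40
    return 0;
  }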