forked from OSchip/llvm-project
[OpenMP] Fix potential integer overflow in dynamic schedule code
Restrict the chunk_size * chunk_num to only occur for valid chunk_nums and reimplement calculating the limit to avoid overflow. Differential Revision: https://reviews.llvm.org/D96747
This commit is contained in:
parent
b0f0115308
commit
e2738b3758
|
@ -392,8 +392,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
|
|||
KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to "
|
||||
"kmp_sch_dynamic_chunked\n",
|
||||
gtid));
|
||||
if (pr->u.p.parm1 <= 0)
|
||||
pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
|
||||
goto dynamic_init;
|
||||
break;
|
||||
} // if
|
||||
} // case
|
||||
|
@ -490,6 +489,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
|
|||
if ((2L * chunk + 1) * nproc >= tc) {
|
||||
/* chunk size too large, switch to dynamic */
|
||||
schedule = kmp_sch_dynamic_chunked;
|
||||
goto dynamic_init;
|
||||
} else {
|
||||
// when remaining iters become less than parm2 - switch to dynamic
|
||||
pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
|
||||
|
@ -519,6 +519,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
|
|||
if ((2L * chunk + 1) * nproc >= tc) {
|
||||
/* chunk size too large, switch to dynamic */
|
||||
schedule = kmp_sch_dynamic_chunked;
|
||||
goto dynamic_init;
|
||||
} else {
|
||||
/* commonly used term: (2 nproc - 1)/(2 nproc) */
|
||||
DBL x;
|
||||
|
@ -643,10 +644,14 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
|
|||
break;
|
||||
case kmp_sch_static_chunked:
|
||||
case kmp_sch_dynamic_chunked:
|
||||
dynamic_init:
|
||||
if (pr->u.p.parm1 <= 0)
|
||||
pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
|
||||
else if (pr->u.p.parm1 > tc)
|
||||
pr->u.p.parm1 = tc;
|
||||
// Store the total number of chunks to prevent integer overflow during
|
||||
// bounds calculations in the get next chunk routine.
|
||||
pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
|
||||
KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
|
||||
"kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
|
||||
gtid));
|
||||
|
@ -1487,28 +1492,32 @@ int __kmp_dispatch_next_algorithm(int gtid,
|
|||
break;
|
||||
|
||||
case kmp_sch_dynamic_chunked: {
|
||||
T chunk = pr->u.p.parm1;
|
||||
UT chunk_number;
|
||||
UT chunk_size = pr->u.p.parm1;
|
||||
UT nchunks = pr->u.p.parm2;
|
||||
|
||||
KD_TRACE(
|
||||
100,
|
||||
("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
|
||||
gtid));
|
||||
|
||||
init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
|
||||
trip = pr->u.p.tc - 1;
|
||||
|
||||
if ((status = (init <= trip)) == 0) {
|
||||
chunk_number = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
|
||||
status = (chunk_number < nchunks);
|
||||
if (!status) {
|
||||
*p_lb = 0;
|
||||
*p_ub = 0;
|
||||
if (p_st != NULL)
|
||||
*p_st = 0;
|
||||
} else {
|
||||
init = chunk_size * chunk_number;
|
||||
trip = pr->u.p.tc - 1;
|
||||
start = pr->u.p.lb;
|
||||
limit = chunk + init - 1;
|
||||
incr = pr->u.p.st;
|
||||
|
||||
if ((last = (limit >= trip)) != 0)
|
||||
if ((last = (trip - init < (UT)chunk_size)))
|
||||
limit = trip;
|
||||
else
|
||||
limit = chunk_size + init - 1;
|
||||
|
||||
if (p_st != NULL)
|
||||
*p_st = incr;
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
// RUN: %libomp-compile
|
||||
// RUN: env OMP_WAIT_POLICY=passive OMP_NUM_THREADS=32 %libomp-run 0 134217728 1 134217728
|
||||
//
|
||||
// This test makes sure that large chunks sizes are handled correctly
|
||||
// including internal runtime calculations which incorporate the chunk size
|
||||
// Only one thread should execute all iterations.
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
typedef unsigned long long ull_t;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int i, j, lb, ub, stride, nthreads, actual_nthreads, chunk;
|
||||
ull_t num_iters = 0;
|
||||
ull_t counted_iters = 0;
|
||||
int errs = 0;
|
||||
if (argc != 5) {
|
||||
fprintf(stderr, "error: incorrect number of arguments\n");
|
||||
fprintf(stderr, "usage: %s <lb> <ub> <stride> <chunk>\n", argv[0]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
lb = atoi(argv[1]);
|
||||
ub = atoi(argv[2]);
|
||||
stride = atoi(argv[3]);
|
||||
chunk = atoi(argv[4]);
|
||||
nthreads = omp_get_max_threads();
|
||||
if (lb >= ub) {
|
||||
fprintf(stderr, "error: lb must be less than ub\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (stride <= 0) {
|
||||
fprintf(stderr, "error: stride must be positive integer\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (chunk <= 0) {
|
||||
fprintf(stderr, "error: chunk must be positive integer\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
for (i = lb; i < ub; i += stride)
|
||||
num_iters++;
|
||||
|
||||
#pragma omp parallel num_threads(nthreads)
|
||||
{
|
||||
#pragma omp single
|
||||
actual_nthreads = omp_get_num_threads();
|
||||
|
||||
if (actual_nthreads != nthreads) {
|
||||
printf("did not create enough threads, skipping test.\n");
|
||||
} else {
|
||||
#pragma omp for schedule(dynamic, chunk)
|
||||
for (i = lb; i < ub; i += stride) {
|
||||
counted_iters++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the number of iterations executed is correct
|
||||
if (actual_nthreads == nthreads && counted_iters != num_iters) {
|
||||
fprintf(stderr, "error: wrong number of final iterations counted! "
|
||||
"num_iters=%llu, counted_iters=%llu\n",
|
||||
num_iters, counted_iters);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
Loading…
Reference in New Issue