[OpenMP] Fix potential integer overflow in dynamic schedule code

Restrict the chunk_size * chunk_num to only occur for valid chunk_nums and reimplement calculating the limit to avoid overflow. Differential Revision: https://reviews.llvm.org/D96747
2021-02-01 14:08:51 -06:00 · 2021-02-01 14:08:51 -06:00 · e2738b3758
parent b0f0115308
commit e2738b3758
2 changed files with 85 additions and 9 deletions
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@ -392,8 +392,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
      KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to "
                     "kmp_sch_dynamic_chunked\n",
                     gtid));
-      if (pr->u.p.parm1 <= 0)
-        pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
+      goto dynamic_init;
      break;
    } // if
  } // case
@ -490,6 +489,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
+        goto dynamic_init;
      } else {
        // when remaining iters become less than parm2 - switch to dynamic
        pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
@ -519,6 +519,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
+        goto dynamic_init;
      } else {
        /* commonly used term: (2 nproc - 1)/(2 nproc) */
        DBL x;
@ -643,10 +644,14 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
    break;
  case kmp_sch_static_chunked:
  case kmp_sch_dynamic_chunked:
+  dynamic_init:
    if (pr->u.p.parm1 <= 0)
      pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
    else if (pr->u.p.parm1 > tc)
      pr->u.p.parm1 = tc;
+    // Store the total number of chunks to prevent integer overflow during
+    // bounds calculations in the get next chunk routine.
+    pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
    KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
                   "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
                   gtid));
@ -1487,28 +1492,32 @@ int __kmp_dispatch_next_algorithm(int gtid,
  break;

  case kmp_sch_dynamic_chunked: {
-    T chunk = pr->u.p.parm1;
+    UT chunk_number;
+    UT chunk_size = pr->u.p.parm1;
+    UT nchunks = pr->u.p.parm2;

    KD_TRACE(
        100,
        ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
         gtid));

-    init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
-    trip = pr->u.p.tc - 1;
-
-    if ((status = (init <= trip)) == 0) {
+    chunk_number = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
+    status = (chunk_number < nchunks);
+    if (!status) {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } else {
+      init = chunk_size * chunk_number;
+      trip = pr->u.p.tc - 1;
      start = pr->u.p.lb;
-      limit = chunk + init - 1;
      incr = pr->u.p.st;

-      if ((last = (limit >= trip)) != 0)
+      if ((last = (trip - init < (UT)chunk_size)))
        limit = trip;
+      else
+        limit = chunk_size + init - 1;

      if (p_st != NULL)
        *p_st = incr;
--- a/openmp/runtime/test/worksharing/for/omp_for_dynamic_large_chunk.c
+++ b/openmp/runtime/test/worksharing/for/omp_for_dynamic_large_chunk.c
@ -0,0 +1,67 @@
+// RUN: %libomp-compile
+// RUN: env OMP_WAIT_POLICY=passive OMP_NUM_THREADS=32 %libomp-run 0 134217728 1 134217728
+//
+// This test makes sure that large chunks sizes are handled correctly
+// including internal runtime calculations which incorporate the chunk size
+// Only one thread should execute all iterations.
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+typedef unsigned long long ull_t;
+
+int main(int argc, char **argv) {
+  int i, j, lb, ub, stride, nthreads, actual_nthreads, chunk;
+  ull_t num_iters = 0;
+  ull_t counted_iters = 0;
+  int errs = 0;
+  if (argc != 5) {
+    fprintf(stderr, "error: incorrect number of arguments\n");
+    fprintf(stderr, "usage: %s <lb> <ub> <stride> <chunk>\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  lb = atoi(argv[1]);
+  ub = atoi(argv[2]);
+  stride = atoi(argv[3]);
+  chunk = atoi(argv[4]);
+  nthreads = omp_get_max_threads();
+  if (lb >= ub) {
+    fprintf(stderr, "error: lb must be less than ub\n");
+    exit(EXIT_FAILURE);
+  }
+  if (stride <= 0) {
+    fprintf(stderr, "error: stride must be positive integer\n");
+    exit(EXIT_FAILURE);
+  }
+  if (chunk <= 0) {
+    fprintf(stderr, "error: chunk must be positive integer\n");
+    exit(EXIT_FAILURE);
+  }
+  for (i = lb; i < ub; i += stride)
+    num_iters++;
+
+  #pragma omp parallel num_threads(nthreads)
+  {
+    #pragma omp single
+    actual_nthreads = omp_get_num_threads();
+
+    if (actual_nthreads != nthreads) {
+      printf("did not create enough threads, skipping test.\n");
+    } else {
+      #pragma omp for schedule(dynamic, chunk)
+      for (i = lb; i < ub; i += stride) {
+        counted_iters++;
+      }
+    }
+  }
+
+  // Check that the number of iterations executed is correct
+  if (actual_nthreads == nthreads && counted_iters != num_iters) {
+    fprintf(stderr, "error: wrong number of final iterations counted! "
+                    "num_iters=%llu, counted_iters=%llu\n",
+            num_iters, counted_iters);
+    exit(EXIT_FAILURE);
+  }
+
+  return EXIT_SUCCESS;
+}