Fix trip count calculation for parallel loops in runtime

The trip count calculation was incorrect for loops with large bounds. For example,
for(int i=-2,000,000,000; i < 2,000,000,000; i+=50000000), the trip count
calculation had overflow (trying to calculate 2,000,000,000 + 2,000,000,000 with
signed integers) and wasn't giving the right value. This patch fixes this error
in the runtime by using unsigned integers instead. There is still a bug in the
clang compiler component because it warns that there is overflow in the
test case file when there isn't. This error isn't there for the Intel Compiler.
So for now, the test case is designated as XFAIL.

Differential Revision: http://reviews.llvm.org/D19078

llvm-svn: 266677
This commit is contained in:
Jonathan Peyton 2016-04-18 21:38:29 +00:00
parent d8ce87f850
commit 5235a1b603
3 changed files with 109 additions and 26 deletions

View File

@ -757,24 +757,29 @@ __kmp_dispatch_init(
);
}
}
tc = ( ub - lb + st );
if ( st != 1 ) {
if ( st < 0 ) {
if ( lb < ub ) {
tc = 0; // zero-trip
} else { // lb >= ub
tc = (ST)tc / st; // convert to signed division
}
} else { // st > 0
if ( ub < lb ) {
tc = 0; // zero-trip
} else { // lb >= ub
tc /= st;
}
// compute trip count
if ( st == 1 ) { // most common case
if ( ub >= lb ) {
tc = ub - lb + 1;
} else { // ub < lb
tc = 0; // zero-trip
}
} else if ( st < 0 ) {
if ( lb >= ub ) {
// AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
// where the division needs to be unsigned regardless of the result type
tc = (UT)(lb - ub) / (-st) + 1;
} else { // lb < ub
tc = 0; // zero-trip
}
} else { // st > 0
if ( ub >= lb ) {
// AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
// where the division needs to be unsigned regardless of the result type
tc = (UT)(ub - lb) / st + 1;
} else { // ub < lb
tc = 0; // zero-trip
}
} else if ( ub < lb ) { // st == 1
tc = 0; // zero-trip
}
// Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
@ -2255,8 +2260,11 @@ __kmp_dist_get_bounds(
trip_count = *pupper - *plower + 1;
} else if(incr == -1) {
trip_count = *plower - *pupper + 1;
} else if ( incr > 0 ) {
// upper-lower can exceed the limit of signed type
trip_count = (UT)(*pupper - *plower) / incr + 1;
} else {
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
trip_count = (UT)(*plower - *pupper) / ( -incr ) + 1;
}
if( trip_count <= nteams ) {

View File

@ -244,12 +244,11 @@ __kmp_for_static_init(
trip_count = *pupper - *plower + 1;
} else if (incr == -1) {
trip_count = *plower - *pupper + 1;
} else if ( incr > 0 ) {
// upper-lower can exceed the limit of signed type
trip_count = (UT)(*pupper - *plower) / incr + 1;
} else {
if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0
trip_count = (*pupper - *plower) / incr + 1;
} else {
trip_count = (*plower - *pupper) / ( -incr ) + 1;
}
trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
}
if ( __kmp_env_consistency_check ) {
@ -447,8 +446,11 @@ __kmp_dist_for_static_init(
trip_count = *pupper - *plower + 1;
} else if(incr == -1) {
trip_count = *plower - *pupper + 1;
} else if ( incr > 0 ) {
// upper-lower can exceed the limit of signed type
trip_count = (UT)(*pupper - *plower) / incr + 1;
} else {
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
}
*pstride = *pupper - *plower; // just in case (can be unused)
@ -514,8 +516,11 @@ __kmp_dist_for_static_init(
trip_count = *pupperDist - *plower + 1;
} else if(incr == -1) {
trip_count = *plower - *pupperDist + 1;
} else if ( incr > 1 ) {
// upper-lower can exceed the limit of signed type
trip_count = (UT)(*pupperDist - *plower) / incr + 1;
} else {
trip_count = (ST)(*pupperDist - *plower) / incr + 1;
trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
}
KMP_DEBUG_ASSERT( trip_count );
switch( schedule ) {
@ -684,8 +689,11 @@ __kmp_team_static_init(
trip_count = upper - lower + 1;
} else if(incr == -1) {
trip_count = lower - upper + 1;
} else if ( incr > 0 ) {
// upper-lower can exceed the limit of signed type
trip_count = (UT)(upper - lower) / incr + 1;
} else {
trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
trip_count = (UT)(lower - upper) / (-incr) + 1;
}
if( chunk < 1 )
chunk = 1;

View File

@ -0,0 +1,67 @@
// RUN: %libomp-compile -DMY_SCHEDULE=static && %libomp-run
// RUN: %libomp-compile -DMY_SCHEDULE=dynamic && %libomp-run
// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run
// XFAIL: *
/*
* Test that large bounds are handled properly and calculations of
* loop iterations don't accidently overflow
*/
#include <stdio.h>
#include <omp.h>
#include <stdlib.h>
#include <limits.h>
#include "omp_testsuite.h"
#define INCR 50000000
#define MY_MAX 2000000000
#define MY_MIN -2000000000
#ifndef MY_SCHEDULE
# define MY_SCHEDULE static
#endif
int a, b, a_known_value, b_known_value;
int test_omp_for_bigbounds()
{
a = 0;
b = 0;
#pragma omp parallel
{
int i;
#pragma omp for schedule(MY_SCHEDULE)
for (i = INT_MIN; i < MY_MAX; i+=INCR) {
#pragma omp atomic
a++;
}
#pragma omp for schedule(MY_SCHEDULE)
for (i = INT_MAX; i >= MY_MIN; i-=INCR) {
#pragma omp atomic
b++;
}
}
printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, b, b_known_value);
return (a == a_known_value && b == b_known_value);
}
int main()
{
int i;
int num_failed=0;
a_known_value = 0;
for (i = INT_MIN; i < MY_MAX; i+=INCR) {
a_known_value++;
}
b_known_value = 0;
for (i = INT_MAX; i >= MY_MIN; i-=INCR) {
b_known_value++;
}
for(i = 0; i < REPETITIONS; i++) {
if(!test_omp_for_bigbounds()) {
num_failed++;
}
}
return num_failed;
}