forked from OSchip/llvm-project
Fix trip count calculation for parallel loops in runtime
The trip count calculation was incorrect for loops with large bounds. For example, for(int i=-2,000,000,000; i < 2,000,000,000; i+=50000000), the trip count calculation had overflow (trying to calculate 2,000,000,000 + 2,000,000,000 with signed integers) and wasn't giving the right value. This patch fixes this error in the runtime by using unsigned integers instead. There is still a bug in the clang compiler component because it warns that there is overflow in the test case file when there isn't. This error isn't there for the Intel Compiler. So for now, the test case is designated as XFAIL. Differential Revision: http://reviews.llvm.org/D19078 llvm-svn: 266677
This commit is contained in:
parent
d8ce87f850
commit
5235a1b603
|
@ -757,24 +757,29 @@ __kmp_dispatch_init(
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
tc = ( ub - lb + st );
|
||||
if ( st != 1 ) {
|
||||
if ( st < 0 ) {
|
||||
if ( lb < ub ) {
|
||||
tc = 0; // zero-trip
|
||||
} else { // lb >= ub
|
||||
tc = (ST)tc / st; // convert to signed division
|
||||
}
|
||||
} else { // st > 0
|
||||
if ( ub < lb ) {
|
||||
tc = 0; // zero-trip
|
||||
} else { // lb >= ub
|
||||
tc /= st;
|
||||
}
|
||||
// compute trip count
|
||||
if ( st == 1 ) { // most common case
|
||||
if ( ub >= lb ) {
|
||||
tc = ub - lb + 1;
|
||||
} else { // ub < lb
|
||||
tc = 0; // zero-trip
|
||||
}
|
||||
} else if ( st < 0 ) {
|
||||
if ( lb >= ub ) {
|
||||
// AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
|
||||
// where the division needs to be unsigned regardless of the result type
|
||||
tc = (UT)(lb - ub) / (-st) + 1;
|
||||
} else { // lb < ub
|
||||
tc = 0; // zero-trip
|
||||
}
|
||||
} else { // st > 0
|
||||
if ( ub >= lb ) {
|
||||
// AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
|
||||
// where the division needs to be unsigned regardless of the result type
|
||||
tc = (UT)(ub - lb) / st + 1;
|
||||
} else { // ub < lb
|
||||
tc = 0; // zero-trip
|
||||
}
|
||||
} else if ( ub < lb ) { // st == 1
|
||||
tc = 0; // zero-trip
|
||||
}
|
||||
|
||||
// Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
|
||||
|
@ -2255,8 +2260,11 @@ __kmp_dist_get_bounds(
|
|||
trip_count = *pupper - *plower + 1;
|
||||
} else if(incr == -1) {
|
||||
trip_count = *plower - *pupper + 1;
|
||||
} else if ( incr > 0 ) {
|
||||
// upper-lower can exceed the limit of signed type
|
||||
trip_count = (UT)(*pupper - *plower) / incr + 1;
|
||||
} else {
|
||||
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
|
||||
trip_count = (UT)(*plower - *pupper) / ( -incr ) + 1;
|
||||
}
|
||||
|
||||
if( trip_count <= nteams ) {
|
||||
|
|
|
@ -244,12 +244,11 @@ __kmp_for_static_init(
|
|||
trip_count = *pupper - *plower + 1;
|
||||
} else if (incr == -1) {
|
||||
trip_count = *plower - *pupper + 1;
|
||||
} else if ( incr > 0 ) {
|
||||
// upper-lower can exceed the limit of signed type
|
||||
trip_count = (UT)(*pupper - *plower) / incr + 1;
|
||||
} else {
|
||||
if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0
|
||||
trip_count = (*pupper - *plower) / incr + 1;
|
||||
} else {
|
||||
trip_count = (*plower - *pupper) / ( -incr ) + 1;
|
||||
}
|
||||
trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
|
||||
}
|
||||
|
||||
if ( __kmp_env_consistency_check ) {
|
||||
|
@ -447,8 +446,11 @@ __kmp_dist_for_static_init(
|
|||
trip_count = *pupper - *plower + 1;
|
||||
} else if(incr == -1) {
|
||||
trip_count = *plower - *pupper + 1;
|
||||
} else if ( incr > 0 ) {
|
||||
// upper-lower can exceed the limit of signed type
|
||||
trip_count = (UT)(*pupper - *plower) / incr + 1;
|
||||
} else {
|
||||
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
|
||||
trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
|
||||
}
|
||||
|
||||
*pstride = *pupper - *plower; // just in case (can be unused)
|
||||
|
@ -514,8 +516,11 @@ __kmp_dist_for_static_init(
|
|||
trip_count = *pupperDist - *plower + 1;
|
||||
} else if(incr == -1) {
|
||||
trip_count = *plower - *pupperDist + 1;
|
||||
} else if ( incr > 1 ) {
|
||||
// upper-lower can exceed the limit of signed type
|
||||
trip_count = (UT)(*pupperDist - *plower) / incr + 1;
|
||||
} else {
|
||||
trip_count = (ST)(*pupperDist - *plower) / incr + 1;
|
||||
trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
|
||||
}
|
||||
KMP_DEBUG_ASSERT( trip_count );
|
||||
switch( schedule ) {
|
||||
|
@ -684,8 +689,11 @@ __kmp_team_static_init(
|
|||
trip_count = upper - lower + 1;
|
||||
} else if(incr == -1) {
|
||||
trip_count = lower - upper + 1;
|
||||
} else if ( incr > 0 ) {
|
||||
// upper-lower can exceed the limit of signed type
|
||||
trip_count = (UT)(upper - lower) / incr + 1;
|
||||
} else {
|
||||
trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
|
||||
trip_count = (UT)(lower - upper) / (-incr) + 1;
|
||||
}
|
||||
if( chunk < 1 )
|
||||
chunk = 1;
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
// RUN: %libomp-compile -DMY_SCHEDULE=static && %libomp-run
|
||||
// RUN: %libomp-compile -DMY_SCHEDULE=dynamic && %libomp-run
|
||||
// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run
|
||||
// XFAIL: *
|
||||
/*
|
||||
* Test that large bounds are handled properly and calculations of
|
||||
* loop iterations don't accidently overflow
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
#define INCR 50000000
|
||||
#define MY_MAX 2000000000
|
||||
#define MY_MIN -2000000000
|
||||
#ifndef MY_SCHEDULE
|
||||
# define MY_SCHEDULE static
|
||||
#endif
|
||||
|
||||
int a, b, a_known_value, b_known_value;
|
||||
|
||||
int test_omp_for_bigbounds()
|
||||
{
|
||||
a = 0;
|
||||
b = 0;
|
||||
#pragma omp parallel
|
||||
{
|
||||
int i;
|
||||
#pragma omp for schedule(MY_SCHEDULE)
|
||||
for (i = INT_MIN; i < MY_MAX; i+=INCR) {
|
||||
#pragma omp atomic
|
||||
a++;
|
||||
}
|
||||
#pragma omp for schedule(MY_SCHEDULE)
|
||||
for (i = INT_MAX; i >= MY_MIN; i-=INCR) {
|
||||
#pragma omp atomic
|
||||
b++;
|
||||
}
|
||||
}
|
||||
printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, b, b_known_value);
|
||||
return (a == a_known_value && b == b_known_value);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int i;
|
||||
int num_failed=0;
|
||||
|
||||
a_known_value = 0;
|
||||
for (i = INT_MIN; i < MY_MAX; i+=INCR) {
|
||||
a_known_value++;
|
||||
}
|
||||
|
||||
b_known_value = 0;
|
||||
for (i = INT_MAX; i >= MY_MIN; i-=INCR) {
|
||||
b_known_value++;
|
||||
}
|
||||
|
||||
for(i = 0; i < REPETITIONS; i++) {
|
||||
if(!test_omp_for_bigbounds()) {
|
||||
num_failed++;
|
||||
}
|
||||
}
|
||||
return num_failed;
|
||||
}
|
Loading…
Reference in New Issue