[RTC] Bail if too many parameters are involved in a RTC access.

If too many parameters are involved in accesses used to create RTCs
  we might end up with enormous compile times and RTC expressions.
  The reason is that the lexmin/lexmax is dependent on all these
  parameters and isl might need to create a case for every "ordering"
  of them (e.g., p0 <= p1 <= p2, p1 <= p0 <= p2, ...).

  The exact number of parameters allowed in accesses is defined by the
  command line option -polly-rtc-max-parameters=XXX and set by default
  to 8.

  Differential Revision: http://reviews.llvm.org/D5500

llvm-svn: 218566
This commit is contained in:
Johannes Doerfert 2014-09-27 11:02:39 +00:00
parent f4b9e6b9d9
commit 9143d67aba
4 changed files with 285 additions and 6 deletions

View File

@ -692,7 +692,9 @@ public:
void addAssumption(__isl_take isl_set *Set);
/// @brief Build all alias groups for this SCoP.
void buildAliasGroups(AliasAnalysis &AA);
///
/// @returns True if __no__ error occurred, false otherwise.
bool buildAliasGroups(AliasAnalysis &AA);
/// @brief Return all alias groups for this SCoP.
const MinMaxVectorVectorTy &getAliasGroups() const {

View File

@ -64,6 +64,11 @@ static cl::opt<bool> DisableMultiplicativeReductions(
cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
cl::init(false), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxParameters(
"polly-rtc-max-parameters",
cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
/// Translate a 'const SCEV *' expression in an isl_pw_aff.
struct SCEVAffinator : public SCEVVisitor<SCEVAffinator, isl_pw_aff *> {
public:
@ -1137,6 +1142,32 @@ static int buildMinMaxAccess(__isl_take isl_set *Set, void *User) {
isl_aff *OneAff;
unsigned Pos;
// Restrict the number of parameters involved in the access as the lexmin/
// lexmax computation will take too long if this number is high.
//
// Experiments with a simple test case using an i7 4800MQ:
//
// #Parameters involved | Time (in sec)
// 6 | 0.01
// 7 | 0.04
// 8 | 0.12
// 9 | 0.40
// 10 | 1.54
// 11 | 6.78
// 12 | 30.38
//
if (isl_set_n_param(Set) > RunTimeChecksMaxParameters) {
unsigned InvolvedParams = 0;
for (unsigned u = 0, e = isl_set_n_param(Set); u < e; u++)
if (isl_set_involves_dims(Set, isl_dim_param, u, 1))
InvolvedParams++;
if (InvolvedParams > RunTimeChecksMaxParameters) {
isl_set_free(Set);
return -1;
}
}
MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set));
MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set));
@ -1160,7 +1191,7 @@ static int buildMinMaxAccess(__isl_take isl_set *Set, void *User) {
return 0;
}
void Scop::buildAliasGroups(AliasAnalysis &AA) {
bool Scop::buildAliasGroups(AliasAnalysis &AA) {
// To create sound alias checks we perform the following steps:
// o) Use the alias analysis and an alias set tracker to build alias sets
// for all memory accesses inside the SCoP.
@ -1207,6 +1238,7 @@ void Scop::buildAliasGroups(AliasAnalysis &AA) {
I = AliasGroups.erase(I);
}
bool Valid = true;
for (AliasGroupTy &AG : AliasGroups) {
MinMaxVectorTy *MinMaxAccesses = new MinMaxVectorTy();
MinMaxAccesses->reserve(AG.size());
@ -1220,11 +1252,16 @@ void Scop::buildAliasGroups(AliasAnalysis &AA) {
Locations = isl_union_set_intersect_params(Locations, getAssumedContext());
Locations = isl_union_set_coalesce(Locations);
Locations = isl_union_set_detect_equalities(Locations);
isl_union_set_foreach_set(Locations, buildMinMaxAccess, MinMaxAccesses);
Valid = (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess,
MinMaxAccesses));
isl_union_set_free(Locations);
MinMaxAliasGroups.push_back(MinMaxAccesses);
if (!Valid)
break;
}
return Valid;
}
Scop::Scop(TempScop &tempScop, LoopInfo &LI, ScalarEvolution &ScalarEvolution,
@ -1566,9 +1603,28 @@ bool ScopInfo::runOnRegion(Region *R, RGPassManager &RGM) {
scop = new Scop(*tempScop, LI, SE, ctx);
if (PollyUseRuntimeAliasChecks)
scop->buildAliasGroups(AA);
if (!PollyUseRuntimeAliasChecks)
return false;
// If a problem occurs while building the alias groups we need to delete
// this SCoP and pretend it wasn't valid in the first place.
if (scop->buildAliasGroups(AA))
return false;
--ScopFound;
if (tempScop->getMaxLoopDepth() > 0)
--RichScopFound;
DEBUG(dbgs()
<< "\n\nNOTE: Run time checks for " << scop->getNameStr()
<< " could not be created as the number of parameters involved is too "
"high. The SCoP will be "
"dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust the "
"maximal number of parameters but be advised that the compile time "
"might increase exponentially.\n\n");
delete scop;
scop = nullptr;
return false;
}

View File

@ -0,0 +1,91 @@
; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -polly-rtc-max-parameters=8 -analyze < %s | FileCheck %s --check-prefix=MAX8
; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -polly-rtc-max-parameters=7 -analyze < %s | FileCheck %s --check-prefix=MAX7
;
; Check that we allow this SCoP even though it has 10 parameters involved in posisbly aliasing accesses.
; However, only 7 are involved in accesses through B, 8 through C and none in accesses through A.
;
; MAX8: Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.cond => for.end' in function 'jd':
; MAX8-NEXT: Function: jd
; MAX7: Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.cond => for.end' in function 'jd':
; MAX7-NEXT: Invalid Scop!
;
; void jd(int *A, int *B, int *C, long p1, long p2, long p3, long p4, long p5,
; long p6, long p7, long p8, long p9, long p10) {
; for (int i = 0; i < 1024; i++)
; A[i] = B[p1] - B[p2] + B[-p3] - B[p4] + B[p5] - B[-p6] + B[p7] - C[p3] +
; C[-p4] - C[p5] + C[p6] - C[-p7] + C[p8] - C[p9] + C[-p10];
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @jd(i32* %A, i32* %B, i32* %C, i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8, i64 %p9, i64 %p10) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32* %B, i64 %p1
%tmp = load i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %B, i64 %p2
%tmp1 = load i32* %arrayidx1, align 4
%sub = sub nsw i32 %tmp, %tmp1
%sub2 = sub nsw i64 0, %p3
%arrayidx3 = getelementptr inbounds i32* %B, i64 %sub2
%tmp2 = load i32* %arrayidx3, align 4
%add = add nsw i32 %sub, %tmp2
%arrayidx4 = getelementptr inbounds i32* %B, i64 %p4
%tmp3 = load i32* %arrayidx4, align 4
%sub5 = sub nsw i32 %add, %tmp3
%arrayidx6 = getelementptr inbounds i32* %B, i64 %p5
%tmp4 = load i32* %arrayidx6, align 4
%add7 = add nsw i32 %sub5, %tmp4
%sub8 = sub nsw i64 0, %p6
%arrayidx9 = getelementptr inbounds i32* %B, i64 %sub8
%tmp5 = load i32* %arrayidx9, align 4
%sub10 = sub nsw i32 %add7, %tmp5
%arrayidx11 = getelementptr inbounds i32* %B, i64 %p7
%tmp6 = load i32* %arrayidx11, align 4
%add12 = add nsw i32 %sub10, %tmp6
%arrayidx13 = getelementptr inbounds i32* %C, i64 %p3
%tmp7 = load i32* %arrayidx13, align 4
%sub14 = sub nsw i32 %add12, %tmp7
%sub15 = sub nsw i64 0, %p4
%arrayidx16 = getelementptr inbounds i32* %C, i64 %sub15
%tmp8 = load i32* %arrayidx16, align 4
%add17 = add nsw i32 %sub14, %tmp8
%arrayidx18 = getelementptr inbounds i32* %C, i64 %p5
%tmp9 = load i32* %arrayidx18, align 4
%sub19 = sub nsw i32 %add17, %tmp9
%arrayidx20 = getelementptr inbounds i32* %C, i64 %p6
%tmp10 = load i32* %arrayidx20, align 4
%add21 = add nsw i32 %sub19, %tmp10
%sub22 = sub nsw i64 0, %p7
%arrayidx23 = getelementptr inbounds i32* %C, i64 %sub22
%tmp11 = load i32* %arrayidx23, align 4
%sub24 = sub nsw i32 %add21, %tmp11
%arrayidx25 = getelementptr inbounds i32* %C, i64 %p8
%tmp12 = load i32* %arrayidx25, align 4
%add26 = add nsw i32 %sub24, %tmp12
%arrayidx27 = getelementptr inbounds i32* %C, i64 %p9
%tmp13 = load i32* %arrayidx27, align 4
%sub28 = sub nsw i32 %add26, %tmp13
%sub29 = sub nsw i64 0, %p10
%arrayidx30 = getelementptr inbounds i32* %C, i64 %sub29
%tmp14 = load i32* %arrayidx30, align 4
%add31 = add nsw i32 %sub28, %tmp14
%arrayidx32 = getelementptr inbounds i32* %A, i64 %indvars.iv
store i32 %add31, i32* %arrayidx32, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,130 @@
; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -analyze < %s | FileCheck %s
;
; A valid Scop would print the list of it's statements, we check that we do not
; see that list.
;
; CHECK-NOT: Statements
;
; FIXME: Handling this is an open problem, at the moment we just bail out.
;
; void foo(float *A, float *B,
; long p1,
; long p2,
; long p3,
; long p4,
; long p5,
; long p6,
; long p7,
; long p8,
; long p9,
; long p10,
; long p11,
; long p12) {
; for (long i = 0; i < 100; i++) {
; A[i] =
; B[i + p1] +
; B[i + p2] +
; B[i + p3] +
; B[i + p4] +
; B[i + p5] +
; B[i + p6] +
; B[i + p7] +
; B[i + p8] +
; B[i + p9] +
; B[i + p10] +
; B[i + p11] +
; B[i + p12];
; }
; }
;
; Computing the minimal and maximal element accessed in B is very expensive.
; Expressing the minimal element itself yields a rather complex isl_pw_aff which
; looks as follows:
; { ...
; MemRef_B[(100 + p11)] : p2 <= -1 + p1 and p3 <= -1 + p1 and p4 <= -1 + p1
; and p5 <= -1 + p1 and p6 <= -1 + p1 and
; p7 <= -1 + p1 and p8 <= -1 + p1 and p9 <= -1 + p1
; and p10 <= -1 + p1 and p11 >= p1 and
; p12 <= -1 + p11;
; MemRef_B[(100 + p12)] : p2 <= -1 + p1 and p3 <= -1 + p1 and p4 <= -1 + p1
; and p5 <= -1 + p1 and p6 <= -1 + p1 and
; p7 <= -1 + p1 and p8 <= -1 + p1 and p9 <= -1 + p1
; and p10 <= -1 + p1 and p11 <= -1 + p1 and p12 >= p1;
;
; and this isl_pw_aff is then 1:1 translated into a isl ast expression.
;
; In the best case, we would create a run-time check such as:
;
; if (B[99 + max(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12)] < A[0]
; || A[99] B[min(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9]))
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
define void @foo(float* %A, float* %B, i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8, i64 %p9, i64 %p10, i64 %p11, i64 %p12) #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
for.body: ; preds = %entry.split, %for.body
%i.01 = phi i64 [ 0, %entry.split ], [ %tmp25, %for.body ]
%tmp = add i64 %p1, %i.01
%arrayidx = getelementptr float* %B, i64 %tmp
%tmp2 = add i64 %p2, %i.01
%arrayidx2 = getelementptr float* %B, i64 %tmp2
%tmp3 = add i64 %p3, %i.01
%arrayidx5 = getelementptr float* %B, i64 %tmp3
%tmp4 = add i64 %p4, %i.01
%arrayidx8 = getelementptr float* %B, i64 %tmp4
%tmp5 = add i64 %p5, %i.01
%arrayidx11 = getelementptr float* %B, i64 %tmp5
%tmp6 = add i64 %p6, %i.01
%arrayidx14 = getelementptr float* %B, i64 %tmp6
%tmp7 = add i64 %p7, %i.01
%arrayidx17 = getelementptr float* %B, i64 %tmp7
%tmp8 = add i64 %p8, %i.01
%arrayidx20 = getelementptr float* %B, i64 %tmp8
%tmp9 = add i64 %p9, %i.01
%arrayidx23 = getelementptr float* %B, i64 %tmp9
%tmp10 = add i64 %p10, %i.01
%arrayidx26 = getelementptr float* %B, i64 %tmp10
%tmp11 = add i64 %p11, %i.01
%arrayidx29 = getelementptr float* %B, i64 %tmp11
%tmp12 = add i64 %p12, %i.01
%arrayidx32 = getelementptr float* %B, i64 %tmp12
%arrayidx34 = getelementptr float* %A, i64 %i.01
%tmp13 = load float* %arrayidx, align 4
%tmp14 = load float* %arrayidx2, align 4
%add3 = fadd float %tmp13, %tmp14
%tmp15 = load float* %arrayidx5, align 4
%add6 = fadd float %add3, %tmp15
%tmp16 = load float* %arrayidx8, align 4
%add9 = fadd float %add6, %tmp16
%tmp17 = load float* %arrayidx11, align 4
%add12 = fadd float %add9, %tmp17
%tmp18 = load float* %arrayidx14, align 4
%add15 = fadd float %add12, %tmp18
%tmp19 = load float* %arrayidx17, align 4
%add18 = fadd float %add15, %tmp19
%tmp20 = load float* %arrayidx20, align 4
%add21 = fadd float %add18, %tmp20
%tmp21 = load float* %arrayidx23, align 4
%add24 = fadd float %add21, %tmp21
%tmp22 = load float* %arrayidx26, align 4
%add27 = fadd float %add24, %tmp22
%tmp23 = load float* %arrayidx29, align 4
%add30 = fadd float %add27, %tmp23
%tmp24 = load float* %arrayidx32, align 4
%add33 = fadd float %add30, %tmp24
store float %add33, float* %arrayidx34, align 4
%tmp25 = add nsw i64 %i.01, 1
%exitcond = icmp ne i64 %tmp25, 100
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}