From 9143d67abaf08028b52e65387a1d96076a58004a Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 27 Sep 2014 11:02:39 +0000 Subject: [PATCH] [RTC] Bail if too many parameters are involved in a RTC access. If too many parameters are involved in accesses used to create RTCs we might end up with enormous compile times and RTC expressions. The reason is that the lexmin/lexmax is dependent on all these parameters and isl might need to create a case for every "ordering" of them (e.g., p0 <= p1 <= p2, p1 <= p0 <= p2, ...). The exact number of parameters allowed in accesses is defined by the command line option -polly-rtc-max-parameters=XXX and set by default to 8. Differential Revision: http://reviews.llvm.org/D5500 llvm-svn: 218566 --- polly/include/polly/ScopInfo.h | 4 +- polly/lib/Analysis/ScopInfo.cpp | 66 ++++++++- ...iasing_many_parameters_not_all_involved.ll | 91 ++++++++++++ .../run-time-check-many-parameters.ll | 130 ++++++++++++++++++ 4 files changed, 285 insertions(+), 6 deletions(-) create mode 100644 polly/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll create mode 100644 polly/test/ScopInfo/run-time-check-many-parameters.ll diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 0f2913c1f0c1..02eb6597a861 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -692,7 +692,9 @@ public: void addAssumption(__isl_take isl_set *Set); /// @brief Build all alias groups for this SCoP. - void buildAliasGroups(AliasAnalysis &AA); + /// + /// @returns True if __no__ error occurred, false otherwise. + bool buildAliasGroups(AliasAnalysis &AA); /// @brief Return all alias groups for this SCoP. const MinMaxVectorVectorTy &getAliasGroups() const { diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index b1f18961e95f..1ecf1a295d21 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -64,6 +64,11 @@ static cl::opt DisableMultiplicativeReductions( cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory)); +static cl::opt RunTimeChecksMaxParameters( + "polly-rtc-max-parameters", + cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden, + cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory)); + /// Translate a 'const SCEV *' expression in an isl_pw_aff. struct SCEVAffinator : public SCEVVisitor { public: @@ -1137,6 +1142,32 @@ static int buildMinMaxAccess(__isl_take isl_set *Set, void *User) { isl_aff *OneAff; unsigned Pos; + // Restrict the number of parameters involved in the access as the lexmin/ + // lexmax computation will take too long if this number is high. + // + // Experiments with a simple test case using an i7 4800MQ: + // + // #Parameters involved | Time (in sec) + // 6 | 0.01 + // 7 | 0.04 + // 8 | 0.12 + // 9 | 0.40 + // 10 | 1.54 + // 11 | 6.78 + // 12 | 30.38 + // + if (isl_set_n_param(Set) > RunTimeChecksMaxParameters) { + unsigned InvolvedParams = 0; + for (unsigned u = 0, e = isl_set_n_param(Set); u < e; u++) + if (isl_set_involves_dims(Set, isl_dim_param, u, 1)) + InvolvedParams++; + + if (InvolvedParams > RunTimeChecksMaxParameters) { + isl_set_free(Set); + return -1; + } + } + MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set)); MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set)); @@ -1160,7 +1191,7 @@ static int buildMinMaxAccess(__isl_take isl_set *Set, void *User) { return 0; } -void Scop::buildAliasGroups(AliasAnalysis &AA) { +bool Scop::buildAliasGroups(AliasAnalysis &AA) { // To create sound alias checks we perform the following steps: // o) Use the alias analysis and an alias set tracker to build alias sets // for all memory accesses inside the SCoP. @@ -1207,6 +1238,7 @@ void Scop::buildAliasGroups(AliasAnalysis &AA) { I = AliasGroups.erase(I); } + bool Valid = true; for (AliasGroupTy &AG : AliasGroups) { MinMaxVectorTy *MinMaxAccesses = new MinMaxVectorTy(); MinMaxAccesses->reserve(AG.size()); @@ -1220,11 +1252,16 @@ void Scop::buildAliasGroups(AliasAnalysis &AA) { Locations = isl_union_set_intersect_params(Locations, getAssumedContext()); Locations = isl_union_set_coalesce(Locations); Locations = isl_union_set_detect_equalities(Locations); - isl_union_set_foreach_set(Locations, buildMinMaxAccess, MinMaxAccesses); + Valid = (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess, + MinMaxAccesses)); isl_union_set_free(Locations); - MinMaxAliasGroups.push_back(MinMaxAccesses); + + if (!Valid) + break; } + + return Valid; } Scop::Scop(TempScop &tempScop, LoopInfo &LI, ScalarEvolution &ScalarEvolution, @@ -1566,9 +1603,28 @@ bool ScopInfo::runOnRegion(Region *R, RGPassManager &RGM) { scop = new Scop(*tempScop, LI, SE, ctx); - if (PollyUseRuntimeAliasChecks) - scop->buildAliasGroups(AA); + if (!PollyUseRuntimeAliasChecks) + return false; + // If a problem occurs while building the alias groups we need to delete + // this SCoP and pretend it wasn't valid in the first place. + if (scop->buildAliasGroups(AA)) + return false; + + --ScopFound; + if (tempScop->getMaxLoopDepth() > 0) + --RichScopFound; + + DEBUG(dbgs() + << "\n\nNOTE: Run time checks for " << scop->getNameStr() + << " could not be created as the number of parameters involved is too " + "high. The SCoP will be " + "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust the " + "maximal number of parameters but be advised that the compile time " + "might increase exponentially.\n\n"); + + delete scop; + scop = nullptr; return false; } diff --git a/polly/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll b/polly/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll new file mode 100644 index 000000000000..fe3674140a64 --- /dev/null +++ b/polly/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll @@ -0,0 +1,91 @@ +; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -polly-rtc-max-parameters=8 -analyze < %s | FileCheck %s --check-prefix=MAX8 +; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -polly-rtc-max-parameters=7 -analyze < %s | FileCheck %s --check-prefix=MAX7 +; +; Check that we allow this SCoP even though it has 10 parameters involved in posisbly aliasing accesses. +; However, only 7 are involved in accesses through B, 8 through C and none in accesses through A. +; +; MAX8: Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.cond => for.end' in function 'jd': +; MAX8-NEXT: Function: jd + +; MAX7: Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.cond => for.end' in function 'jd': +; MAX7-NEXT: Invalid Scop! +; +; void jd(int *A, int *B, int *C, long p1, long p2, long p3, long p4, long p5, +; long p6, long p7, long p8, long p9, long p10) { +; for (int i = 0; i < 1024; i++) +; A[i] = B[p1] - B[p2] + B[-p3] - B[p4] + B[p5] - B[-p6] + B[p7] - C[p3] + +; C[-p4] - C[p5] + C[p6] - C[-p7] + C[p8] - C[p9] + C[-p10]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A, i32* %B, i32* %C, i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8, i64 %p9, i64 %p10) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %B, i64 %p1 + %tmp = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %B, i64 %p2 + %tmp1 = load i32* %arrayidx1, align 4 + %sub = sub nsw i32 %tmp, %tmp1 + %sub2 = sub nsw i64 0, %p3 + %arrayidx3 = getelementptr inbounds i32* %B, i64 %sub2 + %tmp2 = load i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %tmp2 + %arrayidx4 = getelementptr inbounds i32* %B, i64 %p4 + %tmp3 = load i32* %arrayidx4, align 4 + %sub5 = sub nsw i32 %add, %tmp3 + %arrayidx6 = getelementptr inbounds i32* %B, i64 %p5 + %tmp4 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %sub5, %tmp4 + %sub8 = sub nsw i64 0, %p6 + %arrayidx9 = getelementptr inbounds i32* %B, i64 %sub8 + %tmp5 = load i32* %arrayidx9, align 4 + %sub10 = sub nsw i32 %add7, %tmp5 + %arrayidx11 = getelementptr inbounds i32* %B, i64 %p7 + %tmp6 = load i32* %arrayidx11, align 4 + %add12 = add nsw i32 %sub10, %tmp6 + %arrayidx13 = getelementptr inbounds i32* %C, i64 %p3 + %tmp7 = load i32* %arrayidx13, align 4 + %sub14 = sub nsw i32 %add12, %tmp7 + %sub15 = sub nsw i64 0, %p4 + %arrayidx16 = getelementptr inbounds i32* %C, i64 %sub15 + %tmp8 = load i32* %arrayidx16, align 4 + %add17 = add nsw i32 %sub14, %tmp8 + %arrayidx18 = getelementptr inbounds i32* %C, i64 %p5 + %tmp9 = load i32* %arrayidx18, align 4 + %sub19 = sub nsw i32 %add17, %tmp9 + %arrayidx20 = getelementptr inbounds i32* %C, i64 %p6 + %tmp10 = load i32* %arrayidx20, align 4 + %add21 = add nsw i32 %sub19, %tmp10 + %sub22 = sub nsw i64 0, %p7 + %arrayidx23 = getelementptr inbounds i32* %C, i64 %sub22 + %tmp11 = load i32* %arrayidx23, align 4 + %sub24 = sub nsw i32 %add21, %tmp11 + %arrayidx25 = getelementptr inbounds i32* %C, i64 %p8 + %tmp12 = load i32* %arrayidx25, align 4 + %add26 = add nsw i32 %sub24, %tmp12 + %arrayidx27 = getelementptr inbounds i32* %C, i64 %p9 + %tmp13 = load i32* %arrayidx27, align 4 + %sub28 = sub nsw i32 %add26, %tmp13 + %sub29 = sub nsw i64 0, %p10 + %arrayidx30 = getelementptr inbounds i32* %C, i64 %sub29 + %tmp14 = load i32* %arrayidx30, align 4 + %add31 = add nsw i32 %sub28, %tmp14 + %arrayidx32 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 %add31, i32* %arrayidx32, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/run-time-check-many-parameters.ll b/polly/test/ScopInfo/run-time-check-many-parameters.ll new file mode 100644 index 000000000000..65971b411451 --- /dev/null +++ b/polly/test/ScopInfo/run-time-check-many-parameters.ll @@ -0,0 +1,130 @@ +; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -analyze < %s | FileCheck %s +; +; A valid Scop would print the list of it's statements, we check that we do not +; see that list. +; +; CHECK-NOT: Statements +; +; FIXME: Handling this is an open problem, at the moment we just bail out. +; +; void foo(float *A, float *B, +; long p1, +; long p2, +; long p3, +; long p4, +; long p5, +; long p6, +; long p7, +; long p8, +; long p9, +; long p10, +; long p11, +; long p12) { +; for (long i = 0; i < 100; i++) { +; A[i] = +; B[i + p1] + +; B[i + p2] + +; B[i + p3] + +; B[i + p4] + +; B[i + p5] + +; B[i + p6] + +; B[i + p7] + +; B[i + p8] + +; B[i + p9] + +; B[i + p10] + +; B[i + p11] + +; B[i + p12]; +; } +; } +; +; Computing the minimal and maximal element accessed in B is very expensive. +; Expressing the minimal element itself yields a rather complex isl_pw_aff which +; looks as follows: +; { ... +; MemRef_B[(100 + p11)] : p2 <= -1 + p1 and p3 <= -1 + p1 and p4 <= -1 + p1 +; and p5 <= -1 + p1 and p6 <= -1 + p1 and +; p7 <= -1 + p1 and p8 <= -1 + p1 and p9 <= -1 + p1 +; and p10 <= -1 + p1 and p11 >= p1 and +; p12 <= -1 + p11; +; MemRef_B[(100 + p12)] : p2 <= -1 + p1 and p3 <= -1 + p1 and p4 <= -1 + p1 +; and p5 <= -1 + p1 and p6 <= -1 + p1 and +; p7 <= -1 + p1 and p8 <= -1 + p1 and p9 <= -1 + p1 +; and p10 <= -1 + p1 and p11 <= -1 + p1 and p12 >= p1; +; +; and this isl_pw_aff is then 1:1 translated into a isl ast expression. +; +; In the best case, we would create a run-time check such as: +; +; if (B[99 + max(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12)] < A[0] +; || A[99] B[min(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9])) +; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo(float* %A, float* %B, i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8, i64 %p9, i64 %p10, i64 %p11, i64 %p12) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.body + %i.01 = phi i64 [ 0, %entry.split ], [ %tmp25, %for.body ] + %tmp = add i64 %p1, %i.01 + %arrayidx = getelementptr float* %B, i64 %tmp + %tmp2 = add i64 %p2, %i.01 + %arrayidx2 = getelementptr float* %B, i64 %tmp2 + %tmp3 = add i64 %p3, %i.01 + %arrayidx5 = getelementptr float* %B, i64 %tmp3 + %tmp4 = add i64 %p4, %i.01 + %arrayidx8 = getelementptr float* %B, i64 %tmp4 + %tmp5 = add i64 %p5, %i.01 + %arrayidx11 = getelementptr float* %B, i64 %tmp5 + %tmp6 = add i64 %p6, %i.01 + %arrayidx14 = getelementptr float* %B, i64 %tmp6 + %tmp7 = add i64 %p7, %i.01 + %arrayidx17 = getelementptr float* %B, i64 %tmp7 + %tmp8 = add i64 %p8, %i.01 + %arrayidx20 = getelementptr float* %B, i64 %tmp8 + %tmp9 = add i64 %p9, %i.01 + %arrayidx23 = getelementptr float* %B, i64 %tmp9 + %tmp10 = add i64 %p10, %i.01 + %arrayidx26 = getelementptr float* %B, i64 %tmp10 + %tmp11 = add i64 %p11, %i.01 + %arrayidx29 = getelementptr float* %B, i64 %tmp11 + %tmp12 = add i64 %p12, %i.01 + %arrayidx32 = getelementptr float* %B, i64 %tmp12 + %arrayidx34 = getelementptr float* %A, i64 %i.01 + %tmp13 = load float* %arrayidx, align 4 + %tmp14 = load float* %arrayidx2, align 4 + %add3 = fadd float %tmp13, %tmp14 + %tmp15 = load float* %arrayidx5, align 4 + %add6 = fadd float %add3, %tmp15 + %tmp16 = load float* %arrayidx8, align 4 + %add9 = fadd float %add6, %tmp16 + %tmp17 = load float* %arrayidx11, align 4 + %add12 = fadd float %add9, %tmp17 + %tmp18 = load float* %arrayidx14, align 4 + %add15 = fadd float %add12, %tmp18 + %tmp19 = load float* %arrayidx17, align 4 + %add18 = fadd float %add15, %tmp19 + %tmp20 = load float* %arrayidx20, align 4 + %add21 = fadd float %add18, %tmp20 + %tmp21 = load float* %arrayidx23, align 4 + %add24 = fadd float %add21, %tmp21 + %tmp22 = load float* %arrayidx26, align 4 + %add27 = fadd float %add24, %tmp22 + %tmp23 = load float* %arrayidx29, align 4 + %add30 = fadd float %add27, %tmp23 + %tmp24 = load float* %arrayidx32, align 4 + %add33 = fadd float %add30, %tmp24 + store float %add33, float* %arrayidx34, align 4 + %tmp25 = add nsw i64 %i.01, 1 + %exitcond = icmp ne i64 %tmp25, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +}