[Polly] Generate more 'canonical' induction variable

Today Polly generates induction variables in this way:

  polly.indvar = phi 0, polly.indvar.next
  ...
  polly.indvar.next = polly.indvar + stride
  polly.loop_cond = predicate polly.indvar, (UB - stride)

Instead of:

  polly.indvar = phi 0, polly.indvar.next
  ...
  polly.indvar.next = polly.indvar + stride
  polly.loop_cond = predicate polly.indvar.next, UB

The way Polly generates the induction variable causes some problems in the indvar simplify pass. This patch makes Polly generate the latter form, by assuming the induction variable never overflows.

Differential Revision: https://reviews.llvm.org/D33089

llvm-svn: 302866
2017-05-12 02:17:15 +00:00 · 2017-05-12 02:17:15 +00:00 · 4fe342cb75
parent 581072e1a6
commit 4fe342cb75
10 changed files with 30 additions and 30 deletions
--- a/polly/lib/CodeGen/LoopGenerators.cpp
+++ b/polly/lib/CodeGen/LoopGenerators.cpp
@ -17,11 +17,13 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"

 using namespace llvm;
 using namespace polly;
+using namespace PatternMatch;

 static cl::opt<int>
    PollyNumThreads("polly-num-threads",
@ -49,6 +51,9 @@ static cl::opt<int>
 // contains the loop iv 'polly.indvar', the incremented loop iv
 // 'polly.indvar_next' as well as the condition to check if we execute another
 // iteration of the loop. After the loop has finished, we branch to ExitBB.
+// We expect the type of UB, LB, UB+Stride to be large enough for values that
+// UB may take throughout the execution of the loop, including the computation
+// of indvar + Stride before the final abort.
 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
                         PollyIRBuilder &Builder, LoopInfo &LI,
                         DominatorTree &DT, BasicBlock *&ExitBB,
@ -123,10 +128,8 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
  IV->addIncoming(LB, PreHeaderBB);
  Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
  Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
-  Value *LoopCondition;
-  UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
-  LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
-  LoopCondition->setName("polly.loop_cond");
+  Value *LoopCondition =
+      Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");

  // Create the loop latch and annotate it as such.
  BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
--- a/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
+++ b/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
@ -2,10 +2,10 @@
 ;
 ; Check that we mark multiple parallel loops correctly including the memory instructions.
 ;
-; CHECK-DAG:  %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 1022
+; CHECK-DAG:  %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 1023
 ; CHECK-DAG:  br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDOuter:[0-9]*]]
 ;
-; CHECK-DAG:  %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 510
+; CHECK-DAG:  %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 511
 ; CHECK-DAG:  br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]]
 ;
 ; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4
--- a/polly/test/Isl/CodeGen/MemAccess/generate-all.ll
+++ b/polly/test/Isl/CodeGen/MemAccess/generate-all.ll
@ -15,7 +15,7 @@
 ; SCEV-NEXT:   %p_tmp5 = fadd float %tmp4_p_scalar_, 1.000000e+01
 ; SCEV-NEXT:   store float %p_tmp5, float* %p_tmp3, align 4, !alias.scope !0, !noalias !2
 ; SCEV-NEXT:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; SCEV-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar, 98
+; SCEV-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99
 ; SCEV-NEXT:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

 ; ASTEXPR: 	polly.stmt.bb2:                                   ; preds = %polly.loop_header
@ -27,7 +27,7 @@
 ; ASTEXPR-NEXT:   %polly.access.A2 = getelementptr float, float* %A, i64 %pexp.pdiv_r1
 ; ASTEXPR-NEXT:   store float %p_tmp5, float* %polly.access.A2, align 4, !alias.scope !0, !noalias !2
 ; ASTEXPR-NEXT:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; ASTEXPR-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar, 98
+; ASTEXPR-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99
 ; ASTEXPR-NEXT:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
--- a/polly/test/Isl/CodeGen/MemAccess/update_access_functions.ll
+++ b/polly/test/Isl/CodeGen/MemAccess/update_access_functions.ll
@ -3,21 +3,21 @@
 ; RUN:                 < %s -S | FileCheck %s

 ; CHECK-LABEL: polly.stmt.loop1:
-; CHECK-NEXT:   %3 = mul nsw i64 5, %polly.indvar
+; CHECK-NEXT:   %3 = mul nsw i64 5, %polly.indvar{{[0-9]*}}
 ; CHECK-NEXT:   %4 = sub nsw i64 %3, 10
 ; CHECK-NEXT:   %polly.access.A = getelementptr double, double* %A, i64 %4
 ; CHECK-NEXT:   store double 4.200000e+01, double* %polly.access.A, align 8

 ; CHECK-LABEL: polly.stmt.loop2:
-; CHECK-NEXT:   %polly.access.A10 = getelementptr double, double* %A, i64 42
-; CHECK-NEXT:   %val_p_scalar_ = load double, double* %polly.access.A10, align 8
+; CHECK-NEXT:   %polly.access.A[[Num0:[0-9]*]] = getelementptr double, double* %A, i64 42
+; CHECK-NEXT:   %val_p_scalar_ = load double, double* %polly.access.A[[Num0]], align 8

 ; CHECK-LABEL: polly.stmt.loop3:
 ; CHECK-NEXT:   %val.s2a.reload = load double, double* %val.s2a
-; CHECK-NEXT:   [[REG0:%.*]] = mul nsw i64 13, %polly.indvar16
+; CHECK-NEXT:   [[REG0:%.*]] = mul nsw i64 13, %polly.indvar{{[0-9]*}}
 ; CHECK-NEXT:   [[REG1:%.*]] = add nsw i64 [[REG0]], 5
-; CHECK-NEXT:   %polly.access.A20 = getelementptr double, double* %A, i64 [[REG1]]
-; CHECK-NEXT:   store double %val.s2a.reload, double* %polly.access.A20, align 8,
+; CHECK-NEXT:   %polly.access.A[[Num1:[0-9]*]] = getelementptr double, double* %A, i64 [[REG1]]
+; CHECK-NEXT:   store double %val.s2a.reload, double* %polly.access.A[[Num1]], align 8,

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

--- a/polly/test/Isl/CodeGen/OpenMP/single_loop.ll
+++ b/polly/test/Isl/CodeGen/OpenMP/single_loop.ll
@ -70,8 +70,7 @@
 ; IR-NEXT:   %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
 ; IR-NEXT:   store float 1.000000e+00, float* %[[gep]]
 ; IR-NEXT:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; IR-NEXT:   %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 1
-; IR-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub
+; IR-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.par.UBAdjusted
 ; IR-NEXT:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

 ; IR-LABEL: polly.loop_preheader:
--- a/polly/test/Isl/CodeGen/non_affine_float_compare.ll
+++ b/polly/test/Isl/CodeGen/non_affine_float_compare.ll
@ -34,7 +34,7 @@
 ; CHECK:   %p_tmp11b = fadd float %tmp10b_p_scalar_, 1.000000e+00
 ; CHECK:   store float %p_tmp11b, float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2
 ; CHECK:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; CHECK:   %polly.loop_cond = icmp sle i64 %polly.indvar, 1022
+; CHECK:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, 1023
 ; CHECK:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
--- a/polly/test/Isl/CodeGen/phi_scalar_simple_2.ll
+++ b/polly/test/Isl/CodeGen/phi_scalar_simple_2.ll
@ -31,7 +31,7 @@ entry:
 ; CHECK-NEXT:    store i32 %x, i32* %x.addr.0.phiops
 ; CHECK-NEXT:    sext

-; CHECK-LABEL: polly.merge21:
+; CHECK-LABEL: polly.merge{{[a-z_0-9]*}}:
 ; CHECK:         %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a

 for.cond:                                         ; preds = %for.inc5, %entry
--- a/polly/test/Isl/single_loop_param_less_equal.ll
+++ b/polly/test/Isl/single_loop_param_less_equal.ll
@ -51,8 +51,7 @@ ret:
 ; CODEGEN:   [[PTR:%[a-zA-Z0-9_\.]+]] = getelementptr [1024 x i32], [1024 x i32]* @A, i64 0, i64 %polly.indvar
 ; CODEGEN:   store i32 1, i32* [[PTR]]
 ; CODEGEN:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; CODEGEN:   %polly.adjust_ub = sub i64 %n, 1
-; CODEGEN:   %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub
+; CODEGEN:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, %n
 ; CODEGEN:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

 ; CODEGEN: polly.loop_preheader:
--- a/polly/test/Isl/single_loop_param_less_than.ll
+++ b/polly/test/Isl/single_loop_param_less_than.ll
@ -49,8 +49,7 @@ ret:
 ; CODEGEN:   [[PTR:%[a-zA-Z0-9_\.]+]] =  getelementptr [1024 x i32], [1024 x i32]* @A, i64 0, i64 %polly.indvar
 ; CODEGEN:   store i32 1, i32* [[PTR]]
 ; CODEGEN:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; CODEGEN:   %polly.adjust_ub = sub i64 %n, 1
-; CODEGEN:   %polly.loop_cond = icmp slt i64 %polly.indvar, %polly.adjust_ub
+; CODEGEN:   %polly.loop_cond = icmp slt i64 %polly.indvar_next, %n
 ; CODEGEN:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

 ; CODEGEN: polly.loop_preheader:
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll
@ -12,14 +12,14 @@
 ; This test case checks whether Polly generates second level alias metadata
 ; to distinguish the specific accesses in case of the ublas gemm kernel.
 ;
-; CHECK: %tmp22_p_scalar_ = load double, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
-; CHECK: store double %p_tmp23, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
-; CHECK: %tmp22_p_scalar_188 = load double, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
-; CHECK: store double %p_tmp23189, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
-; CHECK: %tmp22_p_scalar_209 = load double, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
-; CHECK: store double %p_tmp23210, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
-; CHECK: %tmp22_p_scalar_230 = load double, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
-; CHECK: store double %p_tmp23231, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
+; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N0:[a-z_0-9]*]], align 8, !alias.scope !10, !noalias !2
+; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N0]], align 8, !alias.scope !10, !noalias !2
+; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N1:[a-z_0-9]*]], align 8, !alias.scope !11, !noalias !12
+; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N1]], align 8, !alias.scope !11, !noalias !12
+; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N2:[a-z_0-9]*]], align 8, !alias.scope !13, !noalias !14
+; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N2]], align 8, !alias.scope !13, !noalias !14
+; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N3:[a-z_0-9]*]], align 8, !alias.scope !15, !noalias !16
+; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N3]], align 8, !alias.scope !15, !noalias !16
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"