[Polly] Generate more 'canonical' induction variable

Today Polly generates the induction variable in this way:

polly.indvar = phi 0, polly.indvar.next
...
polly.indvar.next = polly.indvar + stride
polly.loop_cond = predicate polly.indvar, (UB - stride)

Instead of:

polly.indvar = phi 0, polly.indvar.next
...
polly.indvar.next = polly.indvar + stride
polly.loop_cond = predicate polly.indvar.next, UB

The way Polly generates the induction variable causes problems in the indvar simplify pass.
This patch makes Polly generate the latter form, by assuming the induction variable never overflows.

Differential Revision: https://reviews.llvm.org/D33089

llvm-svn: 302866
This commit is contained in:
Hongbin Zheng 2017-05-12 02:17:15 +00:00
parent 581072e1a6
commit 4fe342cb75
10 changed files with 30 additions and 30 deletions

View File

@ -17,11 +17,13 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
using namespace polly;
using namespace PatternMatch;
static cl::opt<int>
PollyNumThreads("polly-num-threads",
@ -49,6 +51,9 @@ static cl::opt<int>
// contains the loop iv 'polly.indvar', the incremented loop iv
// 'polly.indvar_next' as well as the condition to check if we execute another
// iteration of the loop. After the loop has finished, we branch to ExitBB.
// We expect the type of UB, LB, UB+Stride to be large enough for values that
// UB may take throughout the execution of the loop, including the computation
// of indvar + Stride before the final abort.
Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
PollyIRBuilder &Builder, LoopInfo &LI,
DominatorTree &DT, BasicBlock *&ExitBB,
@ -123,10 +128,8 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
IV->addIncoming(LB, PreHeaderBB);
Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
Value *LoopCondition;
UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
LoopCondition->setName("polly.loop_cond");
Value *LoopCondition =
Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
// Create the loop latch and annotate it as such.
BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);

View File

@ -2,10 +2,10 @@
;
; Check that we mark multiple parallel loops correctly including the memory instructions.
;
; CHECK-DAG: %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 1022
; CHECK-DAG: %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 1023
; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDOuter:[0-9]*]]
;
; CHECK-DAG: %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 510
; CHECK-DAG: %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 511
; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]]
;
; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4

View File

@ -15,7 +15,7 @@
; SCEV-NEXT: %p_tmp5 = fadd float %tmp4_p_scalar_, 1.000000e+01
; SCEV-NEXT: store float %p_tmp5, float* %p_tmp3, align 4, !alias.scope !0, !noalias !2
; SCEV-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; SCEV-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98
; SCEV-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99
; SCEV-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
; ASTEXPR: polly.stmt.bb2: ; preds = %polly.loop_header
@ -27,7 +27,7 @@
; ASTEXPR-NEXT: %polly.access.A2 = getelementptr float, float* %A, i64 %pexp.pdiv_r1
; ASTEXPR-NEXT: store float %p_tmp5, float* %polly.access.A2, align 4, !alias.scope !0, !noalias !2
; ASTEXPR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; ASTEXPR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98
; ASTEXPR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99
; ASTEXPR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -3,21 +3,21 @@
; RUN: < %s -S | FileCheck %s
; CHECK-LABEL: polly.stmt.loop1:
; CHECK-NEXT: %3 = mul nsw i64 5, %polly.indvar
; CHECK-NEXT: %3 = mul nsw i64 5, %polly.indvar{{[0-9]*}}
; CHECK-NEXT: %4 = sub nsw i64 %3, 10
; CHECK-NEXT: %polly.access.A = getelementptr double, double* %A, i64 %4
; CHECK-NEXT: store double 4.200000e+01, double* %polly.access.A, align 8
; CHECK-LABEL: polly.stmt.loop2:
; CHECK-NEXT: %polly.access.A10 = getelementptr double, double* %A, i64 42
; CHECK-NEXT: %val_p_scalar_ = load double, double* %polly.access.A10, align 8
; CHECK-NEXT: %polly.access.A[[Num0:[0-9]*]] = getelementptr double, double* %A, i64 42
; CHECK-NEXT: %val_p_scalar_ = load double, double* %polly.access.A[[Num0]], align 8
; CHECK-LABEL: polly.stmt.loop3:
; CHECK-NEXT: %val.s2a.reload = load double, double* %val.s2a
; CHECK-NEXT: [[REG0:%.*]] = mul nsw i64 13, %polly.indvar16
; CHECK-NEXT: [[REG0:%.*]] = mul nsw i64 13, %polly.indvar{{[0-9]*}}
; CHECK-NEXT: [[REG1:%.*]] = add nsw i64 [[REG0]], 5
; CHECK-NEXT: %polly.access.A20 = getelementptr double, double* %A, i64 [[REG1]]
; CHECK-NEXT: store double %val.s2a.reload, double* %polly.access.A20, align 8,
; CHECK-NEXT: %polly.access.A[[Num1:[0-9]*]] = getelementptr double, double* %A, i64 [[REG1]]
; CHECK-NEXT: store double %val.s2a.reload, double* %polly.access.A[[Num1]], align 8,
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -70,8 +70,7 @@
; IR-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
; IR-NEXT: store float 1.000000e+00, float* %[[gep]]
; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; IR-NEXT: %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 1
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.par.UBAdjusted
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
; IR-LABEL: polly.loop_preheader:

View File

@ -34,7 +34,7 @@
; CHECK: %p_tmp11b = fadd float %tmp10b_p_scalar_, 1.000000e+00
; CHECK: store float %p_tmp11b, float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2
; CHECK: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar, 1022
; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 1023
; CHECK: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -31,7 +31,7 @@ entry:
; CHECK-NEXT: store i32 %x, i32* %x.addr.0.phiops
; CHECK-NEXT: sext
; CHECK-LABEL: polly.merge21:
; CHECK-LABEL: polly.merge{{[a-z_0-9]*}}:
; CHECK: %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a
for.cond: ; preds = %for.inc5, %entry

View File

@ -51,8 +51,7 @@ ret:
; CODEGEN: [[PTR:%[a-zA-Z0-9_\.]+]] = getelementptr [1024 x i32], [1024 x i32]* @A, i64 0, i64 %polly.indvar
; CODEGEN: store i32 1, i32* [[PTR]]
; CODEGEN: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; CODEGEN: %polly.adjust_ub = sub i64 %n, 1
; CODEGEN: %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub
; CODEGEN: %polly.loop_cond = icmp sle i64 %polly.indvar_next, %n
; CODEGEN: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
; CODEGEN: polly.loop_preheader:

View File

@ -49,8 +49,7 @@ ret:
; CODEGEN: [[PTR:%[a-zA-Z0-9_\.]+]] = getelementptr [1024 x i32], [1024 x i32]* @A, i64 0, i64 %polly.indvar
; CODEGEN: store i32 1, i32* [[PTR]]
; CODEGEN: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; CODEGEN: %polly.adjust_ub = sub i64 %n, 1
; CODEGEN: %polly.loop_cond = icmp slt i64 %polly.indvar, %polly.adjust_ub
; CODEGEN: %polly.loop_cond = icmp slt i64 %polly.indvar_next, %n
; CODEGEN: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
; CODEGEN: polly.loop_preheader:

View File

@ -12,14 +12,14 @@
; This test case checks whether Polly generates second level alias metadata
; to distinguish the specific accesses in case of the ublas gemm kernel.
;
; CHECK: %tmp22_p_scalar_ = load double, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
; CHECK: store double %p_tmp23, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
; CHECK: %tmp22_p_scalar_188 = load double, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
; CHECK: store double %p_tmp23189, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
; CHECK: %tmp22_p_scalar_209 = load double, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
; CHECK: store double %p_tmp23210, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
; CHECK: %tmp22_p_scalar_230 = load double, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
; CHECK: store double %p_tmp23231, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N0:[a-z_0-9]*]], align 8, !alias.scope !10, !noalias !2
; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N0]], align 8, !alias.scope !10, !noalias !2
; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N1:[a-z_0-9]*]], align 8, !alias.scope !11, !noalias !12
; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N1]], align 8, !alias.scope !11, !noalias !12
; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N2:[a-z_0-9]*]], align 8, !alias.scope !13, !noalias !14
; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N2]], align 8, !alias.scope !13, !noalias !14
; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N3:[a-z_0-9]*]], align 8, !alias.scope !15, !noalias !16
; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N3]], align 8, !alias.scope !15, !noalias !16
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"