Support truncate operations

Truncate operations are basically modulo operations, thus we can model them that way. However, for large types we assume the operand to fit in the new type size instead of introducing a modulo with a very large constant. llvm-svn: 269300
2016-05-12 15:13:49 +00:00 · 2016-05-12 15:13:49 +00:00 · 6f1bb7a9d9
parent 404a0f81ea
commit 6f1bb7a9d9
10 changed files with 239 additions and 29 deletions
--- a/polly/lib/Support/SCEVAffinator.cpp
+++ b/polly/lib/Support/SCEVAffinator.cpp
@ -39,11 +39,19 @@ static int const MaxDisjunctionsInPwAff = 100;
 // The maximal number of bits for which a zero-extend is modeled precisely.
 static unsigned const MaxZextSmallBitWidth = 7;

+// The maximal result bit width for which a truncate is modeled precisely.
+static unsigned const MaxTruncateSmallBitWidth = 31;
+
 /// @brief Return true if a zero-extend from @p Width bits is precisely modeled.
 static bool isPreciseZeroExtend(unsigned Width) {
  return Width <= MaxZextSmallBitWidth;
 }

+/// @brief Return true if a truncate to @p Width bits is precisely modeled.
+static bool isPreciseTruncate(unsigned Width) {
+  return Width <= MaxTruncateSmallBitWidth;
+}
+
 /// @brief Add the number of basic sets in @p Domain to @p User
 static isl_stat addNumBasicSets(isl_set *Domain, isl_aff *Aff, void *User) {
  auto *NumBasicSets = static_cast<unsigned *>(User);
@ -291,7 +299,33 @@ __isl_give PWACtx SCEVAffinator::visitConstant(const SCEVConstant *Expr) {

 __isl_give PWACtx
 SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr *Expr) {
-  llvm_unreachable("SCEVTruncateExpr not yet supported");
+  // Truncate operations are basically modulo operations, thus we can
+  // model them that way. However, for large types we assume the operand
+  // to fit in the new type size instead of introducing a modulo with a very
+  // large constant.
+
+  auto *Op = Expr->getOperand();
+  auto OpPWAC = visit(Op);
+
+  unsigned Width = TD.getTypeSizeInBits(Expr->getType());
+  bool Precise = isPreciseTruncate(Width);
+
+  if (Precise) {
+    OpPWAC.first = addModuloSemantic(OpPWAC.first, Expr->getType());
+    return OpPWAC;
+  }
+
+  auto *Dom = isl_pw_aff_domain(isl_pw_aff_copy(OpPWAC.first));
+  auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom);
+  auto *GreaterDom =
+      isl_pw_aff_ge_set(isl_pw_aff_copy(OpPWAC.first), isl_pw_aff_copy(ExpPWA));
+  auto *SmallerDom =
+      isl_pw_aff_lt_set(isl_pw_aff_copy(OpPWAC.first), isl_pw_aff_neg(ExpPWA));
+  auto *OutOfBoundsDom = isl_set_union(SmallerDom, GreaterDom);
+  OpPWAC.second = isl_set_union(OpPWAC.second, isl_set_copy(OutOfBoundsDom));
+  S->recordAssumption(UNSIGNED, OutOfBoundsDom, DebugLoc(), AS_RESTRICTION, BB);
+
+  return OpPWAC;
 }

 __isl_give PWACtx
@ -352,8 +386,7 @@ SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {

  auto OpPWAC = visit(Op);
  if (OpCanWrap)
-    OpPWAC.first =
-        addModuloSemantic(OpPWAC.first, Expr->getOperand()->getType());
+    OpPWAC.first = addModuloSemantic(OpPWAC.first, Op->getType());

  // If the width is too big we assume the negative part does not occur.
  if (!Precise) {
--- a/polly/lib/Support/SCEVValidator.cpp
+++ b/polly/lib/Support/SCEVValidator.cpp
@ -136,23 +136,7 @@ public:
  }

  class ValidatorResult visitTruncateExpr(const SCEVTruncateExpr *Expr) {
-    ValidatorResult Op = visit(Expr->getOperand());
-
-    switch (Op.getType()) {
-    case SCEVType::INT:
-    case SCEVType::PARAM:
-      // We currently do not represent a truncate expression as an affine
-      // expression. If it is constant during Scop execution, we treat it as a
-      // parameter.
-      return ValidatorResult(SCEVType::PARAM, Expr);
-    case SCEVType::IV:
-      DEBUG(dbgs() << "INVALID: Truncation of SCEVType::IV expression");
-      return ValidatorResult(SCEVType::INVALID);
-    case SCEVType::INVALID:
-      return Op;
-    }
-
-    llvm_unreachable("Unknown SCEVType");
+    return visit(Expr->getOperand());
  }

  class ValidatorResult visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
--- a/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll
+++ b/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll
@ -7,8 +7,6 @@
 ;
 ; CHECK:    polly.split_new_and_old:
 ; CHECK-NOT:  = sdiv i64 0, -4
-; CHECK:      %div43polly = sdiv i64 %param, 2
-; CHECK:      %div44polly = sdiv i64 %div43polly, 2
 ;
 target triple = "x86_64-unknown-linux-gnu"

--- a/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll
+++ b/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll
@ -1,7 +1,7 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
 ; RUN: opt %loadPolly -polly-codegen -analyze < %s
 ;
-; CHECK: Execution Context: [p_0] -> {  :  }
+; CHECK: Execution Context: [p_0_loaded_from_currpc] -> {  :  }
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

--- a/polly/test/ScopInfo/multiple-types-non-affine-2.ll
+++ b/polly/test/ScopInfo/multiple-types-non-affine-2.ll
@ -24,15 +24,15 @@
 ; CHECK-NEXT: Schedule :=
 ; CHECK-NEXT:     { Stmt_bb2[i0] -> [i0] };
 ; CHECK-NEXT: ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Short[o0] : 0 <= o0 <= 16 };
+; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Short[o0] : 32*floor((8 + i0)/16) = o0 + 16*floor((i0)/8) and -14 + 2i0 - o0 <= 16*floor((i0)/8) <= 16 + 2i0 - o0 }
 ; CHECK-NEXT: MustWriteAccess :=	[Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Short[o0] : 2i0 <= o0 <= 1 + 2i0 };
 ; CHECK-NEXT: ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Char[o0] : 0 <= o0 <= 32 };
+; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Char[o0] : 64*floor((8 + i0)/16) = o0 + 32*floor((i0)/8) and -28 + 4i0 - o0 <= 32*floor((i0)/8) <= 32 + 4i0 - o0 }
 ; CHECK-NEXT: MustWriteAccess :=	[Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Char[o0] : 4i0 <= o0 <= 3 + 4i0 };
 ; CHECK-NEXT: ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Double[o0] : 0 <= o0 <= 9 };
+; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Double[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
 ; CHECK-NEXT: MustWriteAccess :=	[Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Double[i0] };
 ; CHECK-NEXT: }
--- a/polly/test/ScopInfo/multiple-types-non-affine.ll
+++ b/polly/test/ScopInfo/multiple-types-non-affine.ll
@ -24,15 +24,15 @@
 ; CHECK-NEXT: Schedule :=
 ; CHECK-NEXT:     { Stmt_bb2[i0] -> [i0] };
 ; CHECK-NEXT: ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Short[o0] : 0 <= o0 <= 9 };
+; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Short[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
 ; CHECK-NEXT: MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Short[i0] };
 ; CHECK-NEXT: ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Float[o0] : 0 <= o0 <= 9 };
+; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Float[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
 ; CHECK-NEXT: MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Float[i0] };
 ; CHECK-NEXT: ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Double[o0] : 0 <= o0 <= 15 };
+; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Double[o0] : -7 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
 ; CHECK-NEXT: MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT:     { Stmt_bb2[i0] -> MemRef_Double[i0] };
 ; CHECK-NEXT: }
--- a/polly/test/ScopInfo/truncate-1.ll
+++ b/polly/test/ScopInfo/truncate-1.ll
@ -0,0 +1,45 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(char *A, short N) {
+;      for (char i = 0; i < (char)N; i++)
+;        A[i]++;
+;    }
+;
+; CHECK:       Assumed Context:
+; CHECK-NEXT:  [N] -> {  :  }
+; CHECK-NEXT:  Invalid Context:
+; CHECK-NEXT:  [N] -> {  : 1 = 0 }
+;
+; CHECK:       Domain :=
+; CHECK-NEXT:    [N] -> { Stmt_for_body[i0] : i0 >= 0 and 256*floor((128 + N)/256) < N - i0 };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i8* %A, i16 signext %N) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i8 [ 0, %entry ], [ %inc4, %for.inc ]
+  %conv = sext i8 %i.0 to i32
+  %conv1 = zext i16 %N to i32
+  %sext = shl i32 %conv1, 24
+  %conv2 = ashr exact i32 %sext, 24
+  %cmp = icmp slt i32 %conv, %conv2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i8 %i.0 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %idxprom
+  %tmp = load i8, i8* %arrayidx, align 1
+  %inc = add i8 %tmp, 1
+  store i8 %inc, i8* %arrayidx, align 1
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc4 = add nsw i8 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/truncate-2.ll
+++ b/polly/test/ScopInfo/truncate-2.ll
@ -0,0 +1,43 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(char *A, short N) {
+;      for (short i = 0; i < N; i++)
+;        A[(char)(N)]++;
+;    }
+;
+; CHECK:       Assumed Context:
+; CHECK-NEXT:  [N] -> {  :  }
+; CHECK-NEXT:  Invalid Context:
+; CHECK-NEXT:  [N] -> {  : 1 = 0 }
+;
+; CHECK:         ReadAccess :=	[Reduction Type: +] [Scalar: 0]
+; CHECK-NEXT:        [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
+; CHECK-NEXT:    MustWriteAccess :=	[Reduction Type: +] [Scalar: 0]
+; CHECK-NEXT:        [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i8* %A, i16 signext %N) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i16 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i16 %indvars.iv, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = trunc i16 %N to i8
+  %arrayidx = getelementptr inbounds i8, i8* %A, i8 %idxprom
+  %tmp1 = load i8, i8* %arrayidx, align 1
+  %inc = add i8 %tmp1, 1
+  store i8 %inc, i8* %arrayidx, align 1
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i16 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/zero_ext_of_truncate.ll
+++ b/polly/test/ScopInfo/zero_ext_of_truncate.ll
@ -0,0 +1,53 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(unsigned *restrict I, unsigned *restrict A, unsigned N, unsigned M) {
+;      for (unsigned i = 0; i < N; i++) {
+;        unsigned char V = *I;
+;        if (V < M)
+;          A[i]++;
+;      }
+;    }
+;
+; CHECK:         Assumed Context:
+; CHECK-NEXT:    [N, tmp, M] -> { : }
+; CHECK-NEXT:    Invalid Context:
+; CHECK-NEXT:    [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) or (N > 0 and 256*floor((128 + tmp)/256) > tmp) }
+;
+; CHECK:         Domain :=
+; CHECK-NEXT:    [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and 256*floor((128 + tmp)/256) > tmp - M };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias %I, i32* noalias %A, i32 %N, i32 %M) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %lftr.wideiv = trunc i64 %indvars.iv to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp = load i32, i32* %I, align 4
+  %conv1 = and i32 %tmp, 255
+  %cmp2 = icmp ult i32 %conv1, %M
+  br i1 %cmp2, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp1 = load i32, i32* %arrayidx, align 4
+  %inc = add i32 %tmp1, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/zero_ext_of_truncate_2.ll
+++ b/polly/test/ScopInfo/zero_ext_of_truncate_2.ll
@ -0,0 +1,54 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(unsigned long *restrict I, unsigned *restrict A, unsigned N) {
+;      for (unsigned i = 0; i < N; i++) {
+;        unsigned V = *I;
+;        if (V < i)
+;          A[i]++;
+;      }
+;    }
+;
+; CHECK:         Assumed Context:
+; CHECK-NEXT:    [N, tmp] -> { : }
+; CHECK-NEXT:    Invalid Context:
+; CHECK-NEXT:    [N, tmp] -> { : N > 0 and (tmp < 0 or tmp >= 2147483648) }
+;
+; CHECK:         Domain :=
+; CHECK-NEXT:    [N, tmp] -> { Stmt_if_then[i0] : i0 > tmp and 0 <= i0 < N };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i64* noalias %I, i32* noalias %A, i32 %N, i32 %M) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %lftr.wideiv = trunc i64 %indvars.iv to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp = load i64, i64* %I, align 8
+  %conv = trunc i64 %tmp to i32
+  %tmp1 = zext i32 %conv to i64
+  %cmp1 = icmp ult i64 %tmp1, %indvars.iv
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %inc = add i32 %tmp2, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}