Support truncate operations

Truncate operations are basically modulo operations, thus we can model
  them that way. However, for large types we assume the operand to fit
  in the new type size instead of introducing a modulo with a very large
  constant.

llvm-svn: 269300
This commit is contained in:
Johannes Doerfert 2016-05-12 15:13:49 +00:00
parent 404a0f81ea
commit 6f1bb7a9d9
10 changed files with 239 additions and 29 deletions

View File

@ -39,11 +39,19 @@ static int const MaxDisjunctionsInPwAff = 100;
/// Largest bit width for which a zero-extend is still modeled precisely.
static const unsigned MaxZextSmallBitWidth = 7;

/// Largest bit width for which a truncate is still modeled precisely.
static const unsigned MaxTruncateSmallBitWidth = 31;

/// @brief Check whether a zero-extend from @p Width bits is modeled precisely.
///
/// @param Width The bit width to test.
/// @returns True iff @p Width does not exceed MaxZextSmallBitWidth.
static bool isPreciseZeroExtend(unsigned Width) {
  if (Width > MaxZextSmallBitWidth)
    return false;
  return true;
}

/// @brief Check whether a truncate to @p Width bits is modeled precisely.
///
/// @param Width The bit width to test.
/// @returns True iff @p Width does not exceed MaxTruncateSmallBitWidth.
static bool isPreciseTruncate(unsigned Width) {
  if (Width > MaxTruncateSmallBitWidth)
    return false;
  return true;
}
/// @brief Add the number of basic sets in @p Domain to @p User
static isl_stat addNumBasicSets(isl_set *Domain, isl_aff *Aff, void *User) {
auto *NumBasicSets = static_cast<unsigned *>(User);
@ -291,7 +299,33 @@ __isl_give PWACtx SCEVAffinator::visitConstant(const SCEVConstant *Expr) {
/// @brief Build the PWACtx for a truncate expression.
///
/// The operand is visited first. When isPreciseTruncate() accepts the bit
/// width of the truncated (result) type, the operand's function is passed
/// through addModuloSemantic() for that type and returned. Otherwise the
/// operand's function is returned as is, and the set on which the operand is
/// >= ExpPWA or < -ExpPWA is both united into the second PWACtx component and
/// handed to S->recordAssumption() as an AS_RESTRICTION.
///
/// @param Expr The truncate expression to translate.
/// @returns The (possibly adjusted) PWACtx of the operand.
__isl_give PWACtx
SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr *Expr) {
// NOTE(review): as shown, this call precedes the whole implementation below.
// Presumably it is the removed pre-change line that the diff view printed
// without its '-' marker -- confirm against the actual file.
llvm_unreachable("SCEVTruncateExpr not yet supported");
// Truncate operations are basically modulo operations, thus we can
// model them that way. However, for large types we assume the operand
// to fit in the new type size instead of introducing a modulo with a very
// large constant.
auto *Op = Expr->getOperand();
auto OpPWAC = visit(Op);
// Width is the size in bits of the truncate's result type.
unsigned Width = TD.getTypeSizeInBits(Expr->getType());
bool Precise = isPreciseTruncate(Width);
// Small result types: model the truncate via modulo semantic.
if (Precise) {
OpPWAC.first = addModuloSemantic(OpPWAC.first, Expr->getType());
return OpPWAC;
}
// Large result types: keep the operand unchanged and collect the part of its
// domain on which it does not lie in [-ExpPWA, ExpPWA).
// Every isl object passed below is a copy, except where the original is
// handed over on its last use (ExpPWA in the neg call, OutOfBoundsDom in
// recordAssumption).
auto *Dom = isl_pw_aff_domain(isl_pw_aff_copy(OpPWAC.first));
// Presumably ExpPWA is the value 2^(Width - 1) defined on Dom -- TODO confirm
// against getWidthExpValOnDomain().
auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom);
// Operand >= ExpPWA.
auto *GreaterDom =
isl_pw_aff_ge_set(isl_pw_aff_copy(OpPWAC.first), isl_pw_aff_copy(ExpPWA));
// Operand < -ExpPWA.
auto *SmallerDom =
isl_pw_aff_lt_set(isl_pw_aff_copy(OpPWAC.first), isl_pw_aff_neg(ExpPWA));
auto *OutOfBoundsDom = isl_set_union(SmallerDom, GreaterDom);
// Track the out-of-bounds set in the returned context and record it as a
// restriction.
OpPWAC.second = isl_set_union(OpPWAC.second, isl_set_copy(OutOfBoundsDom));
S->recordAssumption(UNSIGNED, OutOfBoundsDom, DebugLoc(), AS_RESTRICTION, BB);
return OpPWAC;
}
__isl_give PWACtx
@ -352,8 +386,7 @@ SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
auto OpPWAC = visit(Op);
if (OpCanWrap)
OpPWAC.first =
addModuloSemantic(OpPWAC.first, Expr->getOperand()->getType());
OpPWAC.first = addModuloSemantic(OpPWAC.first, Op->getType());
// If the width is too big we assume the negative part does not occur.
if (!Precise) {

View File

@ -136,23 +136,7 @@ public:
}
/// @brief Validate a truncate expression based on the result for its operand.
///
/// NOTE(review): as shown, the body contains a switch in which every listed
/// case returns, followed by llvm_unreachable and then a second
/// `return visit(...)`. Presumably the diff view merged the removed switch
/// with the added single-line body -- confirm against the actual file.
class ValidatorResult visitTruncateExpr(const SCEVTruncateExpr *Expr) {
// Classify the operand; the switch below dispatches on its type.
ValidatorResult Op = visit(Expr->getOperand());
switch (Op.getType()) {
case SCEVType::INT:
case SCEVType::PARAM:
// We currently do not represent a truncate expression as an affine
// expression. If it is constant during Scop execution, we treat it as a
// parameter.
return ValidatorResult(SCEVType::PARAM, Expr);
case SCEVType::IV:
// A truncated induction-variable expression is rejected.
DEBUG(dbgs() << "INVALID: Truncation of SCEVType::IV expression");
return ValidatorResult(SCEVType::INVALID);
case SCEVType::INVALID:
// Propagate the invalid operand result unchanged.
return Op;
}
llvm_unreachable("Unknown SCEVType");
// Forward the operand's validation result directly.
return visit(Expr->getOperand());
}
class ValidatorResult visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {

View File

@ -7,8 +7,6 @@
;
; CHECK: polly.split_new_and_old:
; CHECK-NOT: = sdiv i64 0, -4
; CHECK: %div43polly = sdiv i64 %param, 2
; CHECK: %div44polly = sdiv i64 %div43polly, 2
;
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,7 +1,7 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-codegen -analyze < %s
;
; CHECK: Execution Context: [p_0] -> { : }
; CHECK: Execution Context: [p_0_loaded_from_currpc] -> { : }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -24,15 +24,15 @@
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: { Stmt_bb2[i0] -> [i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 0 <= o0 <= 16 };
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 32*floor((8 + i0)/16) = o0 + 16*floor((i0)/8) and -14 + 2i0 - o0 <= 16*floor((i0)/8) <= 16 + 2i0 - o0 }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 2i0 <= o0 <= 1 + 2i0 };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 0 <= o0 <= 32 };
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 64*floor((8 + i0)/16) = o0 + 32*floor((i0)/8) and -28 + 4i0 - o0 <= 32*floor((i0)/8) <= 32 + 4i0 - o0 }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 4i0 <= o0 <= 3 + 4i0 };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : 0 <= o0 <= 9 };
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[i0] };
; CHECK-NEXT: }

View File

@ -24,15 +24,15 @@
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: { Stmt_bb2[i0] -> [i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 0 <= o0 <= 9 };
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[o0] : 0 <= o0 <= 9 };
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : 0 <= o0 <= 15 };
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : -7 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[i0] };
; CHECK-NEXT: }

View File

@ -0,0 +1,45 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(char *A, short N) {
; for (char i = 0; i < (char)N; i++)
; A[i]++;
; }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N] -> { : 1 = 0 }
;
; CHECK: Domain :=
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 256*floor((128 + N)/256) < N - i0 };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Test input: a loop over an i8 counter that increments A[i] while the
; sign-extended counter is less than the low 8 bits of %N (sign-extended).
; This matches the C sketch in the header of this test.
define void @f(i8* %A, i16 signext %N) {
entry:
br label %for.cond
; Loop header: computes the bound and decides whether to enter the body.
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i8 [ 0, %entry ], [ %inc4, %for.inc ]
%conv = sext i8 %i.0 to i32
; shl by 24 followed by ashr by 24 keeps the low 8 bits of %N and
; sign-extends them to i32, i.e. the (char)N of the C sketch.
%conv1 = zext i16 %N to i32
%sext = shl i32 %conv1, 24
%conv2 = ashr exact i32 %sext, 24
%cmp = icmp slt i32 %conv, %conv2
br i1 %cmp, label %for.body, label %for.end
; Loop body: A[i]++ on i8 elements.
for.body: ; preds = %for.cond
%idxprom = sext i8 %i.0 to i64
%arrayidx = getelementptr inbounds i8, i8* %A, i64 %idxprom
%tmp = load i8, i8* %arrayidx, align 1
%inc = add i8 %tmp, 1
store i8 %inc, i8* %arrayidx, align 1
br label %for.inc
; Latch: i++ (marked nsw).
for.inc: ; preds = %for.body
%inc4 = add nsw i8 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,43 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(char *A, short N) {
; for (short i = 0; i < N; i++)
; A[(char)(N)]++;
; }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N] -> { : 1 = 0 }
;
; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Test input: a loop over an i16 counter running up to %N that increments
; A[(char)N] in every iteration; the subscript is an explicit trunc of the
; parameter %N from i16 to i8.
define void @f(i8* %A, i16 signext %N) {
entry:
br label %for.cond
; Loop header: signed compare of the i16 counter against %N.
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i16 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%cmp = icmp slt i16 %indvars.iv, %N
br i1 %cmp, label %for.body, label %for.end
; Loop body: the access offset is the truncated parameter, independent of the
; loop counter.
for.body: ; preds = %for.cond
%idxprom = trunc i16 %N to i8
%arrayidx = getelementptr inbounds i8, i8* %A, i8 %idxprom
%tmp1 = load i8, i8* %arrayidx, align 1
%inc = add i8 %tmp1, 1
store i8 %inc, i8* %arrayidx, align 1
br label %for.inc
; Latch: counter increment (marked nuw nsw).
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i16 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,53 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(unsigned *restrict I, unsigned *restrict A, unsigned N, unsigned M) {
; for (unsigned i = 0; i < N; i++) {
; unsigned char V = *I;
; if (V < M)
; A[i]++;
; }
; }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N, tmp, M] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) or (N > 0 and 256*floor((128 + tmp)/256) > tmp) }
;
; CHECK: Domain :=
; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and 256*floor((128 + tmp)/256) > tmp - M };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Test input: a loop over an i64 counter that loads *I in every iteration,
; keeps the low 8 bits of the loaded value and increments A[i] only if that
; value is unsigned-less-than %M.
define void @f(i32* noalias %I, i32* noalias %A, i32 %N, i32 %M) {
entry:
br label %for.cond
; Loop header: the i64 counter is truncated to i32 for the exit compare
; against %N.
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%lftr.wideiv = trunc i64 %indvars.iv to i32
%exitcond = icmp ne i32 %lftr.wideiv, %N
br i1 %exitcond, label %for.body, label %for.end
; Loop body: `and ..., 255` keeps only the low 8 bits of the loaded i32 (the
; `unsigned char V` of the C sketch) before the unsigned compare with %M.
for.body: ; preds = %for.cond
%tmp = load i32, i32* %I, align 4
%conv1 = and i32 %tmp, 255
%cmp2 = icmp ult i32 %conv1, %M
br i1 %cmp2, label %if.then, label %if.end
; Conditional statement: A[i]++ on i32 elements.
if.then: ; preds = %for.body
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp1 = load i32, i32* %arrayidx, align 4
%inc = add i32 %tmp1, 1
store i32 %inc, i32* %arrayidx, align 4
br label %if.end
if.end: ; preds = %if.then, %for.body
br label %for.inc
; Latch: counter increment (marked nuw nsw).
for.inc: ; preds = %if.end
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,54 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(unsigned long *restrict I, unsigned *restrict A, unsigned N) {
; for (unsigned i = 0; i < N; i++) {
; unsigned V = *I;
; if (V < i)
; A[i]++;
; }
; }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N, tmp] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N, tmp] -> { : N > 0 and (tmp < 0 or tmp >= 2147483648) }
;
; CHECK: Domain :=
; CHECK-NEXT: [N, tmp] -> { Stmt_if_then[i0] : i0 > tmp and 0 <= i0 < N };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Test input: a loop over an i64 counter that loads an i64 from *I in every
; iteration, truncates it to i32, zero-extends it back to i64 and increments
; A[i] only if that value is unsigned-less-than the counter.
; Note: the parameter %M is declared but not referenced in the body.
define void @f(i64* noalias %I, i32* noalias %A, i32 %N, i32 %M) {
entry:
br label %for.cond
; Loop header: the i64 counter is truncated to i32 for the exit compare
; against %N.
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%lftr.wideiv = trunc i64 %indvars.iv to i32
%exitcond = icmp ne i32 %lftr.wideiv, %N
br i1 %exitcond, label %for.body, label %for.end
; Loop body: trunc i64 -> i32 followed by zext i32 -> i64 on the loaded value
; (the `unsigned V = *I` of the C sketch), then an unsigned compare with the
; counter.
for.body: ; preds = %for.cond
%tmp = load i64, i64* %I, align 8
%conv = trunc i64 %tmp to i32
%tmp1 = zext i32 %conv to i64
%cmp1 = icmp ult i64 %tmp1, %indvars.iv
br i1 %cmp1, label %if.then, label %if.end
; Conditional statement: A[i]++ on i32 elements.
if.then: ; preds = %for.body
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp2 = load i32, i32* %arrayidx, align 4
%inc = add i32 %tmp2, 1
store i32 %inc, i32* %arrayidx, align 4
br label %if.end
if.end: ; preds = %if.then, %for.body
br label %for.inc
; Latch: counter increment (marked nuw nsw).
for.inc: ; preds = %if.end
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}