[FIX] Synthesize Sdiv/Srem/Udiv instructions correctly.

This patch simplifies the Sdiv/Srem/Udiv expansion and thereby
  prevents errors, e.g., regarding the insertion point.

llvm-svn: 270408
This commit is contained in:
Johannes Doerfert 2016-05-23 08:55:43 +00:00
parent e6542002fc
commit 1a4ad8f771
3 changed files with 77 additions and 31 deletions

View File

@ -246,15 +246,6 @@ private:
const Region &R;
ValueMapT *VMap;
/// @brief Return the Value for @p E if it is not zero or else the value 1.
///
/// The helper materializes @p E as IR and then guards it with a
/// compare-and-select so that the returned value is never zero. The callers
/// visible in this file use the result as the right-hand side of a division.
///
/// @param E  The SCEV expression to expand; its type is also the type of the
///           constants 0 and 1 created here.
/// @param IP The instruction handed to the expander, the compare and the
///           select as their insertion position (presumably "insert before",
///           as is usual for LLVM's InsertBefore arguments).
///
/// @return The select instruction yielding the expanded value of @p E when
///         it compares unequal to zero, and the constant 1 otherwise.
Value *selectOneIfZero(const SCEV *E, Instruction *IP) {
auto *Ty = E->getType();
// Generate the code computing E in front of the insertion point.
auto *RHS = Expander.expandCodeFor(E, Ty, IP);
auto *Zero = ConstantInt::get(Ty, 0);
// Cond is true iff the expanded value is non-zero.
auto *Cond = new ICmpInst(IP, ICmpInst::ICMP_NE, RHS, Zero);
// Keep the expanded value when it is non-zero, otherwise substitute 1.
return SelectInst::Create(Cond, RHS, ConstantInt::get(Ty, 1), "", IP);
}
const SCEV *visitUnknown(const SCEVUnknown *E) {
// If a value mapping was given try if the underlying value is remapped.
@ -278,15 +269,14 @@ private:
Instruction *StartIP = R.getEnteringBlock()->getTerminator();
const SCEV *LHSScev = visit(SE.getSCEV(Inst->getOperand(0)));
const SCEV *RHSScev = visit(SE.getSCEV(Inst->getOperand(1)));
const SCEV *LHSScev = SE.getSCEV(Inst->getOperand(0));
const SCEV *RHSScev = SE.getSCEV(Inst->getOperand(1));
Value *LHS = Expander.expandCodeFor(LHSScev, E->getType(), StartIP);
Value *RHS = nullptr;
if (SE.isKnownNonZero(RHSScev))
RHS = Expander.expandCodeFor(RHSScev, E->getType(), StartIP);
else
RHS = selectOneIfZero(RHSScev, StartIP);
if (!SE.isKnownNonZero(RHSScev))
RHSScev = SE.getUMaxExpr(RHSScev, SE.getConstant(E->getType(), 1));
Value *LHS = expandCodeFor(LHSScev, E->getType(), StartIP);
Value *RHS = expandCodeFor(RHSScev, E->getType(), StartIP);
Inst = BinaryOperator::Create((Instruction::BinaryOps)Inst->getOpcode(),
LHS, RHS, Inst->getName() + Name, StartIP);
@ -308,12 +298,10 @@ private:
return SE.getSignExtendExpr(visit(E->getOperand()), E->getType());
}
const SCEV *visitUDivExpr(const SCEVUDivExpr *E) {
if (SE.isKnownNonZero(E->getRHS()))
return SE.getUDivExpr(visit(E->getLHS()), visit(E->getRHS()));
auto *RHSScev = visit(E->getRHS());
auto *IP = R.getEnteringBlock()->getTerminator();
auto *RHS = selectOneIfZero(RHSScev, IP);
return SE.getUDivExpr(visit(E->getLHS()), SE.getSCEV(RHS));
if (!SE.isKnownNonZero(E->getRHS()))
RHSScev = SE.getUMaxExpr(RHSScev, SE.getConstant(E->getType(), 1));
return SE.getUDivExpr(visit(E->getLHS()), RHSScev);
}
const SCEV *visitAddExpr(const SCEVAddExpr *E) {
SmallVector<const SCEV *, 4> NewOps;

View File

@ -13,15 +13,15 @@
; A[i] += A[(a / b) / (c / d)];
; }
;
; IR: %[[R0:[0-9]*]] = icmp ne i32 %d, 0
; IR-NEXT: %[[R1:[0-9]*]] = select i1 %[[R0]], i32 %d, i32 1
; IR-NEXT: %[[R2:[0-9]*]] = udiv i32 %c, %[[R1]]
; IR-NEXT: %[[R3:[0-9]*]] = icmp ne i32 %2, 0
; IR-NEXT: %[[R4:[0-9]*]] = select i1 %[[R3]], i32 %[[R2]], i32 1
; IR-NEXT: %[[R5:[0-9]*]] = icmp ne i32 %b, 0
; IR-NEXT: %[[R6:[0-9]*]] = select i1 %[[R5]], i32 %b, i32 1
; IR-NEXT: %[[R7:[0-9]*]] = udiv i32 %a, %[[R6]]
; IR-NEXT: %[[R8:[0-9]*]] = udiv i32 %[[R7]], %[[R4]]
; IR: %[[R0:[.a-zA-Z0-9]*]] = icmp ugt i32 %b, 1
; IR-NEXT: %[[R1:[.a-zA-Z0-9]*]] = select i1 %[[R0]], i32 %b, i32 1
; IR-NEXT: %[[R2:[.a-zA-Z0-9]*]] = udiv i32 %a, %[[R1]]
; IR-NEXT: %[[R5:[.a-zA-Z0-9]*]] = icmp ugt i32 %d, 1
; IR-NEXT: %[[R6:[.a-zA-Z0-9]*]] = select i1 %[[R5]], i32 %d, i32 1
; IR-NEXT: %[[R7:[.a-zA-Z0-9]*]] = udiv i32 %c, %[[R6]]
; IR-NEXT: %[[R3:[.a-zA-Z0-9]*]] = icmp ugt i32 %[[R7]], 1
; IR-NEXT: %[[R4:[.a-zA-Z0-9]*]] = select i1 %[[R3]], i32 %[[R7]], i32 1
; IR-NEXT: %[[R8:[.a-zA-Z0-9]*]] = udiv i32 %[[R2]], %[[R4]]
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -0,0 +1,58 @@
; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
;
; Verify we do not crash when we synthesize code for the udiv in the SCoP.
;
; CHECK: polly.start
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Reduced reproducer: a single loop over %i.13 (starting at 0, incremented by
; 1 in %for.body50.7, left once %inc60 is no longer unsigned-less-than 128).
; The loop header computes a udiv whose divisor is %i.13 + 2; the truncated
; result %conv46 is only consumed after the loop, through the LCSSA phi in
; %for.cond62. The blocks between the header and the latch contain nothing
; but unconditional branches.
; Function Attrs: nounwind uwtable
define void @RestartModel() #0 {
entry:
br label %for.cond32.preheader
; Loop header: holds the induction variable and the udiv under test.
for.cond32.preheader: ; preds = %entry, %for.body50.7
%i.13 = phi i32 [ 0, %entry ], [ %inc60, %for.body50.7 ]
%add = add i32 %i.13, 2
; The division the test is about: undef dividend, divisor %i.13 + 2.
%div44 = udiv i32 undef, %add
%sub45 = sub i32 16384, %div44
%conv46 = trunc i32 %sub45 to i16
br label %for.body35
for.body35: ; preds = %for.cond32.preheader
br label %for.body50
for.body50: ; preds = %for.body35
br label %for.body50.1
; Loop exit: the value derived from the udiv escapes the loop here and is
; stored.
for.cond62: ; preds = %for.body50.7
%conv46.lcssa = phi i16 [ %conv46, %for.body50.7 ]
store i16 %conv46.lcssa, i16* undef, align 2
br label %for.end83
for.end83: ; preds = %for.cond62
ret void
; Chain of empty blocks %for.body50.1 .. %for.body50.6 leading to the latch.
for.body50.1: ; preds = %for.body50
br label %for.body50.2
for.body50.2: ; preds = %for.body50.1
br label %for.body50.3
for.body50.3: ; preds = %for.body50.2
br label %for.body50.4
for.body50.4: ; preds = %for.body50.3
br label %for.body50.5
for.body50.5: ; preds = %for.body50.4
br label %for.body50.6
for.body50.6: ; preds = %for.body50.5
br label %for.body50.7
; Loop latch: increment the induction variable and test the bound of 128.
for.body50.7: ; preds = %for.body50.6
%inc60 = add i32 %i.13, 1
%cmp29 = icmp ult i32 %inc60, 128
br i1 %cmp29, label %for.cond32.preheader, label %for.cond62
}