forked from OSchip/llvm-project
[IndVars] Eliminate op.with.overflow when possible
Summary: If we can prove that an op.with.overflow intrinsic does not overflow, we can get rid of the intrinsic, and replace it with non-wrapping arithmetic. Reviewers: atrick, regehr Subscribers: sanjoy, mcrosier, llvm-commits Differential Revision: http://reviews.llvm.org/D18685 llvm-svn: 265913
This commit is contained in:
parent
3c529a40ca
commit
a07ad647ee
|
@ -71,6 +71,7 @@ namespace {
|
|||
|
||||
bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
|
||||
|
||||
bool eliminateOverflowIntrinsic(CallInst *CI);
|
||||
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
|
||||
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
|
||||
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
|
||||
|
@ -318,6 +319,108 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
|
|||
DeadInsts.emplace_back(Rem);
|
||||
}
|
||||
|
||||
/// Try to replace a call to llvm.{sadd,uadd,ssub,usub}.with.overflow with a
/// plain add/sub when ScalarEvolution shows that the operation cannot overflow.
///
/// The proof builds two SCEVs in an integer type twice as wide as the
/// operands: ext(LHS op RHS) and ext(LHS) op ext(RHS), where ext is sign- or
/// zero-extension depending on the intrinsic's signedness. Only if both are
/// the very same SCEV is the call treated as non-overflowing.
///
/// On success a new binary operator carrying nsw (signed) or nuw (unsigned) is
/// inserted before \p CI, every extractvalue user of \p CI is rewritten
/// (index 1 becomes the constant false, index 0 becomes the new operator) and
/// erased, and \p CI itself is erased if that leaves it without uses.
///
/// \returns true if the IR was changed; false if \p CI has no directly called
/// function, is not one of the handled intrinsics, or the proof failed.
bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
  using BinarySCEVBuilder = const SCEV *(ScalarEvolution::*)(
      const SCEV *, const SCEV *, SCEV::NoWrapFlags);
  using ExtendSCEVBuilder = const SCEV *(ScalarEvolution::*)(const SCEV *,
                                                             Type *);

  auto *Callee = CI->getCalledFunction();
  if (!Callee)
    return false;

  BinarySCEVBuilder Combine;
  ExtendSCEVBuilder Widen;
  Instruction::BinaryOps Opcode;

  // Exactly one wrap flag applies to the replacement: nsw for the signed
  // intrinsics, nuw for the unsigned ones.
  bool IsSigned;

  switch (Callee->getIntrinsicID()) {
  case Intrinsic::sadd_with_overflow:
    Combine = &ScalarEvolution::getAddExpr;
    Widen = &ScalarEvolution::getSignExtendExpr;
    Opcode = Instruction::Add;
    IsSigned = true;
    break;

  case Intrinsic::uadd_with_overflow:
    Combine = &ScalarEvolution::getAddExpr;
    Widen = &ScalarEvolution::getZeroExtendExpr;
    Opcode = Instruction::Add;
    IsSigned = false;
    break;

  case Intrinsic::ssub_with_overflow:
    Combine = &ScalarEvolution::getMinusSCEV;
    Widen = &ScalarEvolution::getSignExtendExpr;
    Opcode = Instruction::Sub;
    IsSigned = true;
    break;

  case Intrinsic::usub_with_overflow:
    Combine = &ScalarEvolution::getMinusSCEV;
    Widen = &ScalarEvolution::getZeroExtendExpr;
    Opcode = Instruction::Sub;
    IsSigned = false;
    break;

  default:
    // Not an overflow intrinsic we know how to simplify.
    return false;
  }

  Value *Op0 = CI->getArgOperand(0);
  Value *Op1 = CI->getArgOperand(1);

  const SCEV *LHS = SE->getSCEV(Op0);
  const SCEV *RHS = SE->getSCEV(Op1);

  auto *NarrowTy = cast<IntegerType>(LHS->getType());
  auto *WideTy =
      IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);

  // Operate in the narrow type, then extend the result...
  const SCEV *NarrowOp = (SE->*Combine)(LHS, RHS, SCEV::FlagAnyWrap);
  const SCEV *ExtOfOp = (SE->*Widen)(NarrowOp, WideTy);

  // ...versus extend both operands first, then operate in the wide type.
  const SCEV *WideLHS = (SE->*Widen)(LHS, WideTy);
  const SCEV *WideRHS = (SE->*Widen)(RHS, WideTy);
  const SCEV *OpOfExt = (SE->*Combine)(WideLHS, WideRHS, SCEV::FlagAnyWrap);

  if (ExtOfOp != OpOfExt)
    return false;

  // No overflow is possible: drop the overflow check and, if nothing else
  // uses it, the intrinsic call too.
  BinaryOperator *NewResult = BinaryOperator::Create(Opcode, Op0, Op1, "", CI);
  if (IsSigned)
    NewResult->setHasNoSignedWrap(true);
  else
    NewResult->setHasNoUnsignedWrap(true);

  // Collect the extractvalue users first; they are erased only after the walk
  // over CI's user list has finished.
  SmallVector<ExtractValueInst *, 4> DeadExtracts;
  for (auto *User : CI->users()) {
    auto *Extract = dyn_cast<ExtractValueInst>(User);
    if (!Extract)
      continue;

    Value *Replacement;
    if (Extract->getIndices()[0] == 1) {
      // Field 1 is the overflow bit, now known to be false.
      Replacement = ConstantInt::getFalse(CI->getContext());
    } else {
      assert(Extract->getIndices()[0] == 0 && "Only two possibilities!");
      // Field 0 is the arithmetic result.
      Replacement = NewResult;
    }

    Extract->replaceAllUsesWith(Replacement);
    DeadExtracts.push_back(Extract);
  }

  for (auto *Extract : DeadExtracts)
    Extract->eraseFromParent();

  if (CI->use_empty())
    CI->eraseFromParent();

  return true;
}
|
||||
|
||||
/// Eliminate an operation that consumes a simple IV and has no observable
|
||||
/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
|
||||
/// but UseInst may not be.
|
||||
|
@ -335,6 +438,10 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
|
|||
}
|
||||
}
|
||||
|
||||
if (auto *CI = dyn_cast<CallInst>(UseInst))
|
||||
if (eliminateOverflowIntrinsic(CI))
|
||||
return true;
|
||||
|
||||
if (eliminateIdentitySCEV(UseInst, IVOperand))
|
||||
return true;
|
||||
|
||||
|
|
|
@ -0,0 +1,137 @@
|
|||
; RUN: opt -S -indvars < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Signed-add case: %i.04 starts at 0 and is advanced with
; llvm.sadd.with.overflow(%i.04, 1); the loop repeats while the new value is
; signed-less-than 16. The FileCheck directives below expect that no call to
; the intrinsic remains in for.body and that the overflow branch condition has
; become the constant false.
define void @f_sadd(i8* %a) {
|
||||
; CHECK-LABEL: @f_sadd(
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %cont
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %entry, %cont
|
||||
%i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
|
||||
%idxprom = sext i32 %i.04 to i64
|
||||
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
|
||||
store i8 0, i8* %arrayidx, align 1
|
||||
; %0 is the { result, overflow-bit } pair; %1 picks out the overflow bit.
%0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
|
||||
%1 = extractvalue { i32, i1 } %0, 1
|
||||
; CHECK: for.body:
|
||||
; CHECK-NOT: @llvm.sadd.with.overflow
|
||||
; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
|
||||
br i1 %1, label %trap, label %cont, !nosanitize !{}
|
||||
|
||||
; Reached only when the overflow bit is set.
trap: ; preds = %for.body
|
||||
tail call void @llvm.trap() #2, !nosanitize !{}
|
||||
unreachable, !nosanitize !{}
|
||||
|
||||
cont: ; preds = %for.body
|
||||
; %2 is the arithmetic result and feeds the phi as the next induction value.
%2 = extractvalue { i32, i1 } %0, 0
|
||||
%cmp = icmp slt i32 %2, 16
|
||||
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
||||
}
|
||||
|
||||
; Unsigned-add case: same loop shape as a 0..16 counter, but the increment is
; llvm.uadd.with.overflow(%i.04, 1). The FileCheck directives below expect
; that no call to the intrinsic remains in for.body and that the overflow
; branch condition has become the constant false.
define void @f_uadd(i8* %a) {
|
||||
; CHECK-LABEL: @f_uadd(
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %cont
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %entry, %cont
|
||||
%i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
|
||||
%idxprom = sext i32 %i.04 to i64
|
||||
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
|
||||
store i8 0, i8* %arrayidx, align 1
|
||||
; %0 is the { result, overflow-bit } pair; %1 picks out the overflow bit.
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1)
|
||||
%1 = extractvalue { i32, i1 } %0, 1
|
||||
; CHECK: for.body:
|
||||
; CHECK-NOT: @llvm.uadd.with.overflow
|
||||
; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
|
||||
br i1 %1, label %trap, label %cont, !nosanitize !{}
|
||||
|
||||
; Reached only when the overflow bit is set.
trap: ; preds = %for.body
|
||||
tail call void @llvm.trap(), !nosanitize !{}
|
||||
unreachable, !nosanitize !{}
|
||||
|
||||
cont: ; preds = %for.body
|
||||
; %2 is the arithmetic result and feeds the phi as the next induction value.
%2 = extractvalue { i32, i1 } %0, 0
|
||||
%cmp = icmp slt i32 %2, 16
|
||||
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
||||
}
|
||||
|
||||
; Signed-subtract case: %i.04 starts at 15 and is stepped down with
; llvm.ssub.with.overflow(%i.04, 1); the loop repeats while the new value is
; signed-greater-than -1. The FileCheck directives below expect that no call
; to the intrinsic remains in for.body and that the overflow branch condition
; has become the constant false.
define void @f_ssub(i8* nocapture %a) {
|
||||
; CHECK-LABEL: @f_ssub(
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %cont
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %entry, %cont
|
||||
%i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
|
||||
%idxprom = sext i32 %i.04 to i64
|
||||
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
|
||||
store i8 0, i8* %arrayidx, align 1
|
||||
; %0 is the { result, overflow-bit } pair; %1 picks out the overflow bit.
%0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1)
|
||||
%1 = extractvalue { i32, i1 } %0, 1
|
||||
; CHECK: for.body:
|
||||
; CHECK-NOT: @llvm.ssub.with.overflow.i32
|
||||
; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
|
||||
br i1 %1, label %trap, label %cont, !nosanitize !{}
|
||||
|
||||
; Reached only when the overflow bit is set.
trap: ; preds = %for.body
|
||||
tail call void @llvm.trap(), !nosanitize !{}
|
||||
unreachable, !nosanitize !{}
|
||||
|
||||
cont: ; preds = %for.body
|
||||
; %2 is the arithmetic result and feeds the phi as the next induction value.
%2 = extractvalue { i32, i1 } %0, 0
|
||||
%cmp = icmp sgt i32 %2, -1
|
||||
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
||||
}
|
||||
|
||||
; Unsigned-subtract case: %i.04 starts at 15 and is stepped down with
; llvm.usub.with.overflow(%i.04, 1); the loop repeats while the new value is
; signed-greater-than -1. Unlike the other three functions in this file, the
; FileCheck directives below expect the overflow bit to still be extracted
; and branched on after the pass has run.
define void @f_usub(i8* nocapture %a) {
|
||||
; CHECK-LABEL: @f_usub(
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %cont
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %entry, %cont
|
||||
%i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
|
||||
%idxprom = sext i32 %i.04 to i64
|
||||
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
|
||||
store i8 0, i8* %arrayidx, align 1
|
||||
; %0 is the { result, overflow-bit } pair; %1 picks out the overflow bit.
%0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1)
|
||||
%1 = extractvalue { i32, i1 } %0, 1
|
||||
|
||||
; The overflow check is expected to be kept here: SCEV cannot
|
||||
; represent non-unsigned-wrapping subtraction operations.
; NOTE(review): the earlier wording called the no-overflow fact "theoretically
; possible to prove". Tracing the loop, the body also runs with %i.04 == 0
; (the previous iteration produces 0, and 0 > -1 keeps looping), and 0 - 1
; does wrap as an unsigned subtraction, so the trap edge appears to be
; genuinely reachable. Confirm, and consider a variant of this test whose
; subtraction never wraps if the SCEV limitation is what should be covered.
|
||||
|
||||
; CHECK: for.body:
|
||||
; CHECK: [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1
|
||||
; CHECK-NEXT: br i1 [[COND]], label %trap, label %cont, !nosanitize !0
|
||||
br i1 %1, label %trap, label %cont, !nosanitize !{}
|
||||
|
||||
; Reached only when the overflow bit is set.
trap: ; preds = %for.body
|
||||
tail call void @llvm.trap(), !nosanitize !{}
|
||||
unreachable, !nosanitize !{}
|
||||
|
||||
cont: ; preds = %for.body
|
||||
; %2 is the arithmetic result and feeds the phi as the next induction value.
%2 = extractvalue { i32, i1 } %0, 0
|
||||
%cmp = icmp sgt i32 %2, -1
|
||||
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
||||
}
|
||||
|
||||
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
|
||||
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
|
||||
declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
|
||||
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
|
||||
declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
|
||||
declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
|
||||
|
||||
declare void @llvm.trap() #2
|
|
@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx"
|
|||
; CHECK-LABEL: loop2:
|
||||
; CHECK-NOT: extractvalue
|
||||
; CHECK: add nuw
|
||||
; CHECK: @llvm.sadd.with.overflow
|
||||
; CHECK-NOT: @llvm.sadd.with.overflow
|
||||
; CHECK-LABEL: loop3:
|
||||
; CHECK-NOT: extractvalue
|
||||
; CHECK: ret
|
||||
|
|
Loading…
Reference in New Issue