Add support to indvars for optimizing sadd.with.overflow.

Split sadd.with.overflow into add + sadd.with.overflow to allow
analysis and optimization. This should ideally be done after
InstCombine, which can perform code motion (eventually indvars should
run after all canonical instcombines). We want ISEL to recombine the
add and the check, at least on x86.

This is currently under an option for reducing live induction
variables: -liv-reduce. The next step is reducing liveness of IVs that
are live out of the overflow check paths. Once the related
optimizations are fully developed, reviewed and tested, I do expect
this to become the default.

llvm-svn: 197926
This commit is contained in:
Andrew Trick 2013-12-23 23:31:49 +00:00
parent b275d7f8f3
commit 0ba77a0740
4 changed files with 165 additions and 4 deletions

View File

@ -22,6 +22,7 @@
namespace llvm {
class CastInst;
class DominatorTree;
class IVUsers;
class Loop;
class LPPassManager;
@ -31,9 +32,25 @@ class ScalarEvolution;
/// Interface for visiting interesting IV users that are recognized but not
/// simplified by this utility.
class IVVisitor {
protected:
  // Dominator tree, if available. Must be set by the subclass (e.g. in its
  // constructor) before overflow-intrinsic splitting may be enabled.
  const DominatorTree *DT;
  // When true, simplifyUsersOfIV splits sadd.with.overflow into a plain add
  // plus the overflow check so the add can be analyzed and optimized.
  bool ShouldSplitOverflowIntrinsics;

  virtual void anchor();

public:
  IVVisitor(): DT(NULL), ShouldSplitOverflowIntrinsics(false) {}
  virtual ~IVVisitor() {}

  /// Return the dominator tree, or null if none was provided.
  const DominatorTree *getDomTree() const { return DT; }

  // NOTE(review): "Instrinsics" is a typo, but it is part of the public API
  // name; renaming it here would break existing callers.
  bool shouldSplitOverflowInstrinsics() const {
    return ShouldSplitOverflowIntrinsics;
  }

  /// Enable splitting of overflow intrinsics. A dominator tree must already
  /// have been provided (see the assert).
  void setSplitOverflowIntrinsics() {
    ShouldSplitOverflowIntrinsics = true;
    assert(DT && "Splitting overflow intrinsics requires a DomTree.");
  }

  /// Hook invoked for each interesting cast of the induction variable.
  virtual void visitCast(CastInst *Cast) = 0;
};

View File

@ -63,6 +63,9 @@ static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
// Experimental: when enabled, indvars splits sadd.with.overflow intrinsics
// into a plain add plus the intrinsic, reducing the number of live induction
// variables. Off by default until the related optimizations mature.
static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
  cl::desc("Reduce live induction variables."));
namespace {
class IndVarSimplify : public LoopPass {
LoopInfo *LI;
@ -643,8 +646,11 @@ namespace {
WideIVInfo WI;
WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
const DataLayout *TData) :
SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
const DataLayout *TData, const DominatorTree *DTree):
SE(SCEV), TD(TData) {
DT = DTree;
WI.NarrowIV = NarrowIV;
}
// Implement the interface used by simplifyUsersOfIV.
virtual void visitCast(CastInst *Cast);
@ -1114,7 +1120,9 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
WideIVVisitor WIV(CurrIV, SE, TD);
WideIVVisitor WIV(CurrIV, SE, TD, DT);
if (ReduceLiveIVs)
WIV.setSplitOverflowIntrinsics();
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);

View File

@ -18,12 +18,16 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@ -75,6 +79,9 @@ namespace {
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
Instruction *splitOverflowIntrinsic(Instruction *IVUser,
const DominatorTree *DT);
};
}
@ -263,6 +270,71 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return true;
}
/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
/// analysis and optimization.
///
/// The transformation is only performed when it is provably safe: the
/// intrinsic's value result must feed an extractvalue, its overflow bit must
/// feed a single conditional branch, and every user of the value result must
/// be dominated by the no-overflow successor of that branch (so the plain add
/// is provably NSW at every use).
///
/// \param IVUser the candidate instruction (anything other than a
///        sadd.with.overflow call is returned unchanged).
/// \param DT dominator tree used to prove the users are guarded by the
///        overflow check; must be non-null (enforced by
///        IVVisitor::setSplitOverflowIntrinsics).
/// \return A new value representing the non-overflowing add if possible,
/// otherwise return the original value.
Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
                                                    const DominatorTree *DT) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
  if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
    return IVUser;

  // Find a branch guarded by the overflow check.
  BranchInst *Branch = 0;
  Instruction *AddVal = 0;
  for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
       UI != E; ++UI) {
    if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
      if (ExtractInst->getNumIndices() != 1)
        continue;
      // Index 0 extracts the add result; index 1 extracts the overflow bit.
      if (ExtractInst->getIndices()[0] == 0)
        AddVal = ExtractInst;
      else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
        Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
    }
  }
  if (!AddVal || !Branch)
    return IVUser;

  // The no-overflow path is successor 1 (the "false" edge of the check). It
  // must have a single predecessor, or dominance below would not imply the
  // check was actually taken.
  BasicBlock *ContinueBB = Branch->getSuccessor(1);
  if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
    return IVUser;

  // Check if all users of the add are provably NSW.
  bool AllNSW = true;
  for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
       UI != E; ++UI) {
    if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
      BasicBlock *UseBB = UseInst->getParent();
      // A PHI "uses" the value in the incoming block, not where the PHI lives.
      if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
        UseBB = PHI->getIncomingBlock(UI);
      if (!DT->dominates(ContinueBB, UseBB)) {
        AllNSW = false;
        break;
      }
    }
  }
  if (!AllNSW)
    return IVUser;

  // Go for it...
  IRBuilder<> Builder(IVUser);
  Instruction *AddInst = dyn_cast<Instruction>(
    Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
  // The builder's folder may return a non-Instruction Value (e.g. if both
  // operands were constants); bail out rather than dereference null below.
  if (!AddInst)
    return IVUser;
  // The caller expects the new add to have the same form as the intrinsic. The
  // IV operand position must be the same.
  assert((AddInst->getOpcode() == Instruction::Add &&
          AddInst->getOperand(0) == II->getOperand(0)) &&
         "Bad add instruction created from overflow intrinsic.");

  AddVal->replaceAllUsesWith(AddInst);
  DeadInsts.push_back(AddVal);
  return AddInst;
}
/// pushIVUsers - Add all uses of Def to the current IV's worklist.
///
static void pushIVUsers(
@ -334,8 +406,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
while (!SimpleIVUsers.empty()) {
std::pair<Instruction*, Instruction*> UseOper =
SimpleIVUsers.pop_back_val();
Instruction *UseInst = UseOper.first;
// Bypass back edges to avoid extra work.
if (UseOper.first == CurrIV) continue;
if (UseInst == CurrIV) continue;
if (V && V->shouldSplitOverflowInstrinsics()) {
UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree());
if (!UseInst)
continue;
}
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {

View File

@ -0,0 +1,56 @@
; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"

; Verify that -liv-reduce splits each sadd.with.overflow into a plain
; "add nsw" (which indvars can then widen) plus the original intrinsic whose
; result only feeds the overflow branch, and that the extractvalue of the add
; result disappears from each loop body.
; CHECK-LABEL: @addwithoverflow
; CHECK-LABEL: loop1:
; CHECK-NOT: zext
; CHECK: add nsw
; CHECK: @llvm.sadd.with.overflow
; CHECK-LABEL: loop2:
; CHECK-NOT: extractvalue
; CHECK: add nuw nsw
; CHECK: @llvm.sadd.with.overflow
; CHECK-LABEL: loop3:
; CHECK-NOT: extractvalue
; CHECK: ret
define i64 @addwithoverflow(i32 %n, i64* %a) {
entry:
  br label %loop0

; Loop header: iterate while %i < %n.
loop0:
  %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ]
  %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ]
  %bc = icmp ult i32 %i, %n
  br i1 %bc, label %loop1, label %exit

; Accumulate into %s with an overflow-checked add.
loop1:
  %zxt = zext i32 %i to i64
  %ofs = shl nuw nsw i64 %zxt, 3
  %gep = getelementptr i64* %a, i64 %zxt
  %v = load i64* %gep, align 8
  %truncv = trunc i64 %v to i32
  %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv)
  %ovflows = extractvalue { i32, i1 } %adds, 1
  br i1 %ovflows, label %exit, label %loop2

; Increment %i, also with an overflow-checked add.
loop2:
  %addsval = extractvalue { i32, i1 } %adds, 0
  %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1)
  %i1check = extractvalue { i32, i1 } %i1, 1
  br i1 %i1check, label %exit, label %loop3

loop3:
  %i1val = extractvalue { i32, i1 } %i1, 0
  %test = icmp slt i32 %i1val, %n
  br i1 %test, label %return, label %loop0

return:
  %ret = zext i32 %addsval to i64
  ret i64 %ret

exit:
  unreachable
}

declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)