Add support to indvars for optimizing sadd.with.overflow.

Split sadd.with.overflow into add + sadd.with.overflow to allow
analysis and optimization. This should ideally be done after
InstCombine, which can perform code motion (eventually indvars should
run after all canonical instcombines). We want ISEL to recombine the
add and the check, at least on x86.

This is currently under an option for reducing live induction
variables: -liv-reduce. The next step is reducing liveness of IVs that
are live out of the overflow check paths. Once the related
optimizations are fully developed, reviewed and tested, I do expect
this to become the default.

llvm-svn: 197926
This commit is contained in:
Andrew Trick 2013-12-23 23:31:49 +00:00
parent b275d7f8f3
commit 0ba77a0740
4 changed files with 165 additions and 4 deletions

View File

@ -22,6 +22,7 @@
namespace llvm {
class CastInst;
class DominatorTree;
class IVUsers;
class Loop;
class LPPassManager;
@ -31,9 +32,25 @@ class ScalarEvolution;
/// Interface for visiting interesting IV users that are recognized but not
/// simplified by this utility.
class IVVisitor {
protected:
  // Dominator tree, if available. Must be set by the subclass (e.g. in its
  // constructor) before overflow-intrinsic splitting may be enabled.
  const DominatorTree *DT;
  // When true, simplifyUsersOfIV splits sadd.with.overflow into a plain add
  // plus the overflow check so the add can be analyzed and optimized.
  bool ShouldSplitOverflowIntrinsics;

  virtual void anchor();

public:
  IVVisitor(): DT(NULL), ShouldSplitOverflowIntrinsics(false) {}
  virtual ~IVVisitor() {}

  /// Return the dominator tree, or null if none was provided.
  const DominatorTree *getDomTree() const { return DT; }

  // NOTE(review): "Instrinsics" is a typo, but it is part of the public API
  // name; renaming it here would break existing callers.
  bool shouldSplitOverflowInstrinsics() const {
    return ShouldSplitOverflowIntrinsics;
  }

  /// Enable splitting of overflow intrinsics. A dominator tree must already
  /// have been provided (see the assert).
  void setSplitOverflowIntrinsics() {
    ShouldSplitOverflowIntrinsics = true;
    assert(DT && "Splitting overflow intrinsics requires a DomTree.");
  }

  /// Hook invoked for each interesting cast of the induction variable.
  virtual void visitCast(CastInst *Cast) = 0;
};

View File

@ -63,6 +63,9 @@ static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
// Experimental: when enabled, indvars splits sadd.with.overflow intrinsics
// into a plain add plus the intrinsic, reducing the number of live induction
// variables. Off by default until the related optimizations mature.
static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
  cl::desc("Reduce live induction variables."));
namespace {
class IndVarSimplify : public LoopPass {
LoopInfo *LI;
@ -643,8 +646,11 @@ namespace {
WideIVInfo WI;
WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
const DataLayout *TData) :
SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
const DataLayout *TData, const DominatorTree *DTree):
SE(SCEV), TD(TData) {
DT = DTree;
WI.NarrowIV = NarrowIV;
}
// Implement the interface used by simplifyUsersOfIV.
virtual void visitCast(CastInst *Cast);
@ -1114,7 +1120,9 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
WideIVVisitor WIV(CurrIV, SE, TD);
WideIVVisitor WIV(CurrIV, SE, TD, DT);
if (ReduceLiveIVs)
WIV.setSplitOverflowIntrinsics();
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);

View File

@ -18,12 +18,16 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@ -75,6 +79,9 @@ namespace {
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
Instruction *splitOverflowIntrinsic(Instruction *IVUser,
const DominatorTree *DT);
};
}
@ -263,6 +270,71 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return true;
}
/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
/// analysis and optimization.
///
/// The transformation is only performed when it is provably safe: the
/// intrinsic's value result must feed an extractvalue, its overflow bit must
/// feed a single conditional branch, and every user of the value result must
/// be dominated by the no-overflow successor of that branch (so the plain add
/// is provably NSW at every use).
///
/// \param IVUser the candidate instruction (anything other than a
///        sadd.with.overflow call is returned unchanged).
/// \param DT dominator tree used to prove the users are guarded by the
///        overflow check; must be non-null (enforced by
///        IVVisitor::setSplitOverflowIntrinsics).
/// \return A new value representing the non-overflowing add if possible,
/// otherwise return the original value.
Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
                                                    const DominatorTree *DT) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
  if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
    return IVUser;

  // Find a branch guarded by the overflow check.
  BranchInst *Branch = 0;
  Instruction *AddVal = 0;
  for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
       UI != E; ++UI) {
    if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
      if (ExtractInst->getNumIndices() != 1)
        continue;
      // Index 0 extracts the add result; index 1 extracts the overflow bit.
      if (ExtractInst->getIndices()[0] == 0)
        AddVal = ExtractInst;
      else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
        Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
    }
  }
  if (!AddVal || !Branch)
    return IVUser;

  // The no-overflow path is successor 1 (the "false" edge of the check). It
  // must have a single predecessor, or dominance below would not imply the
  // check was actually taken.
  BasicBlock *ContinueBB = Branch->getSuccessor(1);
  if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
    return IVUser;

  // Check if all users of the add are provably NSW.
  bool AllNSW = true;
  for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
       UI != E; ++UI) {
    if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
      BasicBlock *UseBB = UseInst->getParent();
      // A PHI "uses" the value in the incoming block, not where the PHI lives.
      if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
        UseBB = PHI->getIncomingBlock(UI);
      if (!DT->dominates(ContinueBB, UseBB)) {
        AllNSW = false;
        break;
      }
    }
  }
  if (!AllNSW)
    return IVUser;

  // Go for it...
  IRBuilder<> Builder(IVUser);
  Instruction *AddInst = dyn_cast<Instruction>(
    Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
  // The builder's folder may return a non-Instruction Value (e.g. if both
  // operands were constants); bail out rather than dereference null below.
  if (!AddInst)
    return IVUser;
  // The caller expects the new add to have the same form as the intrinsic. The
  // IV operand position must be the same.
  assert((AddInst->getOpcode() == Instruction::Add &&
          AddInst->getOperand(0) == II->getOperand(0)) &&
         "Bad add instruction created from overflow intrinsic.");

  AddVal->replaceAllUsesWith(AddInst);
  DeadInsts.push_back(AddVal);
  return AddInst;
}
/// pushIVUsers - Add all uses of Def to the current IV's worklist.
///
static void pushIVUsers(
@ -334,8 +406,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
while (!SimpleIVUsers.empty()) {
std::pair<Instruction*, Instruction*> UseOper =
SimpleIVUsers.pop_back_val();
Instruction *UseInst = UseOper.first;
// Bypass back edges to avoid extra work.
if (UseOper.first == CurrIV) continue;
if (UseInst == CurrIV) continue;
if (V && V->shouldSplitOverflowInstrinsics()) {
UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree());
if (!UseInst)
continue;
}
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {

View File

@ -0,0 +1,56 @@
; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"

; Verify that -liv-reduce splits each sadd.with.overflow into a plain
; "add nsw" (which indvars can then widen) plus the original intrinsic whose
; result only feeds the overflow branch, and that the extractvalue of the add
; result disappears from each loop body.
; CHECK-LABEL: @addwithoverflow
; CHECK-LABEL: loop1:
; CHECK-NOT: zext
; CHECK: add nsw
; CHECK: @llvm.sadd.with.overflow
; CHECK-LABEL: loop2:
; CHECK-NOT: extractvalue
; CHECK: add nuw nsw
; CHECK: @llvm.sadd.with.overflow
; CHECK-LABEL: loop3:
; CHECK-NOT: extractvalue
; CHECK: ret
define i64 @addwithoverflow(i32 %n, i64* %a) {
entry:
  br label %loop0

; Loop header: iterate while %i < %n.
loop0:
  %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ]
  %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ]
  %bc = icmp ult i32 %i, %n
  br i1 %bc, label %loop1, label %exit

; Accumulate into %s with an overflow-checked add.
loop1:
  %zxt = zext i32 %i to i64
  %ofs = shl nuw nsw i64 %zxt, 3
  %gep = getelementptr i64* %a, i64 %zxt
  %v = load i64* %gep, align 8
  %truncv = trunc i64 %v to i32
  %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv)
  %ovflows = extractvalue { i32, i1 } %adds, 1
  br i1 %ovflows, label %exit, label %loop2

; Increment %i, also with an overflow-checked add.
loop2:
  %addsval = extractvalue { i32, i1 } %adds, 0
  %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1)
  %i1check = extractvalue { i32, i1 } %i1, 1
  br i1 %i1check, label %exit, label %loop3

loop3:
  %i1val = extractvalue { i32, i1 } %i1, 0
  %test = icmp slt i32 %i1val, %n
  br i1 %test, label %return, label %loop0

return:
  %ret = zext i32 %addsval to i64
  ret i64 %ret

exit:
  unreachable
}

declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)