[LoopReroll] Make root-finding more aggressive.

Allow using an instruction other than a mul or phi as the base for root-finding. For example, the included testcase contains a loop which requires using a getelementptr as the base for root-finding.

Differential Revision: https://reviews.llvm.org/D26529
llvm-svn: 287588
2016-11-21 22:35:34 +00:00 · 2016-11-21 22:35:34 +00:00 · c0bba1a96d
parent 6cad0115e1
commit c0bba1a96d
2 changed files with 89 additions and 50 deletions
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@ -371,11 +371,12 @@ namespace {
    protected:
      typedef MapVector<Instruction*, BitVector> UsesTy;
-      bool findRootsRecursive(Instruction *IVU,
+      void findRootsRecursive(Instruction *IVU,
                              SmallInstructionSet SubsumedInsts);
      bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
      bool collectPossibleRoots(Instruction *Base,
                                std::map<int64_t,Instruction*> &Roots);
      bool validateRootSet(DAGRootSet &DRS);
      bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
      void collectInLoopUserSet(const SmallInstructionVector &Roots,
@ -827,7 +828,8 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
    Roots[V] = cast<Instruction>(I);
  }
-  if (Roots.empty())
+  // Make sure we have at least two roots.
  if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
    return false;
  // If we found non-loop-inc, non-root users of Base, assume they are
@ -861,40 +863,61 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
  return true;
 }
-bool LoopReroll::DAGRootTracker::
+void LoopReroll::DAGRootTracker::
 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
  // Does the user look like it could be part of a root set?
  // All its users must be simple arithmetic ops.
  if (I->getNumUses() > IL_MaxRerollIterations)
-    return false;
+    return;
-  if ((I->getOpcode() == Instruction::Mul ||
+  if (I != IV && findRootsBase(I, SubsumedInsts))
-       I->getOpcode() == Instruction::PHI) &&
+    return;
      I != IV &&
      findRootsBase(I, SubsumedInsts))
    return true;
  SubsumedInsts.insert(I);
  for (User *V : I->users()) {
-    Instruction *I = dyn_cast<Instruction>(V);
+    Instruction *I = cast<Instruction>(V);
    if (is_contained(LoopIncs, I))
      continue;
-    if (!I || !isSimpleArithmeticOp(I) ||
+    if (!isSimpleArithmeticOp(I))
-        !findRootsRecursive(I, SubsumedInsts))
+      continue;
-      return false;
+
    // The recursive call makes a copy of SubsumedInsts.
    findRootsRecursive(I, SubsumedInsts);
  }
 }
 /// Check that the loop iterations described by \p DRS are consecutive.
 ///
 /// \param DRS the candidate root set; its BaseInst and Roots are read.
 /// \returns false if \p DRS has no roots, if the SCEV of DRS.BaseInst is not
 ///          an add recurrence, or if the recurrence step of BaseInst differs
 ///          from N * (Roots[0] - BaseInst), where N = Roots.size() + 1;
 ///          true otherwise.
 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
  // Roots[0] is dereferenced below, so an empty root set is rejected up front.
  if (DRS.Roots.empty())
    return false;
  // Consider a DAGRootSet with N-1 roots (so N different values including
  //   BaseInst).
  // Define d = Roots[0] - BaseInst, which should be the same as
  //   Roots[I] - Roots[I-1] for all I in [1..N).
  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
  //   loop iteration J.
  //
  // Now, For the loop iterations to be consecutive:
  //   D = d * N
  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
  if (!ADR)
    return false;
  // N counts BaseInst plus every root.
  unsigned N = DRS.Roots.size() + 1;
  // d is computed from Roots[0] only; the spacing between later roots is not
  // re-checked in this function.
  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
  // Compare D (the recurrence step of BaseInst) against d * N.
  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
    return false;
  return true;
 }
 bool LoopReroll::DAGRootTracker::
 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
-
+  // The base of a RootSet must be an AddRec, so it can be erased.
-  // The base instruction needs to be a multiply so
+  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
-  // that we can erase it.
+  if (!IVU_ADR || IVU_ADR->getLoop() != L)
  if (IVU->getOpcode() != Instruction::Mul &&
      IVU->getOpcode() != Instruction::PHI)
    return false;
  std::map<int64_t, Instruction*> V;
@ -910,6 +933,8 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
  DAGRootSet DRS;
  DRS.BaseInst = nullptr;
  SmallVector<DAGRootSet, 16> PotentialRootSets;
  for (auto &KV : V) {
    if (!DRS.BaseInst) {
      DRS.BaseInst = KV.second;
@ -920,13 +945,22 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
      DRS.Roots.push_back(KV.second);
    } else {
      // Linear sequence terminated.
-      RootSets.push_back(DRS);
+      if (!validateRootSet(DRS))
        return false;
      // Construct a new DAGRootSet with the next sequence.
      PotentialRootSets.push_back(DRS);
      DRS.BaseInst = KV.second;
      DRS.SubsumedInsts = SubsumedInsts;
      DRS.Roots.clear();
    }
  }
-  RootSets.push_back(DRS);
+
  if (!validateRootSet(DRS))
    return false;
  PotentialRootSets.push_back(DRS);
  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
  return true;
 }
@ -940,8 +974,7 @@ bool LoopReroll::DAGRootTracker::findRoots() {
      if (isLoopIncrement(IVU, IV))
        LoopIncs.push_back(cast<Instruction>(IVU));
    }
-    if (!findRootsRecursive(IV, SmallInstructionSet()))
+    findRootsRecursive(IV, SmallInstructionSet());
      return false;
    LoopIncs.push_back(IV);
  } else {
    if (!findRootsBase(IV, SmallInstructionSet()))
@ -961,31 +994,6 @@ bool LoopReroll::DAGRootTracker::findRoots() {
    }
  }
  // And ensure all loop iterations are consecutive. We rely on std::map
  // providing ordered traversal.
  for (auto &V : RootSets) {
    const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(V.BaseInst));
    if (!ADR)
      return false;
    // Consider a DAGRootSet with N-1 roots (so N different values including
    //   BaseInst).
    // Define d = Roots[0] - BaseInst, which should be the same as
    //   Roots[I] - Roots[I-1] for all I in [1..N).
    // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
    //   loop iteration J.
    //
    // Now, For the loop iterations to be consecutive:
    //   D = d * N
    unsigned N = V.Roots.size() + 1;
    const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR);
    const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
    if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) {
      DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n");
      return false;
    }
  }
  Scale = RootSets[0].Roots.size() + 1;
  if (Scale > IL_MaxRerollIterations) {
@ -1498,8 +1506,8 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
  { // Limit the lifetime of SCEVExpander.
    const DataLayout &DL = Header->getModule()->getDataLayout();
    SCEVExpander Expander(*SE, DL, "reroll");
-    Value *NewIV =
+    Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
-        Expander.expandCodeFor(NewIVSCEV, InstIV->getType(), &Header->front());
+                                          Header->getFirstNonPHIOrDbg());
    for (auto &KV : Uses)
      if (KV.second.find_first() == 0)
--- a/llvm/test/Transforms/LoopReroll/basic.ll
+++ b/llvm/test/Transforms/LoopReroll/basic.ll
@ -575,6 +575,37 @@ for.end:                                          ; preds = %for.body
  ret void
 }
 ; Rerolling test where the roots hang off a getelementptr rather than a mul
 ; or phi: each iteration stores %call to %x at element offsets 3*i, 3*i+1 and
 ; 3*i+2, with the second and third addresses formed as GEPs of the first.
 ; The expected output below is a loop body containing a single store through
 ; a GEP indexed directly by %indvars.iv.
 define void @gep-indexing(i32* nocapture %x) {
 entry:
  %call = tail call i32 @foo(i32 0) #1
  br label %for.body
 for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %0 = mul nsw i64 %indvars.iv, 3
  ; Base address for this iteration; the next two GEPs are offsets 1 and 2
  ; from it.
  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
  store i32 %call, i32* %arrayidx, align 4
  %arrayidx4 = getelementptr inbounds i32, i32* %arrayidx, i64 1
  store i32 %call, i32* %arrayidx4, align 4
  %arrayidx9 = getelementptr inbounds i32, i32* %arrayidx, i64 2
  store i32 %call, i32* %arrayidx9, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 500
  br i1 %exitcond, label %for.end, label %for.body
 ; CHECK-LABEL: @gep-indexing
 ; CHECK:      for.body:
 ; CHECK-NEXT:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
 ; CHECK-NEXT:   %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv
 ; CHECK-NEXT:   store i32 %call, i32* %scevgep, align 4
 ; CHECK-NEXT:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; CHECK-NEXT:   %exitcond2 = icmp eq i32* %scevgep, %scevgep1
 ; CHECK-NEXT:   br i1 %exitcond2, label %for.end, label %for.body
 for.end:                                          ; preds = %for.body
  ret void
 }
 define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
 ; CHECK-LABEL: @unordered_atomic_ops(