diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index ec51ad71abc5..48d8e457ba7c 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -145,6 +145,7 @@ class IndVarSimplify { bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); bool rewriteFirstIterationLoopExitValues(Loop *L); + bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter); @@ -524,6 +525,29 @@ struct RewritePhi { // As a side effect, reduces the amount of IV processing within the loop. //===----------------------------------------------------------------------===// +bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const { + SmallPtrSet Visited; + SmallVector WorkList; + Visited.insert(I); + WorkList.push_back(I); + while (!WorkList.empty()) { + const Instruction *Curr = WorkList.pop_back_val(); + // This use is outside the loop, nothing to do. + if (!L->contains(Curr)) + continue; + // Do we assume it is a "hard" use which will not be eliminated easily? + if (Curr->mayHaveSideEffects()) + return true; + // Otherwise, add all its users to worklist. + for (auto U : Curr->users()) { + auto *UI = cast(U); + if (Visited.insert(UI).second) + WorkList.push_back(UI); + } + } + return false; +} + /// Check to see if this loop has a computable loop-invariant execution count. /// If so, this means that we can compute the final value of any expressions /// that are recurrent in the loop, and substitute the exit values from the loop @@ -598,19 +622,8 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized // away. - if (ExitValue->getSCEVType()>=scMulExpr) { - bool HasHardInternalUses = false; - for (auto *IB : Inst->users()) { - Instruction *UseInstr = cast(IB); - unsigned Opc = UseInstr->getOpcode(); - if (L->contains(UseInstr) && Opc == Instruction::Call) { - HasHardInternalUses = true; - break; - } - } - if (HasHardInternalUses) - continue; - } + if (!isa(ExitValue) && hasHardUserWithinLoop(L, Inst)) + continue; bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); diff --git a/llvm/test/Analysis/ScalarEvolution/pr28705.ll b/llvm/test/Analysis/ScalarEvolution/pr28705.ll index 8fbc08e3ca63..9a8487a6c662 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr28705.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr28705.ll @@ -1,11 +1,11 @@ ; PR28705 ; RUN: opt < %s -indvars -S | FileCheck %s -; Check IndVarSimplify replaces the exitval use of the induction var "%inc.i.i" -; with "%.sroa.speculated + 1". +; Check IndVarSimplify doesn't replace external use of the induction var +; "%inc.i.i" with "%.sroa.speculated + 1" because it is not profitable. ; ; CHECK-LABEL: @foo( -; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1 +; CHECK: %[[EXIT:.+]] = phi i32 [ %inc.i.i, %for.body650 ] ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ] ; define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr { diff --git a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll index c87cd6596c62..22087710a9ca 100644 --- a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll +++ b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll @@ -123,3 +123,54 @@ for.end: ; preds = %for.body tail call void @func(i32 %soft_use) ret void } + +; CHECK-LABEL: @test5( +define void @test5(i32 %m) nounwind uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %add = add i32 %a.05, %m + %soft_use = add i32 %add, 123 +; CHECK: tail call void @func(i32 %soft_use) + tail call void @func(i32 %soft_use) + %inc = add nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 186 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body +; CHECK: for.end: +; CHECK-NOT: mul i32 %m, 186 +; CHECK:%add.lcssa = phi i32 [ %add, %for.body ] +; CHECK-NEXT: tail call void @func(i32 %add.lcssa) + tail call void @func(i32 %add) + ret void +} + +; CHECK-LABEL: @test6( +define void @test6(i32 %m, i32* %p) nounwind uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %add = add i32 %a.05, %m + %soft_use = add i32 %add, 123 +; CHECK: store i32 %soft_use, i32* %pidx + %pidx = getelementptr i32, i32* %p, i32 %add + store i32 %soft_use, i32* %pidx + %inc = add nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 186 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body +; CHECK: for.end: +; CHECK-NOT: mul i32 %m, 186 +; CHECK:%add.lcssa = phi i32 [ %add, %for.body ] +; CHECK-NEXT: tail call void @func(i32 %add.lcssa) + tail call void @func(i32 %add) + ret void +} diff --git a/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll b/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll index 961c9fd944d9..fff76675f175 100644 --- a/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll +++ b/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll @@ -1,5 +1,7 @@ ; RUN: opt -S -indvars < %s | FileCheck %s +; Do not rewrite the user outside the loop because we must keep the instruction +; inside the loop due to store. Rewrite doesn't give us any profit. define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) { ; CHECK-LABEL: @f( not_zero11.preheader: @@ -22,6 +24,42 @@ not_zero11: %tmp23 = icmp slt i32 %tmp22, %tmp14 br i1 %tmp23, label %not_zero11, label %main.exit.selector +main.exit.selector: +; CHECK-LABEL: main.exit.selector: +; CHECK: %tmp22.lcssa = phi i32 [ %tmp22, %not_zero11 ] +; CHECK: %tmp24 = icmp slt i32 %tmp22.lcssa, %length. + %tmp24 = icmp slt i32 %tmp22, %length.i + br i1 %tmp24, label %not_zero11.postloop, label %leave + +leave: + ret void + +not_zero11.postloop: + ret void +} + +; Rewrite the user outside the loop because there is no hard users inside the loop. +define void @f1(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) { +; CHECK-LABEL: @f1( +not_zero11.preheader: + %tmp13 = icmp ugt i32 %length.i, %length.i.88 + %tmp14 = select i1 %tmp13, i32 %length.i.88, i32 %length.i + %tmp15 = icmp sgt i32 %tmp14, 0 + br i1 %tmp15, label %not_zero11, label %not_zero11.postloop + +not_zero11: + %v_1 = phi i32 [ %tmp22, %not_zero11 ], [ 0, %not_zero11.preheader ] + %tmp16 = zext i32 %v_1 to i64 + %tmp17 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp16 + %tmp18 = load i8, i8* %tmp17, align 1 + %tmp19 = zext i8 %tmp18 to i32 + %tmp20 = or i32 %tmp19, %tmp10 + %tmp21 = trunc i32 %tmp20 to i8 + %addr22 = getelementptr inbounds i8, i8* %tmp12, i64 %tmp16 + %tmp22 = add nuw nsw i32 %v_1, 1 + %tmp23 = icmp slt i32 %tmp22, %tmp14 + br i1 %tmp23, label %not_zero11, label %main.exit.selector + main.exit.selector: ; CHECK-LABEL: main.exit.selector: ; CHECK: %tmp24 = icmp slt i32 %tmp14, %length.i