diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index ec51ad71abc5..48d8e457ba7c 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -145,6 +145,7 @@ class IndVarSimplify {
   bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
   bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
   bool rewriteFirstIterationLoopExitValues(Loop *L);
+  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
 
   bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                  PHINode *IndVar, SCEVExpander &Rewriter);
@@ -524,6 +525,29 @@ struct RewritePhi {
 // As a side effect, reduces the amount of IV processing within the loop.
 //===----------------------------------------------------------------------===//
 
+bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
+  SmallPtrSet<const Instruction *, 8> Visited;
+  SmallVector<const Instruction *, 8> WorkList;
+  Visited.insert(I);
+  WorkList.push_back(I);
+  while (!WorkList.empty()) {
+    const Instruction *Curr = WorkList.pop_back_val();
+    // This use is outside the loop, nothing to do.
+    if (!L->contains(Curr))
+      continue;
+    // Do we assume it is a "hard" use which will not be eliminated easily?
+    if (Curr->mayHaveSideEffects())
+      return true;
+    // Otherwise, add all its users to worklist.
+    for (auto U : Curr->users()) {
+      auto *UI = cast<Instruction>(U);
+      if (Visited.insert(UI).second)
+        WorkList.push_back(UI);
+    }
+  }
+  return false;
+}
+
 /// Check to see if this loop has a computable loop-invariant execution count.
 /// If so, this means that we can compute the final value of any expressions
 /// that are recurrent in the loop, and substitute the exit values from the loop
@@ -598,19 +622,8 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
         // Computing the value outside of the loop brings no benefit if it is
         // definitely used inside the loop in a way which can not be optimized
         // away.
-        if (ExitValue->getSCEVType()>=scMulExpr) {
-          bool HasHardInternalUses = false;
-          for (auto *IB : Inst->users()) {
-            Instruction *UseInstr = cast<Instruction>(IB);
-            unsigned Opc = UseInstr->getOpcode();
-            if (L->contains(UseInstr) && Opc == Instruction::Call) {
-              HasHardInternalUses = true;
-              break;
-            }
-          }
-          if (HasHardInternalUses)
-            continue;
-        }
+        if (!isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
+          continue;
 
         bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
diff --git a/llvm/test/Analysis/ScalarEvolution/pr28705.ll b/llvm/test/Analysis/ScalarEvolution/pr28705.ll
index 8fbc08e3ca63..9a8487a6c662 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr28705.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr28705.ll
@@ -1,11 +1,11 @@
 ; PR28705
 ; RUN: opt < %s -indvars -S | FileCheck %s
 
-; Check IndVarSimplify replaces the exitval use of the induction var "%inc.i.i"
-; with "%.sroa.speculated + 1".
+; Check IndVarSimplify doesn't replace external use of the induction var
+; "%inc.i.i" with "%.sroa.speculated + 1" because it is not profitable.
 ;
 ; CHECK-LABEL: @foo(
-; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1
+; CHECK: %[[EXIT:.+]] = phi i32 [ %inc.i.i, %for.body650 ]
 ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ]
 ;
 define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr {
diff --git a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll
index c87cd6596c62..22087710a9ca 100644
--- a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll
+++ b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll
@@ -123,3 +123,54 @@ for.end:                                          ; preds = %for.body
   tail call void @func(i32 %soft_use)
   ret void
 }
+
+; CHECK-LABEL: @test5(
+define void @test5(i32 %m) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %add = add i32 %a.05, %m
+  %soft_use = add i32 %add, 123
+; CHECK: tail call void @func(i32 %soft_use)
+  tail call void @func(i32 %soft_use)
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 186
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
+  tail call void @func(i32 %add)
+  ret void
+}
+
+; CHECK-LABEL: @test6(
+define void @test6(i32 %m, i32* %p) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %add = add i32 %a.05, %m
+  %soft_use = add i32 %add, 123
+; CHECK: store i32 %soft_use, i32* %pidx
+  %pidx = getelementptr i32, i32* %p, i32 %add
+  store i32 %soft_use, i32* %pidx
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 186
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
+  tail call void @func(i32 %add)
+  ret void
+}
diff --git a/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll b/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll
index 961c9fd944d9..fff76675f175 100644
--- a/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -S -indvars < %s | FileCheck %s
 
+; Do not rewrite the user outside the loop because we must keep the instruction
+; inside the loop due to store. Rewrite doesn't give us any profit.
 define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) {
 ; CHECK-LABEL: @f(
 not_zero11.preheader:
@@ -22,6 +24,42 @@ not_zero11:
   %tmp23 = icmp slt i32 %tmp22, %tmp14
   br i1 %tmp23, label %not_zero11, label %main.exit.selector
 
+main.exit.selector:
+; CHECK-LABEL: main.exit.selector:
+; CHECK:   %tmp22.lcssa = phi i32 [ %tmp22, %not_zero11 ]
+; CHECK:   %tmp24 = icmp slt i32 %tmp22.lcssa, %length.
+  %tmp24 = icmp slt i32 %tmp22, %length.i
+  br i1 %tmp24, label %not_zero11.postloop, label %leave
+
+leave:
+  ret void
+
+not_zero11.postloop:
+  ret void
+}
+
+; Rewrite the user outside the loop because there is no hard users inside the loop.
+define void @f1(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) {
+; CHECK-LABEL: @f1(
+not_zero11.preheader:
+  %tmp13 = icmp ugt i32 %length.i, %length.i.88
+  %tmp14 = select i1 %tmp13, i32 %length.i.88, i32 %length.i
+  %tmp15 = icmp sgt i32 %tmp14, 0
+  br i1 %tmp15, label %not_zero11, label %not_zero11.postloop
+
+not_zero11:
+  %v_1 = phi i32 [ %tmp22, %not_zero11 ], [ 0, %not_zero11.preheader ]
+  %tmp16 = zext i32 %v_1 to i64
+  %tmp17 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp16
+  %tmp18 = load i8, i8* %tmp17, align 1
+  %tmp19 = zext i8 %tmp18 to i32
+  %tmp20 = or i32 %tmp19, %tmp10
+  %tmp21 = trunc i32 %tmp20 to i8
+  %addr22 = getelementptr inbounds i8, i8* %tmp12, i64 %tmp16
+  %tmp22 = add nuw nsw i32 %v_1, 1
+  %tmp23 = icmp slt i32 %tmp22, %tmp14
+  br i1 %tmp23, label %not_zero11, label %main.exit.selector
+
 main.exit.selector:
 ; CHECK-LABEL: main.exit.selector:
 ; CHECK: %tmp24 = icmp slt i32 %tmp14, %length.i