[DSE] Move isOverwrite into DSEState. NFC

This moves the isOverwrite function into the DSEState so that it can
share the analyses and members from the state.

A few extra loop tests were also added to test stores in and around
multi-block loops for D100464.
This commit is contained in:
David Green 2021-05-14 09:16:51 +01:00
parent c82a0ae70e
commit f7cb654763
2 changed files with 468 additions and 129 deletions

View File

@ -355,125 +355,6 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
return OW_Complete;
}
/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
/// instruction) completely overwrites a store to the 'Earlier' location
/// (by \p EarlierI instruction).
/// Return OW_MaybePartial if \p Later does not completely overwrite
/// \p Earlier, but they both write to the same underlying object. In that
/// case, use isPartialOverwrite to check if \p Later partially overwrites
/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
///
/// \p EarlierOff and \p LaterOff are out-parameters: they receive the
/// constant byte offsets of the two accesses from a common base pointer,
/// but are only written on the base-pointer-decomposition path below.
static OverwriteResult
isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
const MemoryLocation &Later, const MemoryLocation &Earlier,
const DataLayout &DL, const TargetLibraryInfo &TLI,
int64_t &EarlierOff, int64_t &LaterOff, BatchAAResults &AA,
const Function *F) {
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
// In case no constant size is known, try to use the IR values for the
// number of bytes written and check if they match.
const auto *LaterMemI = dyn_cast<MemIntrinsic>(LaterI);
const auto *EarlierMemI = dyn_cast<MemIntrinsic>(EarlierI);
if (LaterMemI && EarlierMemI) {
const Value *LaterV = LaterMemI->getLength();
const Value *EarlierV = EarlierMemI->getLength();
// Identical dynamic length values on must-aliasing locations imply a
// complete overwrite even though the sizes are not statically known.
if (LaterV == EarlierV && AA.isMustAlias(Earlier, Later))
return OW_Complete;
}
// Masked stores have imprecise locations, but we can reason about them
// to some extent.
return isMaskedStoreOverwrite(LaterI, EarlierI, AA);
}
const uint64_t LaterSize = Later.Size.getValue();
const uint64_t EarlierSize = Earlier.Size.getValue();
// Query the alias information
AliasResult AAR = AA.alias(Later, Earlier);
// If the start pointers are the same, we just have to compare sizes to see if
// the later store was larger than the earlier store.
if (AAR == AliasResult::MustAlias) {
// Make sure that the Later size is >= the Earlier size.
if (LaterSize >= EarlierSize)
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
int32_t Off = AAR.getOffset();
// A non-negative offset means Later starts at or before Earlier; the
// earlier access must then also end within the later one.
if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
return OW_Complete;
}
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
return OW_Unknown;
// If the "Later" store is to a recognizable object, get its size.
uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F);
if (ObjectSize != MemoryLocation::UnknownSize)
if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
EarlierOff = 0;
LaterOff = 0;
const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
return OW_Unknown;
// The later access completely overlaps the earlier store if and only if
// both start and end of the earlier one is "inside" the later one:
// |<->|--earlier--|<->|
// |-------later-------|
// Accesses may overlap if and only if start of one of them is "inside"
// another one:
// |<->|--earlier--|<----->|
// |-------later-------|
// OR
// |----- earlier -----|
// |<->|---later---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
// Check if the earlier access starts "not before" the later one.
if (EarlierOff >= LaterOff) {
// If the earlier access ends "not after" the later access then the earlier
// one is completely overwritten by the later one.
if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
return OW_Complete;
// If start of the earlier access is "before" end of the later access then
// accesses overlap.
else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
return OW_MaybePartial;
}
// If start of the later access is "before" end of the earlier access then
// accesses overlap.
else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
return OW_MaybePartial;
}
// Can reach here only if accesses are known not to overlap. There is no
// dedicated code to indicate no overlap so signal "unknown".
return OW_Unknown;
}
/// Return 'OW_Complete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
@ -1033,6 +914,123 @@ struct DSEState {
return State;
}
/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
/// instruction) completely overwrites a store to the 'Earlier' location
/// (by \p EarlierI instruction).
/// Return OW_MaybePartial if \p Later does not completely overwrite
/// \p Earlier, but they both write to the same underlying object. In that
/// case, use isPartialOverwrite to check if \p Later partially overwrites
/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
///
/// Uses the DSEState members BatchAA, DL, TLI and F for its queries.
/// \p EarlierOff and \p LaterOff are out-parameters: they receive the
/// constant byte offsets of the two accesses from a common base pointer,
/// but are only written on the base-pointer-decomposition path below.
OverwriteResult
isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
const MemoryLocation &Later, const MemoryLocation &Earlier,
int64_t &EarlierOff, int64_t &LaterOff) {
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
// In case no constant size is known, try to use the IR values for the
// number of bytes written and check if they match.
const auto *LaterMemI = dyn_cast<MemIntrinsic>(LaterI);
const auto *EarlierMemI = dyn_cast<MemIntrinsic>(EarlierI);
if (LaterMemI && EarlierMemI) {
const Value *LaterV = LaterMemI->getLength();
const Value *EarlierV = EarlierMemI->getLength();
// Identical dynamic length values on must-aliasing locations imply a
// complete overwrite even though the sizes are not statically known.
if (LaterV == EarlierV && BatchAA.isMustAlias(Earlier, Later))
return OW_Complete;
}
// Masked stores have imprecise locations, but we can reason about them
// to some extent.
return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA);
}
const uint64_t LaterSize = Later.Size.getValue();
const uint64_t EarlierSize = Earlier.Size.getValue();
// Query the alias information
AliasResult AAR = BatchAA.alias(Later, Earlier);
// If the start pointers are the same, we just have to compare sizes to see if
// the later store was larger than the earlier store.
if (AAR == AliasResult::MustAlias) {
// Make sure that the Later size is >= the Earlier size.
if (LaterSize >= EarlierSize)
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
int32_t Off = AAR.getOffset();
// A non-negative offset means Later starts at or before Earlier; the
// earlier access must then also end within the later one.
if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
return OW_Complete;
}
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
return OW_Unknown;
// If the "Later" store is to a recognizable object, get its size.
uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F);
if (ObjectSize != MemoryLocation::UnknownSize)
if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
EarlierOff = 0;
LaterOff = 0;
const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
return OW_Unknown;
// The later access completely overlaps the earlier store if and only if
// both start and end of the earlier one is "inside" the later one:
// |<->|--earlier--|<->|
// |-------later-------|
// Accesses may overlap if and only if start of one of them is "inside"
// another one:
// |<->|--earlier--|<----->|
// |-------later-------|
// OR
// |----- earlier -----|
// |<->|---later---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
// Check if the earlier access starts "not before" the later one.
if (EarlierOff >= LaterOff) {
// If the earlier access ends "not after" the later access then the earlier
// one is completely overwritten by the later one.
if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
return OW_Complete;
// If start of the earlier access is "before" end of the later access then
// accesses overlap.
else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
return OW_MaybePartial;
}
// If start of the later access is "before" end of the earlier access then
// accesses overlap.
else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
return OW_MaybePartial;
}
// Can reach here only if accesses are known not to overlap. There is no
// dedicated code to indicate no overlap so signal "unknown".
return OW_Unknown;
}
bool isInvisibleToCallerAfterRet(const Value *V) {
if (isa<AllocaInst>(V))
return true;
@ -1120,8 +1118,8 @@ struct DSEState {
int64_t InstWriteOffset, DepWriteOffset;
if (auto CC = getLocForWriteEx(UseInst))
return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
InstWriteOffset, BatchAA, &F) == OW_Complete;
return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset,
InstWriteOffset) == OW_Complete;
return false;
}
@ -1224,9 +1222,8 @@ struct DSEState {
return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
}
int64_t InstWriteOffset, DepWriteOffset;
return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI,
DepWriteOffset, InstWriteOffset, BatchAA,
&F) == OW_Complete;
return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset,
InstWriteOffset) == OW_Complete;
}
// Returns true if \p Use may read from \p DefLoc.
@ -1422,8 +1419,8 @@ struct DSEState {
continue;
} else {
int64_t InstWriteOffset, DepWriteOffset;
auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
DepWriteOffset, InstWriteOffset, BatchAA, &F);
auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc,
DepWriteOffset, InstWriteOffset);
// If Current does not write to the same object as KillingDef, check
// the next candidate.
if (OR == OW_Unknown) {
@ -1940,9 +1937,8 @@ bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
} else {
// Check if NI overwrites SI.
int64_t InstWriteOffset, DepWriteOffset;
OverwriteResult OR =
isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
InstWriteOffset, State.BatchAA, &F);
OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc,
DepWriteOffset, InstWriteOffset);
if (OR == OW_MaybePartial) {
auto Iter = State.IOLs.insert(
std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(

View File

@ -356,3 +356,346 @@ if.end10: ; preds = %do.body
store i16 1, i16* %arrayidx2, align 1
ret i16 0
}
; Similar to above, but with an irreducible loop. The stores should not be removed.
; The A/B blocks form an irreducible cycle (both reachable from entry); both
; the in-loop store and the exit store go through %arrayidx, and both appear
; in the expected output below, i.e. neither is eliminated.
define i16 @irreducible(i1 %c) {
; CHECK-LABEL: @irreducible(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[A:%.*]], label [[B:%.*]]
; CHECK: A:
; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[B]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
; CHECK-NEXT: br label [[B]]
; CHECK: B:
; CHECK-NEXT: [[J_0:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[I_0]], [[A]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[J_0]]
; CHECK-NEXT: store i16 2, i16* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[J_0]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[J_0]], 4
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[A]]
; CHECK: exit:
; CHECK-NEXT: store i16 1, i16* [[ARRAYIDX]], align 1
; CHECK-NEXT: ret i16 0
;
entry:
br i1 %c, label %A, label %B
A:
%i.0 = phi i16 [ 0, %entry ], [ %inc, %B ]
%arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
br label %B
B:
%j.0 = phi i16 [ 0, %entry ], [ %i.0, %A ]
%arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %j.0
store i16 2, i16* %arrayidx, align 1
%inc = add nuw nsw i16 %j.0, 1
%exitcond = icmp eq i16 %j.0, 4
br i1 %exitcond, label %exit, label %A
exit:
store i16 1, i16* %arrayidx, align 1
ret i16 0
}
; An irreducible loop inside another loop.
; The A/B irreducible cycle sits inside the 'outer' loop; both stores through
; %arrayidx (in B and in outerl) appear in the expected output, i.e. neither
; is eliminated.
define i16 @irreducible_nested() {
; CHECK-LABEL: @irreducible_nested(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[OUTER:%.*]]
; CHECK: outer:
; CHECK-NEXT: [[X:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INCX:%.*]], [[OUTERL:%.*]] ]
; CHECK-NEXT: [[C:%.*]] = icmp sgt i16 [[X]], 2
; CHECK-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]]
; CHECK: A:
; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[OUTER]] ], [ [[INC:%.*]], [[B]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
; CHECK-NEXT: br label [[B]]
; CHECK: B:
; CHECK-NEXT: [[J_0:%.*]] = phi i16 [ 0, [[OUTER]] ], [ [[I_0]], [[A]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[J_0]]
; CHECK-NEXT: store i16 2, i16* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[J_0]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[J_0]], 4
; CHECK-NEXT: br i1 [[EXITCOND]], label [[OUTERL]], label [[A]]
; CHECK: outerl:
; CHECK-NEXT: store i16 1, i16* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INCX]] = add nuw nsw i16 [[X]], 1
; CHECK-NEXT: [[EXITCONDX:%.*]] = icmp eq i16 [[X]], 4
; CHECK-NEXT: br i1 [[EXITCONDX]], label [[END:%.*]], label [[OUTER]]
; CHECK: end:
; CHECK-NEXT: ret i16 0
;
entry:
br label %outer
outer:
%x = phi i16 [ 0, %entry ], [ %incx, %outerl ]
%c = icmp sgt i16 %x, 2
br i1 %c, label %A, label %B
A:
%i.0 = phi i16 [ 0, %outer ], [ %inc, %B ]
%arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
br label %B
B:
%j.0 = phi i16 [ 0, %outer ], [ %i.0, %A ]
%arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %j.0
store i16 2, i16* %arrayidx, align 1
%inc = add nuw nsw i16 %j.0, 1
%exitcond = icmp eq i16 %j.0, 4
br i1 %exitcond, label %outerl, label %A
outerl:
store i16 1, i16* %arrayidx, align 1
%incx = add nuw nsw i16 %x, 1
%exitcondx = icmp eq i16 %x, 4
br i1 %exitcondx, label %end, label %outer
end:
ret i16 0
}
; A multi-block loop with an unconditional store plus a conditional one
; (do.store), and a store after the loop. All three stores appear in the
; expected output, i.e. none is removed.
define i16 @multi_overwrite(i1 %cond) {
; CHECK-LABEL: @multi_overwrite(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END2:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
; CHECK-NEXT: store i16 2, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[DO_STORE:%.*]], label [[IF_END2]]
; CHECK: do.store:
; CHECK-NEXT: store i16 3, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: br label [[IF_END2]]
; CHECK: if.end2:
; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[I_0]], 1
; CHECK-NEXT: br label [[DO_BODY]]
; CHECK: exit:
; CHECK-NEXT: store i16 1, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: ret i16 0
;
entry:
br label %do.body
do.body:
%i.0 = phi i16 [ 0, %entry ], [ %inc, %if.end2 ]
%arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
store i16 2, i16* %arrayidx2, align 1
%exitcond = icmp eq i16 %i.0, 4
br i1 %exitcond, label %exit, label %if.end
if.end:
br i1 %cond, label %do.store, label %if.end2
do.store:
store i16 3, i16* %arrayidx2, align 1
br label %if.end2
if.end2:
%inc = add nuw nsw i16 %i.0, 1
br label %do.body
exit:
store i16 1, i16* %arrayidx2, align 1
ret i16 0
}
; Two successive multi-block loops storing through the same pointer %v2
; (computed in the first loop, reused in the second). All three stores
; appear in the expected output, i.e. none is removed.
define void @test(i8* noalias %data1, i8* %data2, i16* %data3, i32 %i1)
; CHECK-LABEL: @test(
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[I1:%.*]], 0
; CHECK-NEXT: br label [[PH0:%.*]]
; CHECK: ph0:
; CHECK-NEXT: br label [[HEADER0:%.*]]
; CHECK: header0:
; CHECK-NEXT: [[P1:%.*]] = phi i32 [ 0, [[PH0]] ], [ [[PN1:%.*]], [[END1:%.*]] ]
; CHECK-NEXT: [[PN1]] = add i32 [[P1]], 1
; CHECK-NEXT: [[PC1:%.*]] = icmp slt i32 [[PN1]], 5
; CHECK-NEXT: [[V2:%.*]] = getelementptr [10 x i16], [10 x i16]* @x, i32 0, i32 [[P1]]
; CHECK-NEXT: store i16 1, i16* [[V2]], align 2
; CHECK-NEXT: br i1 [[C]], label [[THEN1:%.*]], label [[ELSE1:%.*]]
; CHECK: then1:
; CHECK-NEXT: store i16 2, i16* [[V2]], align 2
; CHECK-NEXT: br label [[END1]]
; CHECK: else1:
; CHECK-NEXT: br label [[END1]]
; CHECK: end1:
; CHECK-NEXT: br i1 [[PC1]], label [[HEADER0]], label [[END0:%.*]]
; CHECK: end0:
; CHECK-NEXT: br label [[HEADER2:%.*]]
; CHECK: header2:
; CHECK-NEXT: [[P3:%.*]] = phi i32 [ 0, [[END0]] ], [ [[PN3:%.*]], [[HEADER2]] ]
; CHECK-NEXT: [[PN3]] = add i32 [[P3]], 1
; CHECK-NEXT: [[PC3:%.*]] = icmp slt i32 [[PN3]], 5
; CHECK-NEXT: store i16 4, i16* [[V2]], align 2
; CHECK-NEXT: br i1 [[PC3]], label [[HEADER2]], label [[END2:%.*]]
; CHECK: end2:
; CHECK-NEXT: ret void
;
{
%c = icmp eq i32 %i1, 0
br label %ph0
ph0:
br label %header0
header0:
%p1 = phi i32 [0, %ph0], [%pn1, %end1]
%pn1 = add i32 %p1, 1
%pc1 = icmp slt i32 %pn1, 5
%v2 = getelementptr [10 x i16], [10 x i16]* @x, i32 0, i32 %p1
store i16 1, i16* %v2
br i1 %c, label %then1, label %else1
then1:
store i16 2, i16* %v2
br label %end1
else1:
br label %end1
end1:
br i1 %pc1, label %header0, label %end0
end0:
br label %header2
header2:
%p3 = phi i32 [0, %end0], [%pn3, %header2]
%pn3 = add i32 %p3, 1
%pc3 = icmp slt i32 %pn3, 5
store i16 4, i16* %v2
br i1 %pc3, label %header2, label %end2
end2:
ret void
}
; Similar to above, but with multiple partial overlaps
; The two i8 stores in exit touch bytes of the i16 written in the loop; all
; four stores appear in the expected output, i.e. none is removed.
define i16 @partial_override_fromloop(i1 %c, i32 %i) {
; CHECK-LABEL: @partial_override_fromloop(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END2:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
; CHECK-NEXT: store i16 2, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
; CHECK-NEXT: br i1 [[C:%.*]], label [[DO_STORE:%.*]], label [[IF_END2]]
; CHECK: do.store:
; CHECK-NEXT: store i16 3, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: br label [[IF_END2]]
; CHECK: if.end2:
; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[I_0]], 1
; CHECK-NEXT: br label [[DO_BODY]]
; CHECK: exit:
; CHECK-NEXT: [[BC:%.*]] = bitcast i16* [[ARRAYIDX2]] to i8*
; CHECK-NEXT: [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
; CHECK-NEXT: store i8 10, i8* [[BC]], align 1
; CHECK-NEXT: store i8 11, i8* [[BC2]], align 1
; CHECK-NEXT: ret i16 0
;
entry:
br label %do.body
do.body:
%i.0 = phi i16 [ 0, %entry ], [ %inc, %if.end2 ]
%arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
store i16 2, i16* %arrayidx2, align 1
%exitcond = icmp eq i16 %i.0, 4
br i1 %exitcond, label %exit, label %if.end
if.end:
br i1 %c, label %do.store, label %if.end2
do.store:
store i16 3, i16* %arrayidx2, align 1
br label %if.end2
if.end2:
%inc = add nuw nsw i16 %i.0, 1
br label %do.body
exit:
%bc = bitcast i16* %arrayidx2 to i8*
%bc2 = getelementptr inbounds i8, i8* %bc, i32 1
store i8 10, i8* %bc, align 1
store i8 11, i8* %bc2, align 1
ret i16 0
}
; The pre-loop `store i16 1` to %arrayidx is dead: the two i8 stores in exit
; cover both of its bytes. It is absent from the expected output, i.e. it is
; removed across the loop.
define i16 @partial_override_overloop(i1 %c, i32 %i) {
; CHECK-LABEL: @partial_override_overloop(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i32 [[I:%.*]]
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
; CHECK-NEXT: store i16 2, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[I_0]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[DO_BODY]]
; CHECK: exit:
; CHECK-NEXT: [[BC:%.*]] = bitcast i16* [[ARRAYIDX]] to i8*
; CHECK-NEXT: [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
; CHECK-NEXT: store i8 10, i8* [[BC]], align 1
; CHECK-NEXT: store i8 11, i8* [[BC2]], align 1
; CHECK-NEXT: ret i16 0
;
entry:
%arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i32 %i
store i16 1, i16* %arrayidx, align 1
br label %do.body
do.body:
%i.0 = phi i16 [ 0, %entry ], [ %inc, %do.body ]
%arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
store i16 2, i16* %arrayidx2, align 1
%exitcond = icmp eq i16 %i.0, 4
%inc = add nuw nsw i16 %i.0, 1
br i1 %exitcond, label %exit, label %do.body
exit:
%bc = bitcast i16* %arrayidx to i8*
%bc2 = getelementptr inbounds i8, i8* %bc, i32 1
store i8 10, i8* %bc, align 1
store i8 11, i8* %bc2, align 1
ret i16 0
}
; The in-loop `store i16 2` and the following `store i8 10` to its low byte
; are merged: the expected output has a single `store i16 10` in the loop and
; no separate i8 store there; the post-loop i8 store is kept.
define i16 @partial_override_multi(i1 %c, i32 %i) {
; CHECK-LABEL: @partial_override_multi(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
; CHECK-NEXT: store i16 10, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[BC:%.*]] = bitcast i16* [[ARRAYIDX2]] to i8*
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[I_0]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[DO_BODY]]
; CHECK: exit:
; CHECK-NEXT: [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
; CHECK-NEXT: store i8 11, i8* [[BC2]], align 1
; CHECK-NEXT: ret i16 0
;
entry:
br label %do.body
do.body:
%i.0 = phi i16 [ 0, %entry ], [ %inc, %do.body ]
%arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
store i16 2, i16* %arrayidx2, align 1
%bc = bitcast i16* %arrayidx2 to i8*
store i8 10, i8* %bc, align 1
%exitcond = icmp eq i16 %i.0, 4
%inc = add nuw nsw i16 %i.0, 1
br i1 %exitcond, label %exit, label %do.body
exit:
%bc2 = getelementptr inbounds i8, i8* %bc, i32 1
store i8 11, i8* %bc2, align 1
ret i16 0
}