diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6922d84db4e9..5d4a88871951 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -355,125 +355,6 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
   return OW_Complete;
 }
 
-/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
-/// instruction) completely overwrites a store to the 'Earlier' location
-/// (by \p EarlierI instruction).
-/// Return OW_MaybePartial if \p Later does not completely overwrite
-/// \p Earlier, but they both write to the same underlying object. In that
-/// case, use isPartialOverwrite to check if \p Later partially overwrites
-/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
-static OverwriteResult
-isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
-            const MemoryLocation &Later, const MemoryLocation &Earlier,
-            const DataLayout &DL, const TargetLibraryInfo &TLI,
-            int64_t &EarlierOff, int64_t &LaterOff, BatchAAResults &AA,
-            const Function *F) {
-  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
-  // get imprecise values here, though (except for unknown sizes).
-  if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
-    // In case no constant size is known, try to use the IR values for the
-    // number of bytes written and check if they match.
-    const auto *LaterMemI = dyn_cast<AnyMemIntrinsic>(LaterI);
-    const auto *EarlierMemI = dyn_cast<AnyMemIntrinsic>(EarlierI);
-    if (LaterMemI && EarlierMemI) {
-      const Value *LaterV = LaterMemI->getLength();
-      const Value *EarlierV = EarlierMemI->getLength();
-      if (LaterV == EarlierV && AA.isMustAlias(Earlier, Later))
-        return OW_Complete;
-    }
-
-    // Masked stores have imprecise locations, but we can reason about them
-    // to some extent.
-    return isMaskedStoreOverwrite(LaterI, EarlierI, AA);
-  }
-
-  const uint64_t LaterSize = Later.Size.getValue();
-  const uint64_t EarlierSize = Earlier.Size.getValue();
-
-  // Query the alias information
-  AliasResult AAR = AA.alias(Later, Earlier);
-
-  // If the start pointers are the same, we just have to compare sizes to see if
-  // the later store was larger than the earlier store.
-  if (AAR == AliasResult::MustAlias) {
-    // Make sure that the Later size is >= the Earlier size.
-    if (LaterSize >= EarlierSize)
-      return OW_Complete;
-  }
-
-  // If we hit a partial alias we may have a full overwrite
-  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
-    int32_t Off = AAR.getOffset();
-    if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
-      return OW_Complete;
-  }
-
-  // Check to see if the later store is to the entire object (either a global,
-  // an alloca, or a byval/inalloca argument). If so, then it clearly
-  // overwrites any other store to the same object.
-  const Value *P1 = Earlier.Ptr->stripPointerCasts();
-  const Value *P2 = Later.Ptr->stripPointerCasts();
-  const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
-
-  // If we can't resolve the same pointers to the same object, then we can't
-  // analyze them at all.
-  if (UO1 != UO2)
-    return OW_Unknown;
-
-  // If the "Later" store is to a recognizable object, get its size.
-  uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F);
-  if (ObjectSize != MemoryLocation::UnknownSize)
-    if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
-      return OW_Complete;
-
-  // Okay, we have stores to two completely different pointers. Try to
-  // decompose the pointer into a "base + constant_offset" form. If the base
-  // pointers are equal, then we can reason about the two stores.
-  EarlierOff = 0;
-  LaterOff = 0;
-  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
-  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
-
-  // If the base pointers still differ, we have two completely different stores.
-  if (BP1 != BP2)
-    return OW_Unknown;
-
-  // The later access completely overlaps the earlier store if and only if
-  // both start and end of the earlier one are "inside" the later one:
-  //    |<->|--earlier--|<->|
-  //    |-------later-------|
-  // Accesses may overlap if and only if start of one of them is "inside"
-  // another one:
-  //    |<->|--earlier--|<----->|
-  //    |-------later-------|
-  //           OR
-  //    |----- earlier -----|
-  //    |<->|---later---|<----->|
-  //
-  // We have to be careful here as *Off is signed while *.Size is unsigned.
-
-  // Check if the earlier access starts "not before" the later one.
-  if (EarlierOff >= LaterOff) {
-    // If the earlier access ends "not after" the later access then the earlier
-    // one is completely overwritten by the later one.
-    if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
-      return OW_Complete;
-    // If start of the earlier access is "before" end of the later access then
-    // accesses overlap.
-    else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
-      return OW_MaybePartial;
-  }
-  // If start of the later access is "before" end of the earlier access then
-  // accesses overlap.
-  else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
-    return OW_MaybePartial;
-  }
-
-  // Can reach here only if accesses are known not to overlap. There is no
-  // dedicated code to indicate no overlap so signal "unknown".
-  return OW_Unknown;
-}
-
 /// Return 'OW_Complete' if a store to the 'Later' location completely
 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
@@ -1033,6 +914,123 @@ struct DSEState {
     return State;
   }
 
+  /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
+  /// instruction) completely overwrites a store to the 'Earlier' location
+  /// (by \p EarlierI instruction).
+  /// Return OW_MaybePartial if \p Later does not completely overwrite
+  /// \p Earlier, but they both write to the same underlying object. In that
+  /// case, use isPartialOverwrite to check if \p Later partially overwrites
+  /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
+  OverwriteResult
+  isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
+              const MemoryLocation &Later, const MemoryLocation &Earlier,
+              int64_t &EarlierOff, int64_t &LaterOff) {
+    // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
+    // get imprecise values here, though (except for unknown sizes).
+    if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+      // In case no constant size is known, try to use the IR values for the
+      // number of bytes written and check if they match.
+      const auto *LaterMemI = dyn_cast<AnyMemIntrinsic>(LaterI);
+      const auto *EarlierMemI = dyn_cast<AnyMemIntrinsic>(EarlierI);
+      if (LaterMemI && EarlierMemI) {
+        const Value *LaterV = LaterMemI->getLength();
+        const Value *EarlierV = EarlierMemI->getLength();
+        if (LaterV == EarlierV && BatchAA.isMustAlias(Earlier, Later))
+          return OW_Complete;
+      }
+
+      // Masked stores have imprecise locations, but we can reason about them
+      // to some extent.
+      return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA);
+    }
+
+    const uint64_t LaterSize = Later.Size.getValue();
+    const uint64_t EarlierSize = Earlier.Size.getValue();
+
+    // Query the alias information
+    AliasResult AAR = BatchAA.alias(Later, Earlier);
+
+    // If the start pointers are the same, we just have to compare sizes to see if
+    // the later store was larger than the earlier store.
+    if (AAR == AliasResult::MustAlias) {
+      // Make sure that the Later size is >= the Earlier size.
+      if (LaterSize >= EarlierSize)
+        return OW_Complete;
+    }
+
+    // If we hit a partial alias we may have a full overwrite
+    if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
+      int32_t Off = AAR.getOffset();
+      if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+        return OW_Complete;
+    }
+
+    // Check to see if the later store is to the entire object (either a global,
+    // an alloca, or a byval/inalloca argument). If so, then it clearly
+    // overwrites any other store to the same object.
+    const Value *P1 = Earlier.Ptr->stripPointerCasts();
+    const Value *P2 = Later.Ptr->stripPointerCasts();
+    const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
+
+    // If we can't resolve the same pointers to the same object, then we can't
+    // analyze them at all.
+    if (UO1 != UO2)
+      return OW_Unknown;
+
+    // If the "Later" store is to a recognizable object, get its size.
+    uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F);
+    if (ObjectSize != MemoryLocation::UnknownSize)
+      if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
+        return OW_Complete;
+
+    // Okay, we have stores to two completely different pointers. Try to
+    // decompose the pointer into a "base + constant_offset" form. If the base
+    // pointers are equal, then we can reason about the two stores.
+    EarlierOff = 0;
+    LaterOff = 0;
+    const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
+    const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
+
+    // If the base pointers still differ, we have two completely different stores.
+    if (BP1 != BP2)
+      return OW_Unknown;
+
+    // The later access completely overlaps the earlier store if and only if
+    // both start and end of the earlier one are "inside" the later one:
+    //    |<->|--earlier--|<->|
+    //    |-------later-------|
+    // Accesses may overlap if and only if start of one of them is "inside"
+    // another one:
+    //    |<->|--earlier--|<----->|
+    //    |-------later-------|
+    //           OR
+    //    |----- earlier -----|
+    //    |<->|---later---|<----->|
+    //
+    // We have to be careful here as *Off is signed while *.Size is unsigned.
+
+    // Check if the earlier access starts "not before" the later one.
+    if (EarlierOff >= LaterOff) {
+      // If the earlier access ends "not after" the later access then the earlier
+      // one is completely overwritten by the later one.
+      if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
+        return OW_Complete;
+      // If start of the earlier access is "before" end of the later access then
+      // accesses overlap.
+      else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
+        return OW_MaybePartial;
+    }
+    // If start of the later access is "before" end of the earlier access then
+    // accesses overlap.
+    else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
+      return OW_MaybePartial;
+    }
+
+    // Can reach here only if accesses are known not to overlap. There is no
+    // dedicated code to indicate no overlap so signal "unknown".
+    return OW_Unknown;
+  }
+
   bool isInvisibleToCallerAfterRet(const Value *V) {
     if (isa<AllocaInst>(V))
       return true;
@@ -1120,8 +1118,8 @@ struct DSEState {
 
     int64_t InstWriteOffset, DepWriteOffset;
     if (auto CC = getLocForWriteEx(UseInst))
-      return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
-                         InstWriteOffset, BatchAA, &F) == OW_Complete;
+      return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset,
+                         InstWriteOffset) == OW_Complete;
     return false;
   }
 
@@ -1224,9 +1222,8 @@ struct DSEState {
       return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
     }
     int64_t InstWriteOffset, DepWriteOffset;
-    return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI,
-                       DepWriteOffset, InstWriteOffset, BatchAA,
-                       &F) == OW_Complete;
+    return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset,
+                       InstWriteOffset) == OW_Complete;
   }
 
   // Returns true if \p Use may read from \p DefLoc.
@@ -1422,8 +1419,8 @@ struct DSEState {
         continue;
       } else {
         int64_t InstWriteOffset, DepWriteOffset;
-        auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
-                              DepWriteOffset, InstWriteOffset, BatchAA, &F);
+        auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc,
+                              DepWriteOffset, InstWriteOffset);
         // If Current does not write to the same object as KillingDef, check
         // the next candidate.
         if (OR == OW_Unknown) {
@@ -1940,9 +1937,8 @@ bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
       } else {
         // Check if NI overwrites SI.
        int64_t InstWriteOffset, DepWriteOffset;
-        OverwriteResult OR =
-            isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
-                        InstWriteOffset, State.BatchAA, &F);
+        OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc,
+                                               DepWriteOffset, InstWriteOffset);
         if (OR == OW_MaybePartial) {
           auto Iter = State.IOLs.insert(
               std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
index 9f833b2bd481..6910209364d1 100644
--- a/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
@@ -356,3 +356,346 @@ if.end10:                                         ; preds = %do.body
   store i16 1, i16* %arrayidx2, align 1
   ret i16 0
 }
+
+; Similar to above, but with an irreducible loop. The stores should not be removed.
+define i16 @irreducible(i1 %c) {
+; CHECK-LABEL: @irreducible(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[B]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    br label [[B]]
+; CHECK:       B:
+; CHECK-NEXT:    [[J_0:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[I_0]], [[A]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[J_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[J_0]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[J_0]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[A]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i16 1, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br i1 %c, label %A, label %B
+
+A:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %B ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  br label %B
+
+B:
+  %j.0 = phi i16 [ 0, %entry ], [ %i.0, %A ]
+  %arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %j.0
+  store i16 2, i16* %arrayidx, align 1
+  %inc = add nuw nsw i16 %j.0, 1
+  %exitcond = icmp eq i16 %j.0, 4
+  br i1 %exitcond, label %exit, label %A
+
+exit:
+  store i16 1, i16* %arrayidx, align 1
+  ret i16 0
+}
+
+; An irreducible loop inside another loop.
+define i16 @irreducible_nested() {
+; CHECK-LABEL: @irreducible_nested(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[OUTER:%.*]]
+; CHECK:       outer:
+; CHECK-NEXT:    [[X:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INCX:%.*]], [[OUTERL:%.*]] ]
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i16 [[X]], 2
+; CHECK-NEXT:    br i1 [[C]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[OUTER]] ], [ [[INC:%.*]], [[B]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    br label [[B]]
+; CHECK:       B:
+; CHECK-NEXT:    [[J_0:%.*]] = phi i16 [ 0, [[OUTER]] ], [ [[I_0]], [[A]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[J_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[J_0]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[J_0]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[OUTERL]], label [[A]]
+; CHECK:       outerl:
+; CHECK-NEXT:    store i16 1, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[INCX]] = add nuw nsw i16 [[X]], 1
+; CHECK-NEXT:    [[EXITCONDX:%.*]] = icmp eq i16 [[X]], 4
+; CHECK-NEXT:    br i1 [[EXITCONDX]], label [[END:%.*]], label [[OUTER]]
+; CHECK:       end:
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br label %outer
+
+outer:
+  %x = phi i16 [ 0, %entry ], [ %incx, %outerl ]
+  %c = icmp sgt i16 %x, 2
+  br i1 %c, label %A, label %B
+
+A:
+  %i.0 = phi i16 [ 0, %outer ], [ %inc, %B ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  br label %B
+
+B:
+  %j.0 = phi i16 [ 0, %outer ], [ %i.0, %A ]
+  %arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %j.0
+  store i16 2, i16* %arrayidx, align 1
+  %inc = add nuw nsw i16 %j.0, 1
+  %exitcond = icmp eq i16 %j.0, 4
+  br i1 %exitcond, label %outerl, label %A
+
+outerl:
+  store i16 1, i16* %arrayidx, align 1
+  %incx = add nuw nsw i16 %x, 1
+  %exitcondx = icmp eq i16 %x, 4
+  br i1 %exitcondx, label %end, label %outer
+
+end:
+  ret i16 0
+}
+
+define i16 @multi_overwrite(i1 %cond) {
+; CHECK-LABEL: @multi_overwrite(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END2:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[DO_STORE:%.*]], label [[IF_END2]]
+; CHECK:       do.store:
+; CHECK-NEXT:    store i16 3, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    br label [[IF_END2]]
+; CHECK:       if.end2:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
+; CHECK-NEXT:    br label [[DO_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i16 1, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br label %do.body
+
+do.body:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %if.end2 ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  store i16 2, i16* %arrayidx2, align 1
+  %exitcond = icmp eq i16 %i.0, 4
+  br i1 %exitcond, label %exit, label %if.end
+
+if.end:
+  br i1 %cond, label %do.store, label %if.end2
+
+do.store:
+  store i16 3, i16* %arrayidx2, align 1
+  br label %if.end2
+
+if.end2:
+  %inc = add nuw nsw i16 %i.0, 1
+  br label %do.body
+
+exit:
+  store i16 1, i16* %arrayidx2, align 1
+  ret i16 0
+}
+
+define void @test(i8* noalias %data1, i8* %data2, i16* %data3, i32 %i1)
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[I1:%.*]], 0
+; CHECK-NEXT:    br label [[PH0:%.*]]
+; CHECK:       ph0:
+; CHECK-NEXT:    br label [[HEADER0:%.*]]
+; CHECK:       header0:
+; CHECK-NEXT:    [[P1:%.*]] = phi i32 [ 0, [[PH0]] ], [ [[PN1:%.*]], [[END1:%.*]] ]
+; CHECK-NEXT:    [[PN1]] = add i32 [[P1]], 1
+; CHECK-NEXT:    [[PC1:%.*]] = icmp slt i32 [[PN1]], 5
+; CHECK-NEXT:    [[V2:%.*]] = getelementptr [10 x i16], [10 x i16]* @x, i32 0, i32 [[P1]]
+; CHECK-NEXT:    store i16 1, i16* [[V2]], align 2
+; CHECK-NEXT:    br i1 [[C]], label [[THEN1:%.*]], label [[ELSE1:%.*]]
+; CHECK:       then1:
+; CHECK-NEXT:    store i16 2, i16* [[V2]], align 2
+; CHECK-NEXT:    br label [[END1]]
+; CHECK:       else1:
+; CHECK-NEXT:    br label [[END1]]
+; CHECK:       end1:
+; CHECK-NEXT:    br i1 [[PC1]], label [[HEADER0]], label [[END0:%.*]]
+; CHECK:       end0:
+; CHECK-NEXT:    br label [[HEADER2:%.*]]
+; CHECK:       header2:
+; CHECK-NEXT:    [[P3:%.*]] = phi i32 [ 0, [[END0]] ], [ [[PN3:%.*]], [[HEADER2]] ]
+; CHECK-NEXT:    [[PN3]] = add i32 [[P3]], 1
+; CHECK-NEXT:    [[PC3:%.*]] = icmp slt i32 [[PN3]], 5
+; CHECK-NEXT:    store i16 4, i16* [[V2]], align 2
+; CHECK-NEXT:    br i1 [[PC3]], label [[HEADER2]], label [[END2:%.*]]
+; CHECK:       end2:
+; CHECK-NEXT:    ret void
+;
+{
+  %c = icmp eq i32 %i1, 0
+  br label %ph0
+ph0:
+  br label %header0
+header0:
+  %p1 = phi i32 [0, %ph0], [%pn1, %end1]
+  %pn1 = add i32 %p1, 1
+  %pc1 = icmp slt i32 %pn1, 5
+  %v2 = getelementptr [10 x i16], [10 x i16]* @x, i32 0, i32 %p1
+  store i16 1, i16* %v2
+  br i1 %c, label %then1, label %else1
+then1:
+  store i16 2, i16* %v2
+  br label %end1
+else1:
+  br label %end1
+end1:
+  br i1 %pc1, label %header0, label %end0
+end0:
+  br label %header2
+header2:
+  %p3 = phi i32 [0, %end0], [%pn3, %header2]
+  %pn3 = add i32 %p3, 1
+  %pc3 = icmp slt i32 %pn3, 5
+  store i16 4, i16* %v2
+  br i1 %pc3, label %header2, label %end2
+end2:
+  ret void
+}
+
+; Similar to above, but with multiple partial overlaps
+define i16 @partial_override_fromloop(i1 %c, i32 %i) {
+; CHECK-LABEL: @partial_override_fromloop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END2:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[DO_STORE:%.*]], label [[IF_END2]]
+; CHECK:       do.store:
+; CHECK-NEXT:    store i16 3, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    br label [[IF_END2]]
+; CHECK:       if.end2:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
+; CHECK-NEXT:    br label [[DO_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i16* [[ARRAYIDX2]] to i8*
+; CHECK-NEXT:    [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
+; CHECK-NEXT:    store i8 10, i8* [[BC]], align 1
+; CHECK-NEXT:    store i8 11, i8* [[BC2]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br label %do.body
+
+do.body:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %if.end2 ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  store i16 2, i16* %arrayidx2, align 1
+  %exitcond = icmp eq i16 %i.0, 4
+  br i1 %exitcond, label %exit, label %if.end
+
+if.end:
+  br i1 %c, label %do.store, label %if.end2
+
+do.store:
+  store i16 3, i16* %arrayidx2, align 1
+  br label %if.end2
+
+if.end2:
+  %inc = add nuw nsw i16 %i.0, 1
+  br label %do.body
+
+exit:
+  %bc = bitcast i16* %arrayidx2 to i8*
+  %bc2 = getelementptr inbounds i8, i8* %bc, i32 1
+  store i8 10, i8* %bc, align 1
+  store i8 11, i8* %bc2, align 1
+  ret i16 0
+}
+
+
+define i16 @partial_override_overloop(i1 %c, i32 %i) {
+; CHECK-LABEL: @partial_override_overloop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i32 [[I:%.*]]
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[DO_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i16* [[ARRAYIDX]] to i8*
+; CHECK-NEXT:    [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
+; CHECK-NEXT:    store i8 10, i8* [[BC]], align 1
+; CHECK-NEXT:    store i8 11, i8* [[BC2]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  %arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i32 %i
+  store i16 1, i16* %arrayidx, align 1
+  br label %do.body
+
+do.body:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %do.body ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  store i16 2, i16* %arrayidx2, align 1
+  %exitcond = icmp eq i16 %i.0, 4
+  %inc = add nuw nsw i16 %i.0, 1
+  br i1 %exitcond, label %exit, label %do.body
+
+exit:
+  %bc = bitcast i16* %arrayidx to i8*
+  %bc2 = getelementptr inbounds i8, i8* %bc, i32 1
+  store i8 10, i8* %bc, align 1
+  store i8 11, i8* %bc2, align 1
+  ret i16 0
+}
+
+define i16 @partial_override_multi(i1 %c, i32 %i) {
+; CHECK-LABEL: @partial_override_multi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 10, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i16* [[ARRAYIDX2]] to i8*
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[DO_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
+; CHECK-NEXT:    store i8 11, i8* [[BC2]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br label %do.body
+
+do.body:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %do.body ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  store i16 2, i16* %arrayidx2, align 1
+  %bc = bitcast i16* %arrayidx2 to i8*
+  store i8 10, i8* %bc, align 1
+  %exitcond = icmp eq i16 %i.0, 4
+  %inc = add nuw nsw i16 %i.0, 1
+  br i1 %exitcond, label %exit, label %do.body
+
+exit:
+  %bc2 = getelementptr inbounds i8, i8* %bc, i32 1
+  store i8 11, i8* %bc2, align 1
+  ret i16 0
+}
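Note on the overlap arithmetic: once isOverwrite() has reduced both stores to a common base pointer plus constant offsets, the final classification is pure integer arithmetic on signed offsets and unsigned sizes. The following self-contained C++ sketch (hypothetical names such as classifyOverlap; not the LLVM implementation, which is shown verbatim in the patch above) reproduces just that classification step and checks a few cases:

// Standalone sketch of the offset/size classification performed at the
// end of isOverwrite() once both stores share a base pointer.
#include <cassert>
#include <cstdint>

enum OverwriteResult { OW_Complete, OW_MaybePartial, OW_Unknown };

// Offsets are signed (relative to the common base); sizes are unsigned
// byte counts, so differences are cast before the comparisons.
OverwriteResult classifyOverlap(int64_t EarlierOff, uint64_t EarlierSize,
                                int64_t LaterOff, uint64_t LaterSize) {
  if (EarlierOff >= LaterOff) {
    // Earlier starts at or after Later. If it also ends at or before the
    // end of Later, it is completely overwritten.
    if ((uint64_t)(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
      return OW_Complete;
    // Earlier starts inside Later but runs past its end: partial overlap.
    if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
      return OW_MaybePartial;
  } else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
    // Later starts inside Earlier: partial overlap.
    return OW_MaybePartial;
  }
  // Known not to overlap; there is no dedicated result, so "unknown".
  return OW_Unknown;
}

int main() {
  // Later [0,8) fully covers Earlier [2,6).
  assert(classifyOverlap(2, 4, 0, 8) == OW_Complete);
  // Earlier [2,12) starts inside Later [0,8) but runs past its end.
  assert(classifyOverlap(2, 10, 0, 8) == OW_MaybePartial);
  // Later [4,8) starts inside Earlier [0,6).
  assert(classifyOverlap(0, 6, 4, 4) == OW_MaybePartial);
  // Disjoint: Earlier [0,2) vs. Later [4,8).
  assert(classifyOverlap(0, 2, 4, 4) == OW_Unknown);
  return 0;
}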
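When either store's size is imprecise, the function instead asks whether the two writes compute their byte count from the same IR Value and must-alias the same address. A minimal sketch of that shape, with hypothetical stand-ins (LengthValue, overwritesUnknownSize) for the IR types, under the assumption that pointer identity models IR Value identity:

#include <cassert>

struct LengthValue {}; // stands in for an llvm::Value* computing a length

// OW_Complete is justified only when both writes use the *same* length
// value and provably store to the same address; lengths that are merely
// equal at runtime but come from distinct IR values are not enough.
bool overwritesUnknownSize(const LengthValue *LaterLen,
                           const LengthValue *EarlierLen, bool MustAlias) {
  return LaterLen && LaterLen == EarlierLen && MustAlias;
}

int main() {
  LengthValue N; // one IR value for the byte count, e.g. %n
  assert(overwritesUnknownSize(&N, &N, /*MustAlias=*/true));
  LengthValue M; // a different IR value, even if equal at runtime
  assert(!overwritesUnknownSize(&N, &M, /*MustAlias=*/true));
  assert(!overwritesUnknownSize(&N, &N, /*MustAlias=*/false));
  return 0;
}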