[LV] Try to sink users recursively for first-order recurrences.

Update isFirstOrderRecurrence to explore all uses of a recurrence phi
and check if we can sink them. If there are multiple users to sink, they
are all mapped to the previous instruction.

Fixes PR44286 (and another PR or two).

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D84951
Florian Hahn 2021-05-31 12:06:28 +01:00
parent 73b759a33a
commit aa00b1d763
7 changed files with 252 additions and 68 deletions
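
To illustrate the kind of loop this enables, here is a rough standalone C++ sketch (not part of the patch; the function and variable names are made up). The updated test @sink_after_with_multiple_users below computes b[i] = (a[i] + 2) * a[i + 1]: the value of a[i] is the previous iteration's load, and both the add and the multiply depend on it, so the whole chain of users has to be sunk below the current iteration's load.

// Rough standalone sketch (not part of the patch): 'prev' carries a[i] from the
// previous iteration, so both the add and the multiply depend on the recurrence
// and must be sunk below the load of a[i + 1] before vectorization.
// Assumes 'a' holds at least n + 1 elements.
void recurrence_with_multiple_users(short *a, int *b, long n) {
  short prev = a[0];
  for (long i = 0; i < n; ++i) {
    short cur = a[i + 1];    // becomes 'prev' in the next iteration
    int add = prev + 2;      // first user of the recurrence
    b[i] = add * cur;        // second, transitive user
    prev = cur;              // the first-order recurrence itself
  }
}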


@@ -14,6 +14,7 @@
 #define LLVM_ANALYSIS_IVDESCRIPTORS_H
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -174,7 +175,7 @@ public:
   /// to handle Phi as a first-order recurrence.
   static bool
   isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
-                         DenseMap<Instruction *, Instruction *> &SinkAfter,
+                         MapVector<Instruction *, Instruction *> &SinkAfter,
                          DominatorTree *DT);

   RecurKind getRecurrenceKind() const { return Kind; }


@@ -295,7 +295,7 @@ public:
   RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; }

   /// Return the set of instructions to sink to handle first-order recurrences.
-  DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
+  MapVector<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }

   /// Returns the widest induction type.
   Type *getWidestInductionType() { return WidestIndTy; }
@@ -518,7 +518,7 @@ private:
   /// Holds instructions that need to sink past other instructions to handle
   /// first-order recurrences.
-  DenseMap<Instruction *, Instruction *> SinkAfter;
+  MapVector<Instruction *, Instruction *> SinkAfter;

   /// Holds the widest induction type encountered.
   Type *WidestIndTy = nullptr;
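
A side note on the container change above: the reason is not spelled out in the patch, but llvm::MapVector, unlike DenseMap, iterates in insertion order, which keeps the now multi-entry SinkAfter mapping deterministic. A minimal standalone sketch of that property (hypothetical keys and values, not code from the patch):

// Minimal sketch: MapVector remembers insertion order; DenseMap does not
// guarantee any iteration order. Keys and values here are placeholders.
#include "llvm/ADT/MapVector.h"
#include <cstdio>

int main() {
  llvm::MapVector<int, const char *> SinkAfter;
  SinkAfter[30] = "a";
  SinkAfter[10] = "b";
  SinkAfter[20] = "c";
  // Always prints 30, 10, 20: the order the entries were inserted in.
  for (const auto &Entry : SinkAfter)
    std::printf("%d -> %s\n", Entry.first, Entry.second);
  return 0;
}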


@@ -34,6 +34,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"

+#include <set>
+
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -715,7 +717,7 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,

 bool RecurrenceDescriptor::isFirstOrderRecurrence(
     PHINode *Phi, Loop *TheLoop,
-    DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
+    MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

   // Ensure the phi node is in the loop header and has two incoming values.
   if (Phi->getParent() != TheLoop->getHeader() ||
@@ -741,51 +743,76 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
       SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
     return false;

-  // Ensure every user of the phi node is dominated by the previous value.
-  // The dominance requirement ensures the loop vectorizer will not need to
-  // vectorize the initial value prior to the first iteration of the loop.
-  // TODO: Consider extending this sinking to handle memory instructions and
-  // phis with multiple users.
-
-  // Returns true, if all users of I are dominated by DominatedBy.
-  auto allUsesDominatedBy = [DT](Instruction *I, Instruction *DominatedBy) {
-    return all_of(I->uses(), [DT, DominatedBy](Use &U) {
-      return DT->dominates(DominatedBy, U);
-    });
-  };
-
-  if (Phi->hasOneUse()) {
-    Instruction *I = Phi->user_back();
-
-    // If the user of the PHI is also the incoming value, we potentially have a
-    // reduction and which cannot be handled by sinking.
-    if (Previous == I)
-      return false;
-
-    // We cannot sink terminator instructions.
-    if (I->getParent()->getTerminator() == I)
-      return false;
-
-    // Do not try to sink an instruction multiple times (if multiple operands
-    // are first order recurrences).
-    // TODO: We can support this case, by sinking the instruction after the
-    // 'deepest' previous instruction.
-    if (SinkAfter.find(I) != SinkAfter.end())
-      return false;
-
-    if (DT->dominates(Previous, I)) // We already are good w/o sinking.
-      return true;
-
-    // We can sink any instruction without side effects, as long as all users
-    // are dominated by the instruction we are sinking after.
-    if (I->getParent() == Phi->getParent() && !I->mayHaveSideEffects() &&
-        allUsesDominatedBy(I, Previous)) {
-      SinkAfter[I] = Previous;
-      return true;
-    }
-  }
-
-  return allUsesDominatedBy(Phi, Previous);
+  // Ensure every user of the phi node (recursively) is dominated by the
+  // previous value. The dominance requirement ensures the loop vectorizer will
+  // not need to vectorize the initial value prior to the first iteration of the
+  // loop.
+  // TODO: Consider extending this sinking to handle memory instructions.
+
+  // We optimistically assume we can sink all users after Previous. Keep a set
+  // of instructions to sink after Previous ordered by dominance in the common
+  // basic block. It will be applied to SinkAfter if all users can be sunk.
+  auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
+    return A->comesBefore(B);
+  };
+  std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
+      CompareByComesBefore);
+
+  BasicBlock *PhiBB = Phi->getParent();
+  SmallVector<Instruction *, 8> WorkList;
+  auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
+    // Already sunk SinkCandidate.
+    if (SinkCandidate->getParent() == PhiBB &&
+        InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
+      return true;
+
+    // Cyclic dependence.
+    if (Previous == SinkCandidate)
+      return false;
+
+    if (DT->dominates(Previous,
+                      SinkCandidate)) // We already are good w/o sinking.
+      return true;
+
+    if (SinkCandidate->getParent() != PhiBB ||
+        SinkCandidate->mayHaveSideEffects() ||
+        SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
+      return false;
+
+    // Do not try to sink an instruction multiple times (if multiple operands
+    // are first order recurrences).
+    // TODO: We can support this case, by sinking the instruction after the
+    // 'deepest' previous instruction.
+    if (SinkAfter.find(SinkCandidate) != SinkAfter.end())
+      return false;
+
+    // If we reach a PHI node that is not dominated by Previous, we reached a
+    // header PHI. No need for sinking.
+    if (isa<PHINode>(SinkCandidate))
+      return true;
+
+    // Sink User tentatively and check its users
+    InstrsToSink.insert(SinkCandidate);
+    WorkList.push_back(SinkCandidate);
+    return true;
+  };
+
+  WorkList.push_back(Phi);
+
+  // Try to recursively sink instructions and their users after Previous.
+  while (!WorkList.empty()) {
+    Instruction *Current = WorkList.pop_back_val();
+    for (User *User : Current->users()) {
+      if (!TryToPushSinkCandidate(cast<Instruction>(User)))
+        return false;
+    }
+  }
+
+  // We can sink all users of Phi. Update the mapping.
+  for (Instruction *I : InstrsToSink) {
+    SinkAfter[I] = Previous;
+    Previous = I;
+  }
+  return true;
 }

 /// This function returns the identity element (or neutral element) for
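
The final loop in the new code is where multiple users are recorded: the candidates collected in InstrsToSink are visited in program order (comesBefore), and each one is mapped to sink after the previously placed instruction, forming a chain behind the recurrence's Previous value. A standalone sketch with plain standard-library containers (strings stand in for Instructions; all names here are invented):

// Standalone sketch of the chaining step; std::set keeps the "instructions"
// ordered by their position, mirroring the comesBefore-based set in the patch.
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <utility>

int main() {
  // (position in block, name), ordered ascending like comesBefore.
  std::set<std::pair<int, std::string>> InstrsToSink = {
      {1, "add.1"}, {2, "add.2"}, {3, "add.3"}};
  std::map<std::string, std::string> SinkAfter;
  std::string Previous = "pre.next"; // the recurrence's next value

  for (const auto &I : InstrsToSink) {
    SinkAfter[I.second] = Previous; // mirrors SinkAfter[I] = Previous;
    Previous = I.second;            // mirrors Previous = I;
  }
  // add.1 sinks after pre.next, add.2 after add.1, add.3 after add.2.
  for (const auto &Entry : SinkAfter)
    std::printf("%s sinks after %s\n", Entry.first.c_str(), Entry.second.c_str());
  return 0;
}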


@@ -344,7 +344,7 @@ private:
   /// Legal. This method is only used for the legacy inner loop vectorizer.
   VPlanPtr buildVPlanWithVPRecipes(
       VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
-      const DenseMap<Instruction *, Instruction *> &SinkAfter);
+      const MapVector<Instruction *, Instruction *> &SinkAfter);

   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is


@@ -8994,7 +8994,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
   auto &ConditionalAssumes = Legal->getConditionalAssumes();
   DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end());

-  DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
+  MapVector<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
   // Dead instructions do not need sinking. Remove them from SinkAfter.
   for (Instruction *I : DeadInstructions)
     SinkAfter.erase(I);
@@ -9010,7 +9010,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
 VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
-    const DenseMap<Instruction *, Instruction *> &SinkAfter) {
+    const MapVector<Instruction *, Instruction *> &SinkAfter) {

   SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;


@@ -173,20 +173,54 @@ exit:
 ret void
 }
-; FIXME: Currently we can only sink a single instruction. For the example below,
-; we also have to sink users.
-define void @cannot_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) {
-; CHECK-LABEL: @cannot_sink_with_additional_user(
+; Sink users of %pre.phi recursively.
+define void @can_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) {
+; CHECK-LABEL: @can_sink_with_additional_user(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br label [[PREHEADER:%.*]]
 ; CHECK: preheader:
 ; CHECK-NEXT: [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1996
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1999, 1996
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
 ; CHECK-NEXT: br label [[FOR:%.*]]
 ; CHECK: for:
-; CHECK-NEXT: [[PRE_PHI:%.*]] = phi i32 [ [[DOTPRE]], [[PREHEADER]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
-; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[PRE_PHI]], [[X:%.*]]
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
+; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[SCALAR_RECUR]], [[X]]
 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[X]]
 ; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 [[IV]]
 ; CHECK-NEXT: [[PRE_NEXT]] = load i32, i32* [[IDX_1]], align 4
@@ -196,14 +230,13 @@ define void @cannot_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) {
 ; CHECK-NEXT: store i32 [[ADD_4]], i32* [[IDX_2]], align 4
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
 entry:
 br label %preheader
@@ -378,7 +411,9 @@ bb74: ; preds = %bb13
 ret void
 }
-; Users that are phi nodes cannot be sunk.
+; The (first) reason `%first_time.1` cannot be sunk is because it appears outside
+; the header and is not dominated by Previous. The fact that it feeds Previous
+; is a second sinking-preventing reason.
 define void @cannot_sink_phi(i32* %ptr) {
 ; CHECK-LABEL: @cannot_sink_phi(
 ; CHECK-NEXT: entry:
@@ -407,19 +442,19 @@ define void @cannot_sink_phi(i32* %ptr) {
 entry:
 br label %loop.header
-loop.header: ; preds = %if.end128, %for.cond108.preheader
+loop.header:
 %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.latch ]
 %for = phi i32 [ 0, %entry ], [ %for.next, %loop.latch ]
 %c.1 = icmp ult i64 %iv, 500
 br i1 %c.1, label %if.truebb, label %if.falsebb
-if.truebb: ; preds = %for.body114
+if.truebb:
 br label %loop.latch
-if.falsebb: ; preds = %for.body114
+if.falsebb:
 br label %loop.latch
-loop.latch: ; preds = %if.then122, %for.body114.if.end128_crit_edge
+loop.latch:
 %first_time.1 = phi i32 [ 20, %if.truebb ], [ %for, %if.falsebb ]
 %c.2 = icmp ult i64 %iv, 800
 %for.next = select i1 %c.2, i32 30, i32 %first_time.1
@@ -606,20 +641,67 @@ if.end:
 define void @sink_dominance(i32* %ptr, i32 %N) {
 ; CHECK-LABEL: @sink_dominance(
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
+; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP0]])
+; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[MUL_RESULT]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[MUL_RESULT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4
+; CHECK-NEXT: [[TMP12]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP12]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = icmp slt <4 x i32> [[TMP14]], <i32 213, i32 213, i32 213, i32 213>
+; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP14]], <4 x i32> <i32 22, i32 22, i32 22, i32 22>
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP17]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP12]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP12]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
-; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[FOR_TRUNC:%.*]] = trunc i64 [[FOR]] to i32
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOR_TRUNC:%.*]] = trunc i64 [[SCALAR_RECUR]] to i32
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[FOR_TRUNC]], 213
 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[FOR_TRUNC]], i32 22
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[IV]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 [[IV]]
 ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP]], align 4
 ; CHECK-NEXT: [[FOR_NEXT]] = zext i32 [[LV]] to i64
 ; CHECK-NEXT: store i32 [[SELECT]], i32* [[GEP]], align 4
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT: [[CMP73:%.*]] = icmp ugt i32 [[N:%.*]], [[IV_NEXT]]
-; CHECK-NEXT: br i1 [[CMP73]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: [[CMP73:%.*]] = icmp ugt i32 [[N]], [[IV_NEXT]]
+; CHECK-NEXT: br i1 [[CMP73]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -652,22 +734,71 @@ exit:
 define void @sink_dominance_2(i32* %ptr, i32 %N) {
 ; CHECK-LABEL: @sink_dominance_2(
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
+; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP0]])
+; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[MUL_RESULT]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[MUL_RESULT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4
+; CHECK-NEXT: [[TMP12]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP12]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: [[TMP16:%.*]] = mul <4 x i32> [[TMP15]], <i32 99, i32 99, i32 99, i32 99>
+; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <4 x i32> [[TMP14]], <i32 213, i32 213, i32 213, i32 213>
+; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[TMP14]], <4 x i32> [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP19]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP12]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP12]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
-; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[FOR_TRUNC:%.*]] = trunc i64 [[FOR]] to i32
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOR_TRUNC:%.*]] = trunc i64 [[SCALAR_RECUR]] to i32
 ; CHECK-NEXT: [[STEP:%.*]] = add i32 [[FOR_TRUNC]], 2
 ; CHECK-NEXT: [[STEP_2:%.*]] = mul i32 [[STEP]], 99
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[FOR_TRUNC]], 213
 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[FOR_TRUNC]], i32 [[STEP_2]]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 [[IV]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 [[IV]]
 ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP]], align 4
 ; CHECK-NEXT: [[FOR_NEXT]] = zext i32 [[LV]] to i64
 ; CHECK-NEXT: store i32 [[SELECT]], i32* [[GEP]], align 4
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT: [[CMP73:%.*]] = icmp ugt i32 [[N:%.*]], [[IV_NEXT]]
-; CHECK-NEXT: br i1 [[CMP73]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: [[CMP73:%.*]] = icmp ugt i32 [[N]], [[IV_NEXT]]
+; CHECK-NEXT: br i1 [[CMP73]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;


@@ -545,11 +545,36 @@ for.end:
 ; b[i] = ((a[i] + 2) * a[i + 1]);
 ; }
 ;
-; NO-SINK-AFTER-LABEL: no_sink_after
-; NO-SINK-AFTER-NOT: vector.ph:
-; NO-SINK-AFTER: }
+; SINK-AFTER-LABEL: @sink_after_with_multiple_users
+; SINK-AFTER-LABEL: vector.ph:
+; SINK-AFTER-NEXT: %n.mod.vf = urem i64 %n, 4
+; SINK-AFTER-NEXT: %n.vec = sub i64 %n, %n.mod.vf
+; SINK-AFTER-NEXT: %vector.recur.init = insertelement <4 x i16> poison, i16 %.pre, i32 3
+; SINK-AFTER-NEXT: br label %vector.body
+; SINK-AFTER-LABEL: vector.body:
+; SINK-AFTER-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; SINK-AFTER-NEXT: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ]
+; SINK-AFTER-NEXT: %1 = add i64 %index, 0
+; SINK-AFTER-NEXT: %2 = add nuw nsw i64 %1, 1
+; SINK-AFTER-NEXT: %3 = getelementptr inbounds i16, i16* %a, i64 %2
+; SINK-AFTER-NEXT: %4 = getelementptr inbounds i16, i16* %3, i32 0
+; SINK-AFTER-NEXT: %5 = bitcast i16* %4 to <4 x i16>*
+; SINK-AFTER-NEXT: %wide.load = load <4 x i16>, <4 x i16>* %5, align 2, !alias.scope !43
+; SINK-AFTER-NEXT: %6 = shufflevector <4 x i16> %vector.recur, <4 x i16> %wide.load, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT: %7 = sext <4 x i16> %6 to <4 x i32>
+; SINK-AFTER-NEXT: %8 = add nsw <4 x i32> %7, <i32 2, i32 2, i32 2, i32 2>
+; SINK-AFTER-NEXT: %9 = sext <4 x i16> %wide.load to <4 x i32>
+; SINK-AFTER-NEXT: %10 = mul nsw <4 x i32> %8, %9
+; SINK-AFTER-NEXT: %11 = getelementptr inbounds i32, i32* %b, i64 %1
+; SINK-AFTER-NEXT: %12 = getelementptr inbounds i32, i32* %11, i32 0
+; SINK-AFTER-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; SINK-AFTER-NEXT: store <4 x i32> %10, <4 x i32>* %13, align 4, !alias.scope !46, !noalias !43
+; SINK-AFTER-NEXT: %index.next = add i64 %index, 4
+; SINK-AFTER-NEXT: %14 = icmp eq i64 %index.next, %n.vec
+; SINK-AFTER-NEXT: br i1 %14, label %middle.block, label %vector.body, !llvm.loop !48
 ;
-define void @no_sink_after(i16* %a, i32* %b, i64 %n) {
+define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) {
 entry:
 %.pre = load i16, i16* %a
 br label %for.body
@@ -626,7 +651,7 @@ define void @sink_dead_inst() {
 ; SINK-AFTER-NEXT: %index.next = add i32 %index, 4
 ; SINK-AFTER-NEXT: %vec.ind.next = add <4 x i16> %vec.ind, <i16 4, i16 4, i16 4, i16 4>
 ; SINK-AFTER-NEXT: %6 = icmp eq i32 %index.next, 40
-; SINK-AFTER-NEXT: br i1 %6, label %middle.block, label %vector.body, !llvm.loop !43
+; SINK-AFTER-NEXT: br i1 %6, label %middle.block, label %vector.body, !llvm.loop !50
 ;
 entry:
 br label %for.cond
@@ -708,7 +733,7 @@ define i32 @sink_into_replication_region(i32 %y) {
 ; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI1]], [[TMP22]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof !45, [[LOOP46:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof !52, [[LOOP46:!llvm.loop !.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI1]]
 ; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
@@ -719,7 +744,7 @@ define i32 @sink_into_replication_region(i32 %y) {
 ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret i32 [[TMP]]
 ; CHECK: bb2:
-; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof !47, [[LOOP48:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof !54, [[LOOP48:!llvm.loop !.*]]
 ;
 bb:
 br label %bb2
@@ -835,7 +860,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof !45, [[LOOP49:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof !52, [[LOOP49:!llvm.loop !.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI4]]
 ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]])
@@ -846,7 +871,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret i32 [[TMP]]
 ; CHECK: bb2:
-; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof !47, [[LOOP50:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof !54, [[LOOP50:!llvm.loop !.*]]
 ;
 bb:
 br label %bb2