[SLP]Fix comparator for cmp instruction vectorization.

The comparator for the sort functions should provide strict weak
ordering relation between parameters. Current solution causes compiler
crash with some standard c++ library implementations, because it does
not meet this criterion. Tried to fix it + it improves the overall
vectorization result.

Differential Revision: https://reviews.llvm.org/D115268
This commit is contained in:
Alexey Bataev 2021-12-07 10:16:14 -08:00
parent 0d13f94c1d
commit 19c5cf4167
3 changed files with 289 additions and 229 deletions

View File

@ -9483,6 +9483,59 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
return Changed;
}
/// Compare two cmp instructions. If IsCompatibility is true, function returns
/// true if 2 cmps have same/swapped predicates and most compatible corresponding
/// operands. If IsCompatibility is false, function implements strict weak
/// ordering relation between two cmp instructions, returning true if the first
/// instruction is "less" than the second, i.e. its predicate is less than the
/// predicate of the second or the operands IDs are less than the operands IDs
/// of the second cmp instruction.
///
/// \param V  first value; must be a CmpInst (it is cast<> unconditionally).
/// \param V2 second value; must be a CmpInst (it is cast<> unconditionally).
/// \param IsDeleted callback queried for the second cmp only; if it returns
/// true the function returns false in both modes.
///
/// Throughout the body, `return !IsCompatibility` is the "first is less" exit:
/// it yields true in ordering mode and false in compatibility mode. A plain
/// `return false` means "not less" in ordering mode and "not compatible" in
/// compatibility mode.
template <bool IsCompatibility>
static bool compareCmp(Value *V, Value *V2,
function_ref<bool(Instruction *)> IsDeleted) {
auto *CI1 = cast<CmpInst>(V);
auto *CI2 = cast<CmpInst>(V2);
// Only the second cmp is checked for deletion / element type validity.
if (IsDeleted(CI2) || !isValidElementType(CI2->getType()))
return false;
// First key: type ID of the type of operand 0.
if (CI1->getOperand(0)->getType()->getTypeID() <
CI2->getOperand(0)->getType()->getTypeID())
return !IsCompatibility;
if (CI1->getOperand(0)->getType()->getTypeID() >
CI2->getOperand(0)->getType()->getTypeID())
return false;
// Second key: the "base" predicate, i.e. the smaller of a predicate and its
// swapped form, so that a predicate and its swapped counterpart compare equal.
CmpInst::Predicate Pred1 = CI1->getPredicate();
CmpInst::Predicate Pred2 = CI2->getPredicate();
CmpInst::Predicate SwapPred1 = CmpInst::getSwappedPredicate(Pred1);
CmpInst::Predicate SwapPred2 = CmpInst::getSwappedPredicate(Pred2);
CmpInst::Predicate BasePred1 = std::min(Pred1, SwapPred1);
CmpInst::Predicate BasePred2 = std::min(Pred2, SwapPred2);
if (BasePred1 < BasePred2)
return !IsCompatibility;
if (BasePred1 > BasePred2)
return false;
// Compare operands.
// If the predicates are equal, both flags are true and operands are visited in
// the same order. Otherwise exactly one flag is false and that cmp's operands
// are visited in reverse order (presumably because the predicates are then
// swapped forms of each other, given the equal base predicates).
bool LEPreds = Pred1 <= Pred2;
bool GEPreds = Pred1 >= Pred2;
for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) {
auto *Op1 = CI1->getOperand(LEPreds ? I : E - I - 1);
auto *Op2 = CI2->getOperand(GEPreds ? I : E - I - 1);
// Third key: the value IDs of the corresponding operands.
if (Op1->getValueID() < Op2->getValueID())
return !IsCompatibility;
if (Op1->getValueID() > Op2->getValueID())
return false;
// Extra checks apply only when both operands are instructions; any other
// operand pair with equal value IDs falls through to the next operand.
if (auto *I1 = dyn_cast<Instruction>(Op1))
if (auto *I2 = dyn_cast<Instruction>(Op2)) {
// Instructions from different basic blocks: return false in both modes.
if (I1->getParent() != I2->getParent())
return false;
// Same block: keep going only if getSameOpcode reports an opcode for the pair.
InstructionsState S = getSameOpcode({I1, I2});
if (S.getOpcode())
continue;
return false;
}
}
// No difference found: compatible in compatibility mode (true), and "not less"
// in ordering mode (false).
return IsCompatibility;
}
bool SLPVectorizerPass::vectorizeSimpleInstructions(
SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
bool AtTerminator) {
@ -9514,37 +9567,16 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(
}
// Try to vectorize list of compares.
// Sort by type, compare predicate, etc.
// TODO: Add analysis on the operand opcodes (profitable to vectorize
// instructions with same/alternate opcodes/const values).
auto &&CompareSorter = [&R](Value *V, Value *V2) {
auto *CI1 = cast<CmpInst>(V);
auto *CI2 = cast<CmpInst>(V2);
if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
return false;
if (CI1->getOperand(0)->getType()->getTypeID() <
CI2->getOperand(0)->getType()->getTypeID())
return true;
if (CI1->getOperand(0)->getType()->getTypeID() >
CI2->getOperand(0)->getType()->getTypeID())
return false;
return CI1->getPredicate() < CI2->getPredicate() ||
(CI1->getPredicate() > CI2->getPredicate() &&
CI1->getPredicate() <
CmpInst::getSwappedPredicate(CI2->getPredicate()));
return compareCmp<false>(V, V2,
[&R](Instruction *I) { return R.isDeleted(I); });
};
auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) {
if (V1 == V2)
return true;
auto *CI1 = cast<CmpInst>(V1);
auto *CI2 = cast<CmpInst>(V2);
if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
return false;
if (CI1->getOperand(0)->getType() != CI2->getOperand(0)->getType())
return false;
return CI1->getPredicate() == CI2->getPredicate() ||
CI1->getPredicate() ==
CmpInst::getSwappedPredicate(CI2->getPredicate());
return compareCmp<true>(V1, V2,
[&R](Instruction *I) { return R.isDeleted(I); });
};
auto Limit = [&R](Value *V) {
unsigned EltSize = R.getVectorElementSize(V);

View File

@ -21,233 +21,222 @@ define void @n() local_unnamed_addr #0 {
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 1), align 4
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 2), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 3), align 4
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0), align 16
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 1), align 4
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 2), align 8
; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 3), align 4
; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 0), align 16
; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 1), align 4
; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 2), align 8
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 3), align 4
; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 0), align 16
; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 1), align 4
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 2), align 8
; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 3), align 4
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 0), align 16
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 1), align 4
; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 2), align 8
; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 3), align 4
; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0) to <16 x i32>*), align 16
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[SPEC_SELECT8_3_7:%.*]], [[FOR_COND]] ], [ undef, [[ENTRY]] ]
; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -183
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP31]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP32:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]]
; CHECK-NEXT: [[TMP33:%.*]] = icmp slt <4 x i32> [[TMP32]], zeroinitializer
; CHECK-NEXT: [[TMP34:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP32]]
; CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP33]], <4 x i32> [[TMP34]], <4 x i32> [[TMP32]]
; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP35]])
; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP36]], [[B_0]]
; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP36]], i32 [[B_0]]
; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP30]], [[TMP1]]
; CHECK-NEXT: [[TMP37:%.*]] = icmp slt i32 [[SUB_116]], 0
; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -183
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <4 x i32> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP20]])
; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP21]], [[B_0]]
; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP21]], i32 [[B_0]]
; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP15]], [[TMP1]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp slt i32 [[SUB_116]], 0
; CHECK-NEXT: [[NEG_117:%.*]] = sub nsw i32 0, [[SUB_116]]
; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[NEG_117]], i32 [[SUB_116]]
; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP38]], [[OP_EXTRA1]]
; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP38]], i32 [[OP_EXTRA1]]
; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]]
; CHECK-NEXT: [[TMP39:%.*]] = icmp slt i32 [[SUB_1_1]], 0
; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[NEG_117]], i32 [[SUB_116]]
; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP23]], [[OP_EXTRA1]]
; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP23]], i32 [[OP_EXTRA1]]
; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP15]], [[TMP2]]
; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[SUB_1_1]], 0
; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]]
; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP40]], [[SPEC_SELECT8_120]]
; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP25]], [[SPEC_SELECT8_120]]
; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[CMP12_118]]
; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP40]], i32 [[SPEC_SELECT8_120]]
; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]]
; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_2_1]], 0
; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP25]], i32 [[SPEC_SELECT8_120]]
; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP15]], [[TMP3]]
; CHECK-NEXT: [[TMP26:%.*]] = icmp slt i32 [[SUB_2_1]], 0
; CHECK-NEXT: [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]]
; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP27]], [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]]
; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP42]], i32 [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]]
; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[SUB_3_1]], 0
; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP27]], i32 [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP15]], [[TMP4]]
; CHECK-NEXT: [[TMP28:%.*]] = icmp slt i32 [[SUB_3_1]], 0
; CHECK-NEXT: [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]]
; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP29]], [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]]
; CHECK-NEXT: [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32
; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP44]], i32 [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]]
; CHECK-NEXT: [[TMP45:%.*]] = icmp slt i32 [[SUB_222]], 0
; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP29]], i32 [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP15]], [[TMP5]]
; CHECK-NEXT: [[TMP30:%.*]] = icmp slt i32 [[SUB_222]], 0
; CHECK-NEXT: [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]]
; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_223]], i32 [[SUB_222]]
; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP46]], [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP46]], i32 [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]]
; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_1_2]], 0
; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[NEG_223]], i32 [[SUB_222]]
; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP31]], [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP31]], i32 [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP15]], [[TMP6]]
; CHECK-NEXT: [[TMP32:%.*]] = icmp slt i32 [[SUB_1_2]], 0
; CHECK-NEXT: [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]]
; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP48]], [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[TMP49:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP48]], i32 [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]]
; CHECK-NEXT: [[TMP50:%.*]] = icmp slt i32 [[SUB_2_2]], 0
; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP33]], [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP33]], i32 [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP15]], [[TMP7]]
; CHECK-NEXT: [[TMP35:%.*]] = icmp slt i32 [[SUB_2_2]], 0
; CHECK-NEXT: [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]]
; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP51]], [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[TMP52:%.*]] = or i1 [[CMP12_2_2]], [[TMP49]]
; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP51]], i32 [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]]
; CHECK-NEXT: [[TMP53:%.*]] = icmp slt i32 [[SUB_3_2]], 0
; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP36]], [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[CMP12_2_2]], [[TMP34]]
; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP36]], i32 [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP15]], [[TMP8]]
; CHECK-NEXT: [[TMP38:%.*]] = icmp slt i32 [[SUB_3_2]], 0
; CHECK-NEXT: [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]]
; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[TMP55:%.*]] = or i1 [[CMP12_3_2]], [[TMP52]]
; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP55]], i32 2, i32 [[SPEC_SELECT_3_1]]
; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]]
; CHECK-NEXT: [[TMP56:%.*]] = icmp slt i32 [[SUB_328]], 0
; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP39]], [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[CMP12_3_2]], [[TMP37]]
; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP40]], i32 2, i32 [[SPEC_SELECT_3_1]]
; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP39]], i32 [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP15]], [[TMP9]]
; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_328]], 0
; CHECK-NEXT: [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]]
; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_329]], i32 [[SUB_328]]
; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]]
; CHECK-NEXT: [[TMP58:%.*]] = icmp slt i32 [[SUB_1_3]], 0
; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_329]], i32 [[SUB_328]]
; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP42]], i32 [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP15]], [[TMP10]]
; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[SUB_1_3]], 0
; CHECK-NEXT: [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]]
; CHECK-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP59]], [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP59]], i32 [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]]
; CHECK-NEXT: [[TMP61:%.*]] = icmp slt i32 [[SUB_2_3]], 0
; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[TMP45:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP44]], i32 [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP15]], [[TMP11]]
; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_2_3]], 0
; CHECK-NEXT: [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]]
; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP62]], [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[TMP63:%.*]] = or i1 [[CMP12_2_3]], [[TMP60]]
; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP62]], i32 [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]]
; CHECK-NEXT: [[TMP64:%.*]] = icmp slt i32 [[SUB_3_3]], 0
; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[TMP48:%.*]] = or i1 [[CMP12_2_3]], [[TMP45]]
; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP47]], i32 [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP15]], [[TMP12]]
; CHECK-NEXT: [[TMP49:%.*]] = icmp slt i32 [[SUB_3_3]], 0
; CHECK-NEXT: [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]]
; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[CMP12_3_3]], [[TMP63]]
; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP66]], i32 3, i32 [[SPEC_SELECT_3_2]]
; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]]
; CHECK-NEXT: [[TMP67:%.*]] = icmp slt i32 [[SUB_4]], 0
; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]]
; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_4]], i32 [[SUB_4]]
; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]]
; CHECK-NEXT: [[TMP69:%.*]] = icmp slt i32 [[SUB_1_4]], 0
; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]]
; CHECK-NEXT: [[TMP70:%.*]] = select i1 [[TMP69]], i32 [[NEG_1_4]], i32 [[SUB_1_4]]
; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP70]], [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[TMP71:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP70]], i32 [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]]
; CHECK-NEXT: [[TMP72:%.*]] = icmp slt i32 [[SUB_2_4]], 0
; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]]
; CHECK-NEXT: [[TMP73:%.*]] = select i1 [[TMP72]], i32 [[NEG_2_4]], i32 [[SUB_2_4]]
; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP73]], [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[TMP74:%.*]] = or i1 [[CMP12_2_4]], [[TMP71]]
; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP73]], i32 [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]]
; CHECK-NEXT: [[TMP75:%.*]] = icmp slt i32 [[SUB_3_4]], 0
; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]]
; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_3_4]], i32 [[SUB_3_4]]
; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[TMP77:%.*]] = or i1 [[CMP12_3_4]], [[TMP74]]
; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP77]], i32 4, i32 [[SPEC_SELECT_3_3]]
; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]]
; CHECK-NEXT: [[TMP78:%.*]] = icmp slt i32 [[SUB_5]], 0
; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]]
; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_5]], i32 [[SUB_5]]
; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]]
; CHECK-NEXT: [[TMP80:%.*]] = icmp slt i32 [[SUB_1_5]], 0
; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]]
; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[NEG_1_5]], i32 [[SUB_1_5]]
; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP81]], [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[TMP82:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP81]], i32 [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]]
; CHECK-NEXT: [[TMP83:%.*]] = icmp slt i32 [[SUB_2_5]], 0
; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]]
; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[NEG_2_5]], i32 [[SUB_2_5]]
; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP84]], [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[TMP85:%.*]] = or i1 [[CMP12_2_5]], [[TMP82]]
; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP84]], i32 [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]]
; CHECK-NEXT: [[TMP86:%.*]] = icmp slt i32 [[SUB_3_5]], 0
; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]]
; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_3_5]], i32 [[SUB_3_5]]
; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[TMP88:%.*]] = or i1 [[CMP12_3_5]], [[TMP85]]
; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP88]], i32 5, i32 [[SPEC_SELECT_3_4]]
; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]]
; CHECK-NEXT: [[TMP89:%.*]] = icmp slt i32 [[SUB_6]], 0
; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]]
; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_6]], i32 [[SUB_6]]
; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]]
; CHECK-NEXT: [[TMP91:%.*]] = icmp slt i32 [[SUB_1_6]], 0
; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]]
; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], i32 [[NEG_1_6]], i32 [[SUB_1_6]]
; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP92]], [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[TMP93:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP92]], i32 [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]]
; CHECK-NEXT: [[TMP94:%.*]] = icmp slt i32 [[SUB_2_6]], 0
; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]]
; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[NEG_2_6]], i32 [[SUB_2_6]]
; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP95]], [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[TMP96:%.*]] = or i1 [[CMP12_2_6]], [[TMP93]]
; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP95]], i32 [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]]
; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[SUB_3_6]], 0
; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]]
; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[SUB_3_6]]
; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[TMP51:%.*]] = or i1 [[CMP12_3_3]], [[TMP48]]
; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP51]], i32 3, i32 [[SPEC_SELECT_3_2]]
; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP50]], i32 [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i32> poison, i32 [[TMP15]], i32 0
; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i32> [[TMP52]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP53:%.*]] = sub <16 x i32> [[SHUFFLE2]], [[TMP13]]
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i32> [[TMP53]], i32 0
; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[TMP54]]
; CHECK-NEXT: [[TMP55:%.*]] = icmp slt <16 x i32> [[TMP53]], zeroinitializer
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP55]], i32 0
; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_4]], i32 [[TMP54]]
; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i32> [[TMP53]], i32 1
; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[TMP58]]
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP55]], i32 1
; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_1_4]], i32 [[TMP58]]
; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP60]], i32 [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i32> [[TMP53]], i32 2
; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[TMP62]]
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP55]], i32 2
; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP63]], i32 [[NEG_2_4]], i32 [[TMP62]]
; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP64]], [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[TMP65:%.*]] = or i1 [[CMP12_2_4]], [[TMP61]]
; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP64]], i32 [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i32> [[TMP53]], i32 3
; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[TMP66]]
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP55]], i32 3
; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_3_4]], i32 [[TMP66]]
; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[TMP69:%.*]] = or i1 [[CMP12_3_4]], [[TMP65]]
; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP69]], i32 4, i32 [[SPEC_SELECT_3_3]]
; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i32> [[TMP53]], i32 4
; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[TMP70]]
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <16 x i1> [[TMP55]], i32 4
; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[NEG_5]], i32 [[TMP70]]
; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP72]], [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP72]], i32 [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i32> [[TMP53]], i32 5
; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[TMP73]]
; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP55]], i32 5
; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[NEG_1_5]], i32 [[TMP73]]
; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP75]], [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[TMP76:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP75]], i32 [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i32> [[TMP53]], i32 6
; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[TMP77]]
; CHECK-NEXT: [[TMP78:%.*]] = extractelement <16 x i1> [[TMP55]], i32 6
; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_2_5]], i32 [[TMP77]]
; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[TMP80:%.*]] = or i1 [[CMP12_2_5]], [[TMP76]]
; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i32> [[TMP53]], i32 7
; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[TMP81]]
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <16 x i1> [[TMP55]], i32 7
; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], i32 [[NEG_3_5]], i32 [[TMP81]]
; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP83]], [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[CMP12_3_5]], [[TMP80]]
; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP84]], i32 5, i32 [[SPEC_SELECT_3_4]]
; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP83]], i32 [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i32> [[TMP53]], i32 8
; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[TMP85]]
; CHECK-NEXT: [[TMP86:%.*]] = extractelement <16 x i1> [[TMP55]], i32 8
; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_6]], i32 [[TMP85]]
; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP87]], i32 [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[TMP88:%.*]] = extractelement <16 x i32> [[TMP53]], i32 9
; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[TMP88]]
; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i1> [[TMP55]], i32 9
; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_1_6]], i32 [[TMP88]]
; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[TMP91:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[TMP92:%.*]] = extractelement <16 x i32> [[TMP53]], i32 10
; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[TMP92]]
; CHECK-NEXT: [[TMP93:%.*]] = extractelement <16 x i1> [[TMP55]], i32 10
; CHECK-NEXT: [[TMP94:%.*]] = select i1 [[TMP93]], i32 [[NEG_2_6]], i32 [[TMP92]]
; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP94]], [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[TMP95:%.*]] = or i1 [[CMP12_2_6]], [[TMP91]]
; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP94]], i32 [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[TMP96:%.*]] = extractelement <16 x i32> [[TMP53]], i32 11
; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[TMP96]]
; CHECK-NEXT: [[TMP97:%.*]] = extractelement <16 x i1> [[TMP55]], i32 11
; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[TMP96]]
; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_2_6]]
; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP95]]
; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP99]], i32 6, i32 [[SPEC_SELECT_3_5]]
; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_2_6]]
; CHECK-NEXT: [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]]
; CHECK-NEXT: [[TMP100:%.*]] = icmp slt i32 [[SUB_7]], 0
; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]]
; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_7]], i32 [[SUB_7]]
; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP101]], i32 [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]]
; CHECK-NEXT: [[TMP102:%.*]] = icmp slt i32 [[SUB_1_7]], 0
; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]]
; CHECK-NEXT: [[TMP103:%.*]] = select i1 [[TMP102]], i32 [[NEG_1_7]], i32 [[SUB_1_7]]
; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP103]], [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[TMP104:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP103]], i32 [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]]
; CHECK-NEXT: [[TMP105:%.*]] = icmp slt i32 [[SUB_2_7]], 0
; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]]
; CHECK-NEXT: [[TMP106:%.*]] = select i1 [[TMP105]], i32 [[NEG_2_7]], i32 [[SUB_2_7]]
; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP106]], [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[TMP107:%.*]] = or i1 [[CMP12_2_7]], [[TMP104]]
; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP106]], i32 [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]]
; CHECK-NEXT: [[TMP108:%.*]] = icmp slt i32 [[SUB_3_7]], 0
; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]]
; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_3_7]], i32 [[SUB_3_7]]
; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_3_7]], [[TMP107]]
; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP110]], i32 7, i32 [[SPEC_SELECT_3_6]]
; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[TMP100:%.*]] = extractelement <16 x i32> [[TMP53]], i32 12
; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[TMP100]]
; CHECK-NEXT: [[TMP101:%.*]] = extractelement <16 x i1> [[TMP55]], i32 12
; CHECK-NEXT: [[TMP102:%.*]] = select i1 [[TMP101]], i32 [[NEG_7]], i32 [[TMP100]]
; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP102]], [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP102]], i32 [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[TMP103:%.*]] = extractelement <16 x i32> [[TMP53]], i32 13
; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[TMP103]]
; CHECK-NEXT: [[TMP104:%.*]] = extractelement <16 x i1> [[TMP55]], i32 13
; CHECK-NEXT: [[TMP105:%.*]] = select i1 [[TMP104]], i32 [[NEG_1_7]], i32 [[TMP103]]
; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP105]], [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[TMP106:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP105]], i32 [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[TMP107:%.*]] = extractelement <16 x i32> [[TMP53]], i32 14
; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[TMP107]]
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x i1> [[TMP55]], i32 14
; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_2_7]], i32 [[TMP107]]
; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_2_7]], [[TMP106]]
; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[TMP111:%.*]] = extractelement <16 x i32> [[TMP53]], i32 15
; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[TMP111]]
; CHECK-NEXT: [[TMP112:%.*]] = extractelement <16 x i1> [[TMP55]], i32 15
; CHECK-NEXT: [[TMP113:%.*]] = select i1 [[TMP112]], i32 [[NEG_3_7]], i32 [[TMP111]]
; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP113]], [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[TMP114:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]]
; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP114]], i32 7, i32 [[SPEC_SELECT_3_6]]
; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP113]], i32 [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1

View File

@ -0,0 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S --slp-vectorizer -mtriple=x86_64-unknown %s | FileCheck %s
; Reduced SLP-vectorizer input containing two fast-math fcmp instructions in
; the entry block: %cmp61 compares two constants with predicate oge, and
; %cmp63 compares the two fmul results with predicate ogt. The expected output
; below turns the two adjacent loads and the two fmuls into <2 x float>
; operations and keeps both compares scalar.
; NOTE(review): presumably this guards the ordering used when the vectorizer
; sorts cmp instructions -- confirm against the change that added this file.
define i32 @test(float* %isec, float %0) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[ISEC:%.*]], i64 0
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[ISEC]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX5]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0:%.*]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[CMP61:%.*]] = fcmp fast oge float 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[CMP63:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
; CHECK-NEXT: br i1 [[CMP63]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:
; CHECK-NEXT: ret i32 0
;
entry:
; Two adjacent float loads from %isec (element indices 0 and 1).
%arrayidx5 = getelementptr inbounds float, float* %isec, i64 0
%1 = load float, float* %arrayidx5, align 4
%arrayidx10 = getelementptr inbounds float, float* %isec, i64 1
%2 = load float, float* %arrayidx10, align 4
; Each loaded value is scaled by a different factor: the argument %0 for
; element 1 and the constant 0.0 for element 0.
%mul16 = fmul fast float %0, %2
%mul55 = fmul fast float 0.000000e+00, %1
; %cmp61 has only constant operands and its result is never used; %cmp63 is
; the compare that feeds the branch.
%cmp61 = fcmp fast oge float 0.000000e+00, 0.000000e+00
%cmp63 = fcmp fast ogt float %mul55, %mul16
br i1 %cmp63, label %cleanup, label %if.end
if.end:
br label %cleanup
; Both paths join here and the function returns 0.
cleanup:
ret i32 0
}