diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1ad1a7744af0..e1401171924b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9483,6 +9483,59 @@ tryToVectorizeSequence(SmallVectorImpl &Incoming, return Changed; } +/// Compare two cmp instructions. If IsCompatibility is true, function returns +/// true if 2 cmps have same/swapped predicates and mos compatible corresponding +/// operands. If IsCompatibility is false, function implements strict weak +/// ordering relation between two cmp instructions, returning true if the first +/// instruction is "less" than the second, i.e. its predicate is less than the +/// predicate of the second or the operands IDs are less than the operands IDs +/// of the second cmp instruction. +template +static bool compareCmp(Value *V, Value *V2, + function_ref IsDeleted) { + auto *CI1 = cast(V); + auto *CI2 = cast(V2); + if (IsDeleted(CI2) || !isValidElementType(CI2->getType())) + return false; + if (CI1->getOperand(0)->getType()->getTypeID() < + CI2->getOperand(0)->getType()->getTypeID()) + return !IsCompatibility; + if (CI1->getOperand(0)->getType()->getTypeID() > + CI2->getOperand(0)->getType()->getTypeID()) + return false; + CmpInst::Predicate Pred1 = CI1->getPredicate(); + CmpInst::Predicate Pred2 = CI2->getPredicate(); + CmpInst::Predicate SwapPred1 = CmpInst::getSwappedPredicate(Pred1); + CmpInst::Predicate SwapPred2 = CmpInst::getSwappedPredicate(Pred2); + CmpInst::Predicate BasePred1 = std::min(Pred1, SwapPred1); + CmpInst::Predicate BasePred2 = std::min(Pred2, SwapPred2); + if (BasePred1 < BasePred2) + return !IsCompatibility; + if (BasePred1 > BasePred2) + return false; + // Compare operands. + bool LEPreds = Pred1 <= Pred2; + bool GEPreds = Pred1 >= Pred2; + for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) { + auto *Op1 = CI1->getOperand(LEPreds ? I : E - I - 1); + auto *Op2 = CI2->getOperand(GEPreds ? I : E - I - 1); + if (Op1->getValueID() < Op2->getValueID()) + return !IsCompatibility; + if (Op1->getValueID() > Op2->getValueID()) + return false; + if (auto *I1 = dyn_cast(Op1)) + if (auto *I2 = dyn_cast(Op2)) { + if (I1->getParent() != I2->getParent()) + return false; + InstructionsState S = getSameOpcode({I1, I2}); + if (S.getOpcode()) + continue; + return false; + } + } + return IsCompatibility; +} + bool SLPVectorizerPass::vectorizeSimpleInstructions( SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R, bool AtTerminator) { @@ -9514,37 +9567,16 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions( } // Try to vectorize list of compares. // Sort by type, compare predicate, etc. - // TODO: Add analysis on the operand opcodes (profitable to vectorize - // instructions with same/alternate opcodes/const values). auto &&CompareSorter = [&R](Value *V, Value *V2) { - auto *CI1 = cast(V); - auto *CI2 = cast(V2); - if (R.isDeleted(CI2) || !isValidElementType(CI2->getType())) - return false; - if (CI1->getOperand(0)->getType()->getTypeID() < - CI2->getOperand(0)->getType()->getTypeID()) - return true; - if (CI1->getOperand(0)->getType()->getTypeID() > - CI2->getOperand(0)->getType()->getTypeID()) - return false; - return CI1->getPredicate() < CI2->getPredicate() || - (CI1->getPredicate() > CI2->getPredicate() && - CI1->getPredicate() < - CmpInst::getSwappedPredicate(CI2->getPredicate())); + return compareCmp(V, V2, + [&R](Instruction *I) { return R.isDeleted(I); }); }; auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) { if (V1 == V2) return true; - auto *CI1 = cast(V1); - auto *CI2 = cast(V2); - if (R.isDeleted(CI2) || !isValidElementType(CI2->getType())) - return false; - if (CI1->getOperand(0)->getType() != CI2->getOperand(0)->getType()) - return false; - return CI1->getPredicate() == CI2->getPredicate() || - CI1->getPredicate() == - CmpInst::getSwappedPredicate(CI2->getPredicate()); + return compareCmp(V1, V2, + [&R](Instruction *I) { return R.isDeleted(I); }); }; auto Limit = [&R](Value *V) { unsigned EltSize = R.getVectorElementSize(V); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll index 24aded3bea51..12d88ffea828 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll @@ -21,233 +21,222 @@ define void @n() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 1), align 4 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 2), align 8 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 3), align 4 -; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0), align 16 -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 1), align 4 -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 2), align 8 -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 3), align 4 -; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 0), align 16 -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 1), align 4 -; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 2), align 8 -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 3), align 4 -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 0), align 16 -; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 1), align 4 -; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 2), align 8 -; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 3), align 4 -; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 0), align 16 -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 1), align 4 -; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 2), align 8 -; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 3), align 4 +; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0) to <16 x i32>*), align 16 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[SPEC_SELECT8_3_7:%.*]], [[FOR_COND]] ], [ undef, [[ENTRY]] ] -; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -183 -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP31]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP32:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]] -; CHECK-NEXT: [[TMP33:%.*]] = icmp slt <4 x i32> [[TMP32]], zeroinitializer -; CHECK-NEXT: [[TMP34:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP32]] -; CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP33]], <4 x i32> [[TMP34]], <4 x i32> [[TMP32]] -; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP35]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP36]], [[B_0]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP36]], i32 [[B_0]] -; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP30]], [[TMP1]] -; CHECK-NEXT: [[TMP37:%.*]] = icmp slt i32 [[SUB_116]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -183 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <4 x i32> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP20]]) +; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP21]], [[B_0]] +; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP21]], i32 [[B_0]] +; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP15]], [[TMP1]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp slt i32 [[SUB_116]], 0 ; CHECK-NEXT: [[NEG_117:%.*]] = sub nsw i32 0, [[SUB_116]] -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[NEG_117]], i32 [[SUB_116]] -; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP38]], [[OP_EXTRA1]] -; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP38]], i32 [[OP_EXTRA1]] -; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]] -; CHECK-NEXT: [[TMP39:%.*]] = icmp slt i32 [[SUB_1_1]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[NEG_117]], i32 [[SUB_116]] +; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP23]], [[OP_EXTRA1]] +; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP23]], i32 [[OP_EXTRA1]] +; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP15]], [[TMP2]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[SUB_1_1]], 0 ; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] -; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP40]], [[SPEC_SELECT8_120]] +; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] +; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP25]], [[SPEC_SELECT8_120]] ; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[CMP12_118]] -; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP40]], i32 [[SPEC_SELECT8_120]] -; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]] -; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_2_1]], 0 +; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP25]], i32 [[SPEC_SELECT8_120]] +; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP15]], [[TMP3]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp slt i32 [[SUB_2_1]], 0 ; CHECK-NEXT: [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_2_1]], i32 [[SUB_2_1]] -; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_1_1]] +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[NEG_2_1]], i32 [[SUB_2_1]] +; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP27]], [[SPEC_SELECT8_1_1]] ; CHECK-NEXT: [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]] -; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP42]], i32 [[SPEC_SELECT8_1_1]] -; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]] -; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[SUB_3_1]], 0 +; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP27]], i32 [[SPEC_SELECT8_1_1]] +; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp slt i32 [[SUB_3_1]], 0 ; CHECK-NEXT: [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]] -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_3_1]], i32 [[SUB_3_1]] -; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_2_1]] +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[NEG_3_1]], i32 [[SUB_3_1]] +; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP29]], [[SPEC_SELECT8_2_1]] ; CHECK-NEXT: [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]] ; CHECK-NEXT: [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32 -; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP44]], i32 [[SPEC_SELECT8_2_1]] -; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]] -; CHECK-NEXT: [[TMP45:%.*]] = icmp slt i32 [[SUB_222]], 0 +; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP29]], i32 [[SPEC_SELECT8_2_1]] +; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP15]], [[TMP5]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp slt i32 [[SUB_222]], 0 ; CHECK-NEXT: [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]] -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_223]], i32 [[SUB_222]] -; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP46]], [[SPEC_SELECT8_3_1]] -; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP46]], i32 [[SPEC_SELECT8_3_1]] -; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]] -; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_1_2]], 0 +; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[NEG_223]], i32 [[SUB_222]] +; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP31]], [[SPEC_SELECT8_3_1]] +; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP31]], i32 [[SPEC_SELECT8_3_1]] +; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP15]], [[TMP6]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp slt i32 [[SUB_1_2]], 0 ; CHECK-NEXT: [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]] -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[NEG_1_2]], i32 [[SUB_1_2]] -; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP48]], [[SPEC_SELECT8_226]] -; CHECK-NEXT: [[TMP49:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]] -; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP48]], i32 [[SPEC_SELECT8_226]] -; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]] -; CHECK-NEXT: [[TMP50:%.*]] = icmp slt i32 [[SUB_2_2]], 0 +; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[NEG_1_2]], i32 [[SUB_1_2]] +; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP33]], [[SPEC_SELECT8_226]] +; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]] +; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP33]], i32 [[SPEC_SELECT8_226]] +; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP15]], [[TMP7]] +; CHECK-NEXT: [[TMP35:%.*]] = icmp slt i32 [[SUB_2_2]], 0 ; CHECK-NEXT: [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]] -; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[NEG_2_2]], i32 [[SUB_2_2]] -; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP51]], [[SPEC_SELECT8_1_2]] -; CHECK-NEXT: [[TMP52:%.*]] = or i1 [[CMP12_2_2]], [[TMP49]] -; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP51]], i32 [[SPEC_SELECT8_1_2]] -; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]] -; CHECK-NEXT: [[TMP53:%.*]] = icmp slt i32 [[SUB_3_2]], 0 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[NEG_2_2]], i32 [[SUB_2_2]] +; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP36]], [[SPEC_SELECT8_1_2]] +; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[CMP12_2_2]], [[TMP34]] +; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP36]], i32 [[SPEC_SELECT8_1_2]] +; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP15]], [[TMP8]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp slt i32 [[SUB_3_2]], 0 ; CHECK-NEXT: [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]] -; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_3_2]], i32 [[SUB_3_2]] -; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_2_2]] -; CHECK-NEXT: [[TMP55:%.*]] = or i1 [[CMP12_3_2]], [[TMP52]] -; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP55]], i32 2, i32 [[SPEC_SELECT_3_1]] -; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_2_2]] -; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]] -; CHECK-NEXT: [[TMP56:%.*]] = icmp slt i32 [[SUB_328]], 0 +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_3_2]], i32 [[SUB_3_2]] +; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP39]], [[SPEC_SELECT8_2_2]] +; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[CMP12_3_2]], [[TMP37]] +; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP40]], i32 2, i32 [[SPEC_SELECT_3_1]] +; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP39]], i32 [[SPEC_SELECT8_2_2]] +; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP15]], [[TMP9]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_328]], 0 ; CHECK-NEXT: [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]] -; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_329]], i32 [[SUB_328]] -; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_2]] -; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_2]] -; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]] -; CHECK-NEXT: [[TMP58:%.*]] = icmp slt i32 [[SUB_1_3]], 0 +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_329]], i32 [[SUB_328]] +; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_3_2]] +; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP42]], i32 [[SPEC_SELECT8_3_2]] +; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[SUB_1_3]], 0 ; CHECK-NEXT: [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]] -; CHECK-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[NEG_1_3]], i32 [[SUB_1_3]] -; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP59]], [[SPEC_SELECT8_332]] -; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]] -; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP59]], i32 [[SPEC_SELECT8_332]] -; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]] -; CHECK-NEXT: [[TMP61:%.*]] = icmp slt i32 [[SUB_2_3]], 0 +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_1_3]], i32 [[SUB_1_3]] +; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_332]] +; CHECK-NEXT: [[TMP45:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]] +; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP44]], i32 [[SPEC_SELECT8_332]] +; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP15]], [[TMP11]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_2_3]], 0 ; CHECK-NEXT: [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]] -; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[NEG_2_3]], i32 [[SUB_2_3]] -; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP62]], [[SPEC_SELECT8_1_3]] -; CHECK-NEXT: [[TMP63:%.*]] = or i1 [[CMP12_2_3]], [[TMP60]] -; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP62]], i32 [[SPEC_SELECT8_1_3]] -; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]] -; CHECK-NEXT: [[TMP64:%.*]] = icmp slt i32 [[SUB_3_3]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_2_3]], i32 [[SUB_2_3]] +; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_1_3]] +; CHECK-NEXT: [[TMP48:%.*]] = or i1 [[CMP12_2_3]], [[TMP45]] +; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP47]], i32 [[SPEC_SELECT8_1_3]] +; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP15]], [[TMP12]] +; CHECK-NEXT: [[TMP49:%.*]] = icmp slt i32 [[SUB_3_3]], 0 ; CHECK-NEXT: [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]] -; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_3_3]], i32 [[SUB_3_3]] -; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_2_3]] -; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[CMP12_3_3]], [[TMP63]] -; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP66]], i32 3, i32 [[SPEC_SELECT_3_2]] -; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_2_3]] -; CHECK-NEXT: [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]] -; CHECK-NEXT: [[TMP67:%.*]] = icmp slt i32 [[SUB_4]], 0 -; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]] -; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_4]], i32 [[SUB_4]] -; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_3_3]] -; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_3_3]] -; CHECK-NEXT: [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]] -; CHECK-NEXT: [[TMP69:%.*]] = icmp slt i32 [[SUB_1_4]], 0 -; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]] -; CHECK-NEXT: [[TMP70:%.*]] = select i1 [[TMP69]], i32 [[NEG_1_4]], i32 [[SUB_1_4]] -; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP70]], [[SPEC_SELECT8_4]] -; CHECK-NEXT: [[TMP71:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]] -; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP70]], i32 [[SPEC_SELECT8_4]] -; CHECK-NEXT: [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]] -; CHECK-NEXT: [[TMP72:%.*]] = icmp slt i32 [[SUB_2_4]], 0 -; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]] -; CHECK-NEXT: [[TMP73:%.*]] = select i1 [[TMP72]], i32 [[NEG_2_4]], i32 [[SUB_2_4]] -; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP73]], [[SPEC_SELECT8_1_4]] -; CHECK-NEXT: [[TMP74:%.*]] = or i1 [[CMP12_2_4]], [[TMP71]] -; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP73]], i32 [[SPEC_SELECT8_1_4]] -; CHECK-NEXT: [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]] -; CHECK-NEXT: [[TMP75:%.*]] = icmp slt i32 [[SUB_3_4]], 0 -; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]] -; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_3_4]], i32 [[SUB_3_4]] -; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_2_4]] -; CHECK-NEXT: [[TMP77:%.*]] = or i1 [[CMP12_3_4]], [[TMP74]] -; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP77]], i32 4, i32 [[SPEC_SELECT_3_3]] -; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_2_4]] -; CHECK-NEXT: [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]] -; CHECK-NEXT: [[TMP78:%.*]] = icmp slt i32 [[SUB_5]], 0 -; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_5]], i32 [[SUB_5]] -; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_3_4]] -; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_3_4]] -; CHECK-NEXT: [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]] -; CHECK-NEXT: [[TMP80:%.*]] = icmp slt i32 [[SUB_1_5]], 0 -; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]] -; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[NEG_1_5]], i32 [[SUB_1_5]] -; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP81]], [[SPEC_SELECT8_5]] -; CHECK-NEXT: [[TMP82:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]] -; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP81]], i32 [[SPEC_SELECT8_5]] -; CHECK-NEXT: [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]] -; CHECK-NEXT: [[TMP83:%.*]] = icmp slt i32 [[SUB_2_5]], 0 -; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]] -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[NEG_2_5]], i32 [[SUB_2_5]] -; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP84]], [[SPEC_SELECT8_1_5]] -; CHECK-NEXT: [[TMP85:%.*]] = or i1 [[CMP12_2_5]], [[TMP82]] -; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP84]], i32 [[SPEC_SELECT8_1_5]] -; CHECK-NEXT: [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]] -; CHECK-NEXT: [[TMP86:%.*]] = icmp slt i32 [[SUB_3_5]], 0 -; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]] -; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_3_5]], i32 [[SUB_3_5]] -; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_2_5]] -; CHECK-NEXT: [[TMP88:%.*]] = or i1 [[CMP12_3_5]], [[TMP85]] -; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP88]], i32 5, i32 [[SPEC_SELECT_3_4]] -; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_2_5]] -; CHECK-NEXT: [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]] -; CHECK-NEXT: [[TMP89:%.*]] = icmp slt i32 [[SUB_6]], 0 -; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]] -; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_6]], i32 [[SUB_6]] -; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_3_5]] -; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_3_5]] -; CHECK-NEXT: [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]] -; CHECK-NEXT: [[TMP91:%.*]] = icmp slt i32 [[SUB_1_6]], 0 -; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]] -; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], i32 [[NEG_1_6]], i32 [[SUB_1_6]] -; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP92]], [[SPEC_SELECT8_6]] -; CHECK-NEXT: [[TMP93:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]] -; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP92]], i32 [[SPEC_SELECT8_6]] -; CHECK-NEXT: [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]] -; CHECK-NEXT: [[TMP94:%.*]] = icmp slt i32 [[SUB_2_6]], 0 -; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]] -; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[NEG_2_6]], i32 [[SUB_2_6]] -; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP95]], [[SPEC_SELECT8_1_6]] -; CHECK-NEXT: [[TMP96:%.*]] = or i1 [[CMP12_2_6]], [[TMP93]] -; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP95]], i32 [[SPEC_SELECT8_1_6]] -; CHECK-NEXT: [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]] -; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[SUB_3_6]], 0 -; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]] -; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[SUB_3_6]] +; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_3_3]], i32 [[SUB_3_3]] +; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_2_3]] +; CHECK-NEXT: [[TMP51:%.*]] = or i1 [[CMP12_3_3]], [[TMP48]] +; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP51]], i32 3, i32 [[SPEC_SELECT_3_2]] +; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP50]], i32 [[SPEC_SELECT8_2_3]] +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i32> poison, i32 [[TMP15]], i32 0 +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i32> [[TMP52]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP53:%.*]] = sub <16 x i32> [[SHUFFLE2]], [[TMP13]] +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i32> [[TMP53]], i32 0 +; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[TMP54]] +; CHECK-NEXT: [[TMP55:%.*]] = icmp slt <16 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP55]], i32 0 +; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_4]], i32 [[TMP54]] +; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_3]] +; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_3]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i32> [[TMP53]], i32 1 +; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[TMP58]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP55]], i32 1 +; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_1_4]], i32 [[TMP58]] +; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_4]] +; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]] +; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP60]], i32 [[SPEC_SELECT8_4]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i32> [[TMP53]], i32 2 +; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[TMP62]] +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP55]], i32 2 +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP63]], i32 [[NEG_2_4]], i32 [[TMP62]] +; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP64]], [[SPEC_SELECT8_1_4]] +; CHECK-NEXT: [[TMP65:%.*]] = or i1 [[CMP12_2_4]], [[TMP61]] +; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP64]], i32 [[SPEC_SELECT8_1_4]] +; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i32> [[TMP53]], i32 3 +; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[TMP66]] +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP55]], i32 3 +; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_3_4]], i32 [[TMP66]] +; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_2_4]] +; CHECK-NEXT: [[TMP69:%.*]] = or i1 [[CMP12_3_4]], [[TMP65]] +; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP69]], i32 4, i32 [[SPEC_SELECT_3_3]] +; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_2_4]] +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i32> [[TMP53]], i32 4 +; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[TMP70]] +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <16 x i1> [[TMP55]], i32 4 +; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[NEG_5]], i32 [[TMP70]] +; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP72]], [[SPEC_SELECT8_3_4]] +; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP72]], i32 [[SPEC_SELECT8_3_4]] +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i32> [[TMP53]], i32 5 +; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[TMP73]] +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP55]], i32 5 +; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[NEG_1_5]], i32 [[TMP73]] +; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP75]], [[SPEC_SELECT8_5]] +; CHECK-NEXT: [[TMP76:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]] +; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP75]], i32 [[SPEC_SELECT8_5]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i32> [[TMP53]], i32 6 +; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[TMP77]] +; CHECK-NEXT: [[TMP78:%.*]] = extractelement <16 x i1> [[TMP55]], i32 6 +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_2_5]], i32 [[TMP77]] +; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_1_5]] +; CHECK-NEXT: [[TMP80:%.*]] = or i1 [[CMP12_2_5]], [[TMP76]] +; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_1_5]] +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i32> [[TMP53]], i32 7 +; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[TMP81]] +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <16 x i1> [[TMP55]], i32 7 +; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], i32 [[NEG_3_5]], i32 [[TMP81]] +; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP83]], [[SPEC_SELECT8_2_5]] +; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[CMP12_3_5]], [[TMP80]] +; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP84]], i32 5, i32 [[SPEC_SELECT_3_4]] +; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP83]], i32 [[SPEC_SELECT8_2_5]] +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i32> [[TMP53]], i32 8 +; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[TMP85]] +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <16 x i1> [[TMP55]], i32 8 +; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_6]], i32 [[TMP85]] +; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_3_5]] +; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP87]], i32 [[SPEC_SELECT8_3_5]] +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <16 x i32> [[TMP53]], i32 9 +; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[TMP88]] +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i1> [[TMP55]], i32 9 +; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_1_6]], i32 [[TMP88]] +; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_6]] +; CHECK-NEXT: [[TMP91:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]] +; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_6]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <16 x i32> [[TMP53]], i32 10 +; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[TMP92]] +; CHECK-NEXT: [[TMP93:%.*]] = extractelement <16 x i1> [[TMP55]], i32 10 +; CHECK-NEXT: [[TMP94:%.*]] = select i1 [[TMP93]], i32 [[NEG_2_6]], i32 [[TMP92]] +; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP94]], [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[TMP95:%.*]] = or i1 [[CMP12_2_6]], [[TMP91]] +; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP94]], i32 [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[TMP96:%.*]] = extractelement <16 x i32> [[TMP53]], i32 11 +; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[TMP96]] +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <16 x i1> [[TMP55]], i32 11 +; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[TMP96]] ; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_2_6]] -; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP96]] +; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP95]] ; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP99]], i32 6, i32 [[SPEC_SELECT_3_5]] ; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_2_6]] -; CHECK-NEXT: [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]] -; CHECK-NEXT: [[TMP100:%.*]] = icmp slt i32 [[SUB_7]], 0 -; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]] -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_7]], i32 [[SUB_7]] -; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_3_6]] -; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP101]], i32 [[SPEC_SELECT8_3_6]] -; CHECK-NEXT: [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]] -; CHECK-NEXT: [[TMP102:%.*]] = icmp slt i32 [[SUB_1_7]], 0 -; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]] -; CHECK-NEXT: [[TMP103:%.*]] = select i1 [[TMP102]], i32 [[NEG_1_7]], i32 [[SUB_1_7]] -; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP103]], [[SPEC_SELECT8_7]] -; CHECK-NEXT: [[TMP104:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] -; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP103]], i32 [[SPEC_SELECT8_7]] -; CHECK-NEXT: [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]] -; CHECK-NEXT: [[TMP105:%.*]] = icmp slt i32 [[SUB_2_7]], 0 -; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]] -; CHECK-NEXT: [[TMP106:%.*]] = select i1 [[TMP105]], i32 [[NEG_2_7]], i32 [[SUB_2_7]] -; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP106]], [[SPEC_SELECT8_1_7]] -; CHECK-NEXT: [[TMP107:%.*]] = or i1 [[CMP12_2_7]], [[TMP104]] -; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP106]], i32 [[SPEC_SELECT8_1_7]] -; CHECK-NEXT: [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]] -; CHECK-NEXT: [[TMP108:%.*]] = icmp slt i32 [[SUB_3_7]], 0 -; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]] -; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_3_7]], i32 [[SUB_3_7]] -; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_2_7]] -; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_3_7]], [[TMP107]] -; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP110]], i32 7, i32 [[SPEC_SELECT_3_6]] -; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <16 x i32> [[TMP53]], i32 12 +; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[TMP100]] +; CHECK-NEXT: [[TMP101:%.*]] = extractelement <16 x i1> [[TMP55]], i32 12 +; CHECK-NEXT: [[TMP102:%.*]] = select i1 [[TMP101]], i32 [[NEG_7]], i32 [[TMP100]] +; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP102]], [[SPEC_SELECT8_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP102]], i32 [[SPEC_SELECT8_3_6]] +; CHECK-NEXT: [[TMP103:%.*]] = extractelement <16 x i32> [[TMP53]], i32 13 +; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[TMP103]] +; CHECK-NEXT: [[TMP104:%.*]] = extractelement <16 x i1> [[TMP55]], i32 13 +; CHECK-NEXT: [[TMP105:%.*]] = select i1 [[TMP104]], i32 [[NEG_1_7]], i32 [[TMP103]] +; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP105]], [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[TMP106:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] +; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP105]], i32 [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[TMP107:%.*]] = extractelement <16 x i32> [[TMP53]], i32 14 +; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[TMP107]] +; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x i1> [[TMP55]], i32 14 +; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_2_7]], i32 [[TMP107]] +; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_2_7]], [[TMP106]] +; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[TMP111:%.*]] = extractelement <16 x i32> [[TMP53]], i32 15 +; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[TMP111]] +; CHECK-NEXT: [[TMP112:%.*]] = extractelement <16 x i1> [[TMP55]], i32 15 +; CHECK-NEXT: [[TMP113:%.*]] = select i1 [[TMP112]], i32 [[NEG_3_7]], i32 [[TMP111]] +; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP113]], [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[TMP114:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]] +; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP114]], i32 7, i32 [[SPEC_SELECT_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP113]], i32 [[SPEC_SELECT8_2_7]] ; CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll new file mode 100644 index 000000000000..2800fcefe945 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --slp-vectorizer -mtriple=x86_64-unknown %s | FileCheck %s + +define i32 @test(float* %isec, float %0) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[ISEC:%.*]], i64 0 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[ISEC]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX5]] to <2 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> , float [[TMP0:%.*]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[CMP61:%.*]] = fcmp fast oge float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; CHECK-NEXT: [[CMP63:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[CMP63]], label [[CLEANUP:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: ret i32 0 +; +entry: + %arrayidx5 = getelementptr inbounds float, float* %isec, i64 0 + %1 = load float, float* %arrayidx5, align 4 + %arrayidx10 = getelementptr inbounds float, float* %isec, i64 1 + %2 = load float, float* %arrayidx10, align 4 + %mul16 = fmul fast float %0, %2 + %mul55 = fmul fast float 0.000000e+00, %1 + %cmp61 = fcmp fast oge float 0.000000e+00, 0.000000e+00 + %cmp63 = fcmp fast ogt float %mul55, %mul16 + br i1 %cmp63, label %cleanup, label %if.end + +if.end: + br label %cleanup + +cleanup: + ret i32 0 +}