[SLP,LV] Use poison constant vector for shufflevector/initial insertelement

This patch makes SLP and LV emit operations with initial vectors set to poison constant instead of undef.
This is a part of efforts for using poison vector instead of undef to represent "doesn't care" vector.
The goal is to make nice shufflevector optimizations valid that is currently incorrect due to the tricky interaction between undef and poison (see https://bugs.llvm.org/show_bug.cgi?id=44185 ).

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D94061
This commit is contained in:
Juneyoung Lee 2021-01-05 14:11:50 +09:00
parent 4ae7952e2b
commit 4a8e6ed2f7
102 changed files with 865 additions and 815 deletions

View File

@ -2405,10 +2405,10 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
VectorValue = getBroadcastInstrs(ScalarValue);
VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
} else {
// Initialize packing with insertelements to start from undef.
// Initialize packing with insertelements to start from poison.
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF));
VectorLoopValueMap.setVectorValue(V, Part, Undef);
Value *Poison = PoisonValue::get(VectorType::get(V->getType(), VF));
VectorLoopValueMap.setVectorValue(V, Part, Poison);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorValue(V, {Part, Lane});
VectorValue = VectorLoopValueMap.getVectorValue(V, Part);
@ -2503,9 +2503,9 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
// }
// To:
// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
// %R.vec = shuffle %wide.vec, undef, <0, 3, 6, 9> ; R elements
// %G.vec = shuffle %wide.vec, undef, <1, 4, 7, 10> ; G elements
// %B.vec = shuffle %wide.vec, undef, <2, 5, 8, 11> ; B elements
// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
//
// Or translate following interleaved store group (factor = 3):
// for (i = 0; i < N; i+=3) {
@ -2516,7 +2516,7 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
// }
// To:
// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
// %B_U.vec = shuffle %B.vec, undef, <0, 1, 2, 3, u, u, u, u>
// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
@ -2581,7 +2581,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
setDebugLocFromInst(Builder, Instr);
Value *UndefVec = UndefValue::get(VecTy);
Value *PoisonVec = PoisonValue::get(VecTy);
Value *MaskForGaps = nullptr;
if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {
@ -2614,7 +2614,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
NewLoad =
Builder.CreateMaskedLoad(AddrParts[Part], Group->getAlign(),
GroupMask, UndefVec, "wide.masked.vec");
GroupMask, PoisonVec, "wide.masked.vec");
}
else
NewLoad = Builder.CreateAlignedLoad(VecTy, AddrParts[Part],
@ -2832,7 +2832,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(
auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
VecPtr, Alignment, BlockInMaskParts[Part], UndefValue::get(DataTy),
VecPtr, Alignment, BlockInMaskParts[Part], PoisonValue::get(DataTy),
"wide.masked.load");
else
NewLI =
@ -4026,7 +4026,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
VectorInit = Builder.CreateInsertElement(
UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit,
PoisonValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit,
Builder.getInt32(VF.getKnownMinValue() - 1), "vector.recur.init");
}
@ -8796,13 +8796,13 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
*State.Instance, IsPredicated, State);
// Insert scalar instance packing it into a vector.
if (AlsoPack && State.VF.isVector()) {
// If we're constructing lane 0, initialize to start from undef.
// If we're constructing lane 0, initialize to start from poison.
if (State.Instance->Lane == 0) {
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
Value *Undef = UndefValue::get(
Value *Poison = PoisonValue::get(
VectorType::get(getUnderlyingValue()->getType(), State.VF));
State.ValueMap.setVectorValue(getUnderlyingInstr(),
State.Instance->Part, Undef);
State.Instance->Part, Poison);
}
State.ILV->packScalarIntoVectorValue(getUnderlyingInstr(),
*State.Instance);
@ -8875,7 +8875,7 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
} else {
Type *PredInstType = PredInst->getType();
PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
Phi->addIncoming(UndefValue::get(ScalarPredInst->getType()), PredicatingBB);
Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), PredicatingBB);
Phi->addIncoming(ScalarPredInst, PredicatedBB);
State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi);
}

View File

@ -257,7 +257,7 @@ static bool isCommutative(Instruction *I) {
/// %x3x3 = mul i8 %x3, %x3
/// %y1y1 = mul i8 %y1, %y1
/// %y2y2 = mul i8 %y2, %y2
/// %ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
/// %ins1 = insertelement <4 x i8> poison, i8 %x0x0, i32 0
/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
@ -272,13 +272,13 @@ static bool isCommutative(Instruction *I) {
/// %x3 = extractelement <4 x i8> %x, i32 3
/// %y1 = extractelement <4 x i8> %y, i32 1
/// %y2 = extractelement <4 x i8> %y, i32 2
/// %1 = insertelement <4 x i8> undef, i8 %x0, i32 0
/// %1 = insertelement <4 x i8> poison, i8 %x0, i32 0
/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
/// %5 = mul <4 x i8> %4, %4
/// %6 = extractelement <4 x i8> %5, i32 0
/// %ins1 = insertelement <4 x i8> undef, i8 %6, i32 0
/// %ins1 = insertelement <4 x i8> poison, i8 %6, i32 0
/// %7 = extractelement <4 x i8> %5, i32 1
/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
/// %8 = extractelement <4 x i8> %5, i32 2
@ -311,7 +311,7 @@ isShuffle(ArrayRef<Value *> VL) {
if (Idx->getValue().uge(Size))
continue;
unsigned IntIdx = Idx->getValue().getZExtValue();
// We can extractelement from undef vector.
// We can extractelement from undef or poison vector.
if (isa<UndefValue>(Vec))
continue;
// For correct shuffling we have to have at most 2 different vector operands
@ -4229,7 +4229,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
Value *Val0 =
isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
Value *Vec = UndefValue::get(VecTy);
Value *Vec = PoisonValue::get(VecTy);
unsigned InsIndex = 0;
for (Value *Val : VL) {
Vec = Builder.CreateInsertElement(Vec, Val, Builder.getInt32(InsIndex++));
@ -6271,7 +6271,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// part should also switch to same interface.
// For example, the following case is projected code after SLP:
// %4 = extractelement <4 x i64> %3, i32 0
// %v0 = insertelement <4 x i64> undef, i64 %4, i32 0
// %v0 = insertelement <4 x i64> poison, i64 %4, i32 0
// %5 = extractelement <4 x i64> %3, i32 1
// %v1 = insertelement <4 x i64> %v0, i64 %5, i32 1
// %6 = extractelement <4 x i64> %3, i32 2
@ -7287,7 +7287,7 @@ static bool findBuildAggregate_rec(Instruction *LastInsertInst,
}
/// Recognize construction of vectors like
/// %ra = insertelement <4 x float> undef, float %s0, i32 0
/// %ra = insertelement <4 x float> poison, float %s0, i32 0
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3

View File

@ -29,10 +29,10 @@ target triple = "aarch64--linux-gnu"
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], %x
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> undef, i64 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE]]
; CHECK: [[PRED_UDIV_CONTINUE]]:
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i64> [ undef, %vector.body ], [ [[TMP8]], %[[PRED_UDIV_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i64> [ poison, %vector.body ], [ [[TMP8]], %[[PRED_UDIV_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2]]
; CHECK: [[PRED_UDIV_IF1]]:

View File

@ -19,11 +19,11 @@
; FORCED-NEXT: %0 = add i32 %index, 0
; FORCED-NEXT: %1 = extractvalue { i64, i64 } %sv, 0
; FORCED-NEXT: %2 = extractvalue { i64, i64 } %sv, 0
; FORCED-NEXT: %3 = insertelement <2 x i64> undef, i64 %1, i32 0
; FORCED-NEXT: %3 = insertelement <2 x i64> poison, i64 %1, i32 0
; FORCED-NEXT: %4 = insertelement <2 x i64> %3, i64 %2, i32 1
; FORCED-NEXT: %5 = extractvalue { i64, i64 } %sv, 1
; FORCED-NEXT: %6 = extractvalue { i64, i64 } %sv, 1
; FORCED-NEXT: %7 = insertelement <2 x i64> undef, i64 %5, i32 0
; FORCED-NEXT: %7 = insertelement <2 x i64> poison, i64 %5, i32 0
; FORCED-NEXT: %8 = insertelement <2 x i64> %7, i64 %6, i32 1
; FORCED-NEXT: %9 = getelementptr i64, i64* %dst, i32 %0
; FORCED-NEXT: %10 = add <2 x i64> %4, %8
@ -68,11 +68,11 @@ declare float @pow(float, float) readnone nounwind
; FORCED-NEXT: %0 = add i32 %index, 0
; FORCED-NEXT: %1 = extractvalue { float, float } %sv, 0
; FORCED-NEXT: %2 = extractvalue { float, float } %sv, 0
; FORCED-NEXT: %3 = insertelement <2 x float> undef, float %1, i32 0
; FORCED-NEXT: %3 = insertelement <2 x float> poison, float %1, i32 0
; FORCED-NEXT: %4 = insertelement <2 x float> %3, float %2, i32 1
; FORCED-NEXT: %5 = extractvalue { float, float } %sv, 1
; FORCED-NEXT: %6 = extractvalue { float, float } %sv, 1
; FORCED-NEXT: %7 = insertelement <2 x float> undef, float %5, i32 0
; FORCED-NEXT: %7 = insertelement <2 x float> poison, float %5, i32 0
; FORCED-NEXT: %8 = insertelement <2 x float> %7, float %6, i32 1
; FORCED-NEXT: %9 = getelementptr float, float* %dst, i32 %0
; FORCED-NEXT: %10 = call <2 x float> @llvm.pow.v2f32(<2 x float> %4, <2 x float> %8)

View File

@ -22,7 +22,7 @@ define void @test_stride1_4i32(i32* readonly %data, i32* noalias nocapture %dst,
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP5]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP5]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
@ -379,7 +379,7 @@ define void @test_stride_loopinvar_4i32(i32* readonly %data, i32* noalias nocapt
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0

View File

@ -16,7 +16,7 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: [[TMP4]] = add i32 [[TMP3]], [[VEC_PHI]]
@ -64,10 +64,10 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[VEC_IND]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[VEC_PHI]]
@ -125,10 +125,10 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP5]], <4 x i32> [[VEC_PHI]]
@ -179,10 +179,10 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[TMP4]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP5]], <4 x i32> [[VEC_PHI]]
@ -233,10 +233,10 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6]] = or <4 x i32> [[VEC_PHI]], [[TMP5]]
@ -287,10 +287,10 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6]] = xor <4 x i32> [[VEC_PHI]], [[TMP5]]
@ -341,10 +341,10 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <4 x float> [[TMP4]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP5]], <4 x float> [[VEC_PHI]]
@ -395,10 +395,10 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP5]], <4 x float> [[VEC_PHI]]

View File

@ -27,12 +27,12 @@ define i32 @mla_i32(i8* noalias nocapture readonly %A, i8* noalias nocapture rea
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP3]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP3]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[WIDE_MASKED_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP7]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP7]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[WIDE_MASKED_LOAD1]] to <4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP9]], <4 x i32> zeroinitializer
@ -115,12 +115,12 @@ define i32 @mla_i8(i8* noalias nocapture readonly %A, i8* noalias nocapture read
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP7]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP7]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_MASKED_LOAD1]] to <16 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <16 x i32> [[TMP8]], [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP9]], <16 x i32> zeroinitializer
@ -203,7 +203,7 @@ define i32 @add_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[VEC_PHI]]
@ -274,7 +274,7 @@ define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@ -345,7 +345,7 @@ define i32 @and_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@ -416,7 +416,7 @@ define i32 @or_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@ -487,7 +487,7 @@ define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@ -558,7 +558,7 @@ define float @fadd_f32(float* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
@ -629,7 +629,7 @@ define float @fmul_f32(float* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4

View File

@ -176,7 +176,7 @@ define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: [[TMP4]] = add i32 [[TMP3]], [[VEC_PHI]]
@ -222,7 +222,7 @@ define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP1]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP1]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
@ -270,7 +270,7 @@ define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_MASKED_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
@ -317,7 +317,7 @@ define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[WIDE_MASKED_LOAD]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP2]])
; CHECK-NEXT: [[TMP4]] = add i16 [[TMP3]], [[VEC_PHI]]
@ -363,7 +363,7 @@ define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP1]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP1]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> poison)
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[WIDE_MASKED_LOAD]] to <8 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[TMP2]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP3]])
@ -410,7 +410,7 @@ define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[WIDE_MASKED_LOAD]], <16 x i8> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[TMP2]])
; CHECK-NEXT: [[TMP4]] = add i8 [[TMP3]], [[VEC_PHI]]
@ -641,10 +641,10 @@ define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y,
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
@ -693,11 +693,11 @@ define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y,
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP1]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP1]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP4]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP4]], i32 2, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i16> poison)
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD1]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
@ -749,11 +749,11 @@ define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_MASKED_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison)
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[WIDE_MASKED_LOAD1]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
@ -805,10 +805,10 @@ define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture reado
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP1]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP3]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP3]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i16> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[TMP4]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP5]])
@ -857,11 +857,11 @@ define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP1]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP1]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> poison)
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[WIDE_MASKED_LOAD]] to <8 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP4]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP4]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i8> poison)
; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[WIDE_MASKED_LOAD1]] to <8 x i16>
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <8 x i16> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[TMP6]], <8 x i16> zeroinitializer
@ -913,10 +913,10 @@ define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 [[N]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[TMP4]], <16 x i8> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[TMP5]])

View File

@ -46,7 +46,7 @@ define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) {
;CHECK: %4 = load i32*, i32** %2, align 8
;CHECK: %5 = load i32, i32* %3, align 4
;CHECK: %6 = load i32, i32* %4, align 4
;CHECK: %7 = insertelement <2 x i32> undef, i32 %5, i32 0
;CHECK: %7 = insertelement <2 x i32> poison, i32 %5, i32 0
;CHECK: %8 = insertelement <2 x i32> %7, i32 %6, i32 1
;CHECK: %9 = getelementptr inbounds i32, i32* %A, i64 %index
;CHECK: %10 = bitcast i32* %9 to <2 x i32>*

View File

@ -18,7 +18,7 @@ define void @func_21() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 undef, i32 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
@ -28,10 +28,10 @@ define void @func_21() {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:

View File

@ -109,10 +109,10 @@ attributes #0 = { "target-cpu"="knl" }
; FORCE: pred.load.if:
; FORCE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP0]]
; FORCE-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 1
; FORCE-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[TMP7]], i32 0
; FORCE-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE]]
; FORCE: pred.load.continue:
; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ undef, [[PRED_STORE_CONTINUE2]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]]
; FORCE: pred.load.if3:

View File

@ -32,7 +32,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; AVX512-NEXT: [[TMP2:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD]], zeroinitializer
; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[INDEX:%.*]], i64 [[INDEX6]]
; AVX512-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP4]], i32 4, <16 x i1> [[TMP2]], <16 x i32> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP4]], i32 4, <16 x i1> [[TMP2]], <16 x i32> poison)
; AVX512-NEXT: [[TMP5:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD]] to <16 x i64>
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[IN:%.*]], <16 x i64> [[TMP5]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP6]], i32 4, <16 x i1> [[TMP2]], <16 x float> undef)
@ -47,7 +47,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; AVX512-NEXT: [[TMP12:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD_1]], zeroinitializer
; AVX512-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT]]
; AVX512-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP14]], i32 4, <16 x i1> [[TMP12]], <16 x i32> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP14]], i32 4, <16 x i1> [[TMP12]], <16 x i32> poison)
; AVX512-NEXT: [[TMP15:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD_1]] to <16 x i64>
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[IN]], <16 x i64> [[TMP15]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER_1:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP16]], i32 4, <16 x i1> [[TMP12]], <16 x float> undef)
@ -62,7 +62,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; AVX512-NEXT: [[TMP22:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD_2]], zeroinitializer
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT_1]]
; AVX512-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD_2:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP24]], i32 4, <16 x i1> [[TMP22]], <16 x i32> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD_2:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP24]], i32 4, <16 x i1> [[TMP22]], <16 x i32> poison)
; AVX512-NEXT: [[TMP25:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD_2]] to <16 x i64>
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[IN]], <16 x i64> [[TMP25]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER_2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP26]], i32 4, <16 x i1> [[TMP22]], <16 x float> undef)
@ -77,7 +77,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; AVX512-NEXT: [[TMP32:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD_3]], zeroinitializer
; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT_2]]
; AVX512-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD_3:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP34]], i32 4, <16 x i1> [[TMP32]], <16 x i32> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD_3:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* nonnull [[TMP34]], i32 4, <16 x i1> [[TMP32]], <16 x i32> poison)
; AVX512-NEXT: [[TMP35:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD_3]] to <16 x i64>
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[IN]], <16 x i64> [[TMP35]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER_3:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP36]], i32 4, <16 x i1> [[TMP32]], <16 x float> undef)
@ -102,7 +102,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; FVW2-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[INDEX:%.*]], i64 [[INDEX6]]
; FVW2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
; FVW2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* [[TMP4]], i32 4, <2 x i1> [[TMP2]], <2 x i32> undef)
; FVW2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* [[TMP4]], i32 4, <2 x i1> [[TMP2]], <2 x i32> poison)
; FVW2-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD]] to <2 x i64>
; FVW2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[IN:%.*]], <2 x i64> [[TMP5]]
; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP6]], i32 4, <2 x i1> [[TMP2]], <2 x float> undef)
@ -117,7 +117,7 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
; FVW2-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD_1]], zeroinitializer
; FVW2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDEX_NEXT]]
; FVW2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>*
; FVW2-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* nonnull [[TMP14]], i32 4, <2 x i1> [[TMP12]], <2 x i32> undef)
; FVW2-NEXT: [[WIDE_MASKED_LOAD_1:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* nonnull [[TMP14]], i32 4, <2 x i1> [[TMP12]], <2 x i32> poison)
; FVW2-NEXT: [[TMP15:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD_1]] to <2 x i64>
; FVW2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[IN]], <2 x i64> [[TMP15]]
; FVW2-NEXT: [[WIDE_MASKED_GATHER_1:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP16]], i32 4, <2 x i1> [[TMP12]], <2 x float> undef)

View File

@ -277,7 +277,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT18]], <16 x i32>* [[TMP5]], align 4, !alias.scope !17, !noalias !20
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP7]], i32 8, <16 x i1> [[TMP4]], <16 x i32> undef), !alias.scope !23
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP7]], i32 8, <16 x i1> [[TMP4]], <16 x i32> poison), !alias.scope !23
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT20]], i32 4, <16 x i1> [[TMP4]]), !alias.scope !24, !noalias !23
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@ -311,7 +311,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT31]], <8 x i32>* [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX24]]
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP15]], i32 8, <8 x i1> [[TMP12]], <8 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP15]], i32 8, <8 x i1> [[TMP12]], <8 x i32> poison)
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD32]], <8 x i32*> [[BROADCAST_SPLAT34]], i32 4, <8 x i1> [[TMP12]])
; CHECK-NEXT: [[INDEX_NEXT25]] = add i64 [[INDEX24]], 8
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]]

View File

@ -200,7 +200,7 @@ define i32 @test_explicit_pred_generic(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -208,7 +208,7 @@ define i32 @test_explicit_pred_generic(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -216,7 +216,7 @@ define i32 @test_explicit_pred_generic(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -224,7 +224,7 @@ define i32 @test_explicit_pred_generic(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -371,7 +371,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -379,7 +379,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -387,7 +387,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -395,7 +395,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -403,7 +403,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP65:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP66:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP67:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP68:%.*]] = insertelement <4 x i32> undef, i32 [[TMP64]], i32 0
; CHECK-NEXT: [[TMP68:%.*]] = insertelement <4 x i32> poison, i32 [[TMP64]], i32 0
; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP65]], i32 1
; CHECK-NEXT: [[TMP70:%.*]] = insertelement <4 x i32> [[TMP69]], i32 [[TMP66]], i32 2
; CHECK-NEXT: [[TMP71:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP67]], i32 3
@ -411,7 +411,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP73:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP74:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP75:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP76:%.*]] = insertelement <4 x i32> undef, i32 [[TMP72]], i32 0
; CHECK-NEXT: [[TMP76:%.*]] = insertelement <4 x i32> poison, i32 [[TMP72]], i32 0
; CHECK-NEXT: [[TMP77:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP73]], i32 1
; CHECK-NEXT: [[TMP78:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP74]], i32 2
; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP75]], i32 3
@ -419,7 +419,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP81:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP82:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP83:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP84:%.*]] = insertelement <4 x i32> undef, i32 [[TMP80]], i32 0
; CHECK-NEXT: [[TMP84:%.*]] = insertelement <4 x i32> poison, i32 [[TMP80]], i32 0
; CHECK-NEXT: [[TMP85:%.*]] = insertelement <4 x i32> [[TMP84]], i32 [[TMP81]], i32 1
; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP85]], i32 [[TMP82]], i32 2
; CHECK-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP83]], i32 3
@ -427,7 +427,7 @@ define i32 @test_invariant_address(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP89:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP90:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP91:%.*]] = load i32, i32* [[BASE]], align 4
; CHECK-NEXT: [[TMP92:%.*]] = insertelement <4 x i32> undef, i32 [[TMP88]], i32 0
; CHECK-NEXT: [[TMP92:%.*]] = insertelement <4 x i32> poison, i32 [[TMP88]], i32 0
; CHECK-NEXT: [[TMP93:%.*]] = insertelement <4 x i32> [[TMP92]], i32 [[TMP89]], i32 1
; CHECK-NEXT: [[TMP94:%.*]] = insertelement <4 x i32> [[TMP93]], i32 [[TMP90]], i32 2
; CHECK-NEXT: [[TMP95:%.*]] = insertelement <4 x i32> [[TMP94]], i32 [[TMP91]], i32 3
@ -553,7 +553,7 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -561,7 +561,7 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -569,7 +569,7 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -577,7 +577,7 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -588,10 +588,10 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i16, i16* [[TMP65]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP67:%.*]] = bitcast i16* [[TMP66]] to i32*
; CHECK-NEXT: [[TMP68:%.*]] = load i32, i32* [[TMP67]], align 4
; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i32> undef, i32 [[TMP68]], i32 0
; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i32> poison, i32 [[TMP68]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP70:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP69]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP70:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP69]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1
; CHECK-NEXT: br i1 [[TMP71]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
; CHECK: pred.load.if4:
@ -632,10 +632,10 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds i16, i16* [[TMP93]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP95:%.*]] = bitcast i16* [[TMP94]] to i32*
; CHECK-NEXT: [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4
; CHECK-NEXT: [[TMP97:%.*]] = insertelement <4 x i32> undef, i32 [[TMP96]], i32 0
; CHECK-NEXT: [[TMP97:%.*]] = insertelement <4 x i32> poison, i32 [[TMP96]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE11]]
; CHECK: pred.load.continue11:
; CHECK-NEXT: [[TMP98:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP97]], [[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP98:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP97]], [[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP99:%.*]] = extractelement <4 x i1> [[TMP47]], i32 1
; CHECK-NEXT: br i1 [[TMP99]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
; CHECK: pred.load.if12:
@ -676,10 +676,10 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds i16, i16* [[TMP121]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP123:%.*]] = bitcast i16* [[TMP122]] to i32*
; CHECK-NEXT: [[TMP124:%.*]] = load i32, i32* [[TMP123]], align 4
; CHECK-NEXT: [[TMP125:%.*]] = insertelement <4 x i32> undef, i32 [[TMP124]], i32 0
; CHECK-NEXT: [[TMP125:%.*]] = insertelement <4 x i32> poison, i32 [[TMP124]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE19]]
; CHECK: pred.load.continue19:
; CHECK-NEXT: [[TMP126:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP125]], [[PRED_LOAD_IF18]] ]
; CHECK-NEXT: [[TMP126:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP125]], [[PRED_LOAD_IF18]] ]
; CHECK-NEXT: [[TMP127:%.*]] = extractelement <4 x i1> [[TMP55]], i32 1
; CHECK-NEXT: br i1 [[TMP127]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
; CHECK: pred.load.if20:
@ -720,10 +720,10 @@ define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP150:%.*]] = getelementptr inbounds i16, i16* [[TMP149]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP151:%.*]] = bitcast i16* [[TMP150]] to i32*
; CHECK-NEXT: [[TMP152:%.*]] = load i32, i32* [[TMP151]], align 4
; CHECK-NEXT: [[TMP153:%.*]] = insertelement <4 x i32> undef, i32 [[TMP152]], i32 0
; CHECK-NEXT: [[TMP153:%.*]] = insertelement <4 x i32> poison, i32 [[TMP152]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE27]]
; CHECK: pred.load.continue27:
; CHECK-NEXT: [[TMP154:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP153]], [[PRED_LOAD_IF26]] ]
; CHECK-NEXT: [[TMP154:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP153]], [[PRED_LOAD_IF26]] ]
; CHECK-NEXT: [[TMP155:%.*]] = extractelement <4 x i1> [[TMP63]], i32 1
; CHECK-NEXT: br i1 [[TMP155]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]]
; CHECK: pred.load.if28:
@ -890,7 +890,7 @@ define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = load i1, i1* [[TMP20]], align 1
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> undef, i1 [[TMP33]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> poison, i1 [[TMP33]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 1
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 2
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i1> [[TMP39]], i1 [[TMP36]], i32 3
@ -898,7 +898,7 @@ define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = load i1, i1* [[TMP24]], align 1
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> undef, i1 [[TMP41]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> poison, i1 [[TMP41]], i32 0
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 1
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 2
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x i1> [[TMP47]], i1 [[TMP44]], i32 3
@ -906,7 +906,7 @@ define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = load i1, i1* [[TMP28]], align 1
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> undef, i1 [[TMP49]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> poison, i1 [[TMP49]], i32 0
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 1
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 2
; CHECK-NEXT: [[TMP56:%.*]] = insertelement <4 x i1> [[TMP55]], i1 [[TMP52]], i32 3
@ -914,7 +914,7 @@ define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = load i1, i1* [[TMP32]], align 1
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> undef, i1 [[TMP57]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> poison, i1 [[TMP57]], i32 0
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
@ -924,16 +924,16 @@ define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, i32* [[TMP65]], i32 0
; CHECK-NEXT: [[TMP70:%.*]] = bitcast i32* [[TMP69]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP70]], i32 4, <4 x i1> [[TMP40]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP70]], i32 4, <4 x i1> [[TMP40]], <4 x i32> poison)
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[TMP65]], i32 4
; CHECK-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP72]], i32 4, <4 x i1> [[TMP48]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP72]], i32 4, <4 x i1> [[TMP48]], <4 x i32> poison)
; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[TMP65]], i32 8
; CHECK-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP74]], i32 4, <4 x i1> [[TMP56]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP74]], i32 4, <4 x i1> [[TMP56]], <4 x i32> poison)
; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, i32* [[TMP65]], i32 12
; CHECK-NEXT: [[TMP76:%.*]] = bitcast i32* [[TMP75]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP76]], i32 4, <4 x i1> [[TMP64]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP76]], i32 4, <4 x i1> [[TMP64]], <4 x i32> poison)
; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP48]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP79:%.*]] = xor <4 x i1> [[TMP56]], <i1 true, i1 true, i1 true, i1 true>
@ -1063,7 +1063,7 @@ define i32 @test_non_zero_start(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -1071,7 +1071,7 @@ define i32 @test_non_zero_start(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -1079,7 +1079,7 @@ define i32 @test_non_zero_start(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -1087,7 +1087,7 @@ define i32 @test_non_zero_start(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -1097,16 +1097,16 @@ define i32 @test_non_zero_start(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0
; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4
; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8
; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12
; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
@ -1286,7 +1286,7 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -1294,7 +1294,7 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -1302,7 +1302,7 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -1310,7 +1310,7 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -1319,10 +1319,10 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP66:%.*]] = load i32, i32* [[TMP65]], align 4
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> undef, i32 [[TMP66]], i32 0
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP68:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP67]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP68:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP67]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP69:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1
; CHECK-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
; CHECK: pred.load.if4:
@ -1355,10 +1355,10 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
; CHECK: pred.load.if10:
; CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP86:%.*]] = load i32, i32* [[TMP85]], align 4
; CHECK-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> undef, i32 [[TMP86]], i32 0
; CHECK-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> poison, i32 [[TMP86]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE11]]
; CHECK: pred.load.continue11:
; CHECK-NEXT: [[TMP88:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP87]], [[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP88:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP87]], [[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP89:%.*]] = extractelement <4 x i1> [[TMP47]], i32 1
; CHECK-NEXT: br i1 [[TMP89]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
; CHECK: pred.load.if12:
@ -1391,10 +1391,10 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
; CHECK: pred.load.if18:
; CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP106:%.*]] = load i32, i32* [[TMP105]], align 4
; CHECK-NEXT: [[TMP107:%.*]] = insertelement <4 x i32> undef, i32 [[TMP106]], i32 0
; CHECK-NEXT: [[TMP107:%.*]] = insertelement <4 x i32> poison, i32 [[TMP106]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE19]]
; CHECK: pred.load.continue19:
; CHECK-NEXT: [[TMP108:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP107]], [[PRED_LOAD_IF18]] ]
; CHECK-NEXT: [[TMP108:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP107]], [[PRED_LOAD_IF18]] ]
; CHECK-NEXT: [[TMP109:%.*]] = extractelement <4 x i1> [[TMP55]], i32 1
; CHECK-NEXT: br i1 [[TMP109]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
; CHECK: pred.load.if20:
@ -1427,10 +1427,10 @@ define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
; CHECK: pred.load.if26:
; CHECK-NEXT: [[TMP125:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP126:%.*]] = load i32, i32* [[TMP125]], align 4
; CHECK-NEXT: [[TMP127:%.*]] = insertelement <4 x i32> undef, i32 [[TMP126]], i32 0
; CHECK-NEXT: [[TMP127:%.*]] = insertelement <4 x i32> poison, i32 [[TMP126]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE27]]
; CHECK: pred.load.continue27:
; CHECK-NEXT: [[TMP128:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP127]], [[PRED_LOAD_IF26]] ]
; CHECK-NEXT: [[TMP128:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP127]], [[PRED_LOAD_IF26]] ]
; CHECK-NEXT: [[TMP129:%.*]] = extractelement <4 x i1> [[TMP63]], i32 1
; CHECK-NEXT: br i1 [[TMP129]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]]
; CHECK: pred.load.if28:
@ -1581,7 +1581,7 @@ define i32 @neg_off_by_many(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -1589,7 +1589,7 @@ define i32 @neg_off_by_many(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -1597,7 +1597,7 @@ define i32 @neg_off_by_many(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -1605,7 +1605,7 @@ define i32 @neg_off_by_many(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -1615,16 +1615,16 @@ define i32 @neg_off_by_many(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0
; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4
; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8
; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12
; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
@ -1748,7 +1748,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -1756,7 +1756,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -1764,7 +1764,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -1772,7 +1772,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -1782,16 +1782,16 @@ define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0
; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4
; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8
; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12
; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
@ -1915,7 +1915,7 @@ define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -1923,7 +1923,7 @@ define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -1931,7 +1931,7 @@ define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -1939,7 +1939,7 @@ define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -1949,16 +1949,16 @@ define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0
; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4
; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8
; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12
; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
@ -2091,7 +2091,7 @@ define i32 @test_constant_max(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = load i1, i1* [[TMP20]], align 1
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> undef, i1 [[TMP33]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> poison, i1 [[TMP33]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 1
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 2
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i1> [[TMP39]], i1 [[TMP36]], i32 3
@ -2099,7 +2099,7 @@ define i32 @test_constant_max(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = load i1, i1* [[TMP24]], align 1
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> undef, i1 [[TMP41]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> poison, i1 [[TMP41]], i32 0
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 1
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 2
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x i1> [[TMP47]], i1 [[TMP44]], i32 3
@ -2107,7 +2107,7 @@ define i32 @test_constant_max(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = load i1, i1* [[TMP28]], align 1
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> undef, i1 [[TMP49]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> poison, i1 [[TMP49]], i32 0
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 1
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 2
; CHECK-NEXT: [[TMP56:%.*]] = insertelement <4 x i1> [[TMP55]], i1 [[TMP52]], i32 3
@ -2115,7 +2115,7 @@ define i32 @test_constant_max(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = load i1, i1* [[TMP32]], align 1
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> undef, i1 [[TMP57]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> poison, i1 [[TMP57]], i32 0
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
@ -2265,7 +2265,7 @@ define i32 @test_allocsize(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -2273,7 +2273,7 @@ define i32 @test_allocsize(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -2281,7 +2281,7 @@ define i32 @test_allocsize(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -2289,7 +2289,7 @@ define i32 @test_allocsize(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -2433,7 +2433,7 @@ define i32 @test_allocsize_array(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -2441,7 +2441,7 @@ define i32 @test_allocsize_array(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -2449,7 +2449,7 @@ define i32 @test_allocsize_array(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -2457,7 +2457,7 @@ define i32 @test_allocsize_array(i64 %len, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -2611,7 +2611,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, i1* %test_base) {
; CHECK-NEXT: [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> undef, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
@ -2619,7 +2619,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, i1* %test_base) {
; CHECK-NEXT: [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT: [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> undef, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
@ -2627,7 +2627,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, i1* %test_base) {
; CHECK-NEXT: [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT: [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> undef, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
@ -2635,7 +2635,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, i1* %test_base) {
; CHECK-NEXT: [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT: [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT: [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> undef, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
@ -2645,16 +2645,16 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, i1* %test_base) {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0
; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4
; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8
; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12
; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>

View File

@ -53,7 +53,7 @@ define void @foo1(i32* nocapture %A, i32* nocapture readonly %B, i32* nocapture
; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; AVX1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x i32> undef), !alias.scope !3
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x i32> poison), !alias.scope !3
; AVX1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
@ -144,16 +144,16 @@ define void @foo1(i32* nocapture %A, i32* nocapture readonly %B, i32* nocapture
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
; AVX2-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x i32> poison), !alias.scope !3
; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 8
; AVX2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x i32> poison), !alias.scope !3
; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 16
; AVX2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x i32> poison), !alias.scope !3
; AVX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 24
; AVX2-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x i32> undef), !alias.scope !3
; AVX2-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x i32> poison), !alias.scope !3
; AVX2-NEXT: [[TMP32:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX2-NEXT: [[TMP33:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD15]], [[WIDE_LOAD12]]
; AVX2-NEXT: [[TMP34:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD16]], [[WIDE_LOAD13]]
@ -261,16 +261,16 @@ define void @foo1(i32* nocapture %A, i32* nocapture readonly %B, i32* nocapture
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
; AVX512-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x i32> poison), !alias.scope !3
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 16
; AVX512-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x i32> poison), !alias.scope !3
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 32
; AVX512-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x i32> poison), !alias.scope !3
; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 48
; AVX512-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <16 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x i32> undef), !alias.scope !3
; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x i32> poison), !alias.scope !3
; AVX512-NEXT: [[TMP32:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX512-NEXT: [[TMP33:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD15]], [[WIDE_LOAD12]]
; AVX512-NEXT: [[TMP34:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD16]], [[WIDE_LOAD13]]
@ -313,7 +313,7 @@ define void @foo1(i32* nocapture %A, i32* nocapture readonly %B, i32* nocapture
; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP49]]
; AVX512-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, i32* [[TMP54]], i32 0
; AVX512-NEXT: [[TMP56:%.*]] = bitcast i32* [[TMP55]] to <8 x i32>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD22:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP56]], i32 4, <8 x i1> [[TMP53]], <8 x i32> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD22:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP56]], i32 4, <8 x i1> [[TMP53]], <8 x i32> poison)
; AVX512-NEXT: [[TMP57:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD22]], [[WIDE_LOAD21]]
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP49]]
; AVX512-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, i32* [[TMP58]], i32 0
@ -415,7 +415,7 @@ define void @foo1_addrspace1(i32 addrspace(1)* nocapture %A, i32 addrspace(1)* n
; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP5]], i32 0
; AVX1-NEXT: [[TMP7:%.*]] = bitcast i32 addrspace(1)* [[TMP6]] to <8 x i32> addrspace(1)*
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x i32> undef), !alias.scope !14
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x i32> poison), !alias.scope !14
; AVX1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP9]], i32 0
@ -506,16 +506,16 @@ define void @foo1_addrspace1(i32 addrspace(1)* nocapture %A, i32 addrspace(1)* n
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 0
; AVX2-NEXT: [[TMP25:%.*]] = bitcast i32 addrspace(1)* [[TMP24]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x i32> undef), !alias.scope !14
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x i32> poison), !alias.scope !14
; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 8
; AVX2-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(1)* [[TMP26]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x i32> undef), !alias.scope !14
; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x i32> poison), !alias.scope !14
; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 16
; AVX2-NEXT: [[TMP29:%.*]] = bitcast i32 addrspace(1)* [[TMP28]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x i32> undef), !alias.scope !14
; AVX2-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x i32> poison), !alias.scope !14
; AVX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 24
; AVX2-NEXT: [[TMP31:%.*]] = bitcast i32 addrspace(1)* [[TMP30]] to <8 x i32> addrspace(1)*
; AVX2-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x i32> undef), !alias.scope !14
; AVX2-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x i32> poison), !alias.scope !14
; AVX2-NEXT: [[TMP32:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX2-NEXT: [[TMP33:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD15]], [[WIDE_LOAD12]]
; AVX2-NEXT: [[TMP34:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD16]], [[WIDE_LOAD13]]
@ -623,16 +623,16 @@ define void @foo1_addrspace1(i32 addrspace(1)* nocapture %A, i32 addrspace(1)* n
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 0
; AVX512-NEXT: [[TMP25:%.*]] = bitcast i32 addrspace(1)* [[TMP24]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x i32> undef), !alias.scope !16
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x i32> poison), !alias.scope !16
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 16
; AVX512-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(1)* [[TMP26]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x i32> undef), !alias.scope !16
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x i32> poison), !alias.scope !16
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 32
; AVX512-NEXT: [[TMP29:%.*]] = bitcast i32 addrspace(1)* [[TMP28]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x i32> undef), !alias.scope !16
; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x i32> poison), !alias.scope !16
; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP20]], i32 48
; AVX512-NEXT: [[TMP31:%.*]] = bitcast i32 addrspace(1)* [[TMP30]] to <16 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x i32> undef), !alias.scope !16
; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x i32> poison), !alias.scope !16
; AVX512-NEXT: [[TMP32:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX512-NEXT: [[TMP33:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD15]], [[WIDE_LOAD12]]
; AVX512-NEXT: [[TMP34:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD16]], [[WIDE_LOAD13]]
@ -675,7 +675,7 @@ define void @foo1_addrspace1(i32 addrspace(1)* nocapture %A, i32 addrspace(1)* n
; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[TMP49]]
; AVX512-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP54]], i32 0
; AVX512-NEXT: [[TMP56:%.*]] = bitcast i32 addrspace(1)* [[TMP55]] to <8 x i32> addrspace(1)*
; AVX512-NEXT: [[WIDE_MASKED_LOAD22:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP56]], i32 4, <8 x i1> [[TMP53]], <8 x i32> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD22:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP56]], i32 4, <8 x i1> [[TMP53]], <8 x i32> poison)
; AVX512-NEXT: [[TMP57:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD22]], [[WIDE_LOAD21]]
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[TMP49]]
; AVX512-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP58]], i32 0
@ -786,7 +786,7 @@ define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapt
; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0
; AVX1-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <8 x float>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x float> undef), !alias.scope !24
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x float> poison), !alias.scope !24
; AVX1-NEXT: [[TMP8:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
; AVX1-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP8]]
; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
@ -879,16 +879,16 @@ define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapt
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 0
; AVX2-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <8 x float>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x float> poison), !alias.scope !24
; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 8
; AVX2-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <8 x float>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x float> poison), !alias.scope !24
; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 16
; AVX2-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <8 x float>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x float> poison), !alias.scope !24
; AVX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 24
; AVX2-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <8 x float>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x float> undef), !alias.scope !24
; AVX2-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x float> poison), !alias.scope !24
; AVX2-NEXT: [[TMP32:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
; AVX2-NEXT: [[TMP33:%.*]] = sitofp <8 x i32> [[WIDE_LOAD12]] to <8 x float>
; AVX2-NEXT: [[TMP34:%.*]] = sitofp <8 x i32> [[WIDE_LOAD13]] to <8 x float>
@ -1001,16 +1001,16 @@ define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapt
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 0
; AVX512-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <16 x float>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x float> undef), !alias.scope !27
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP25]], i32 4, <16 x i1> [[TMP16]], <16 x float> poison), !alias.scope !27
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 16
; AVX512-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <16 x float>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x float> undef), !alias.scope !27
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP27]], i32 4, <16 x i1> [[TMP17]], <16 x float> poison), !alias.scope !27
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 32
; AVX512-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <16 x float>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x float> undef), !alias.scope !27
; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP29]], i32 4, <16 x i1> [[TMP18]], <16 x float> poison), !alias.scope !27
; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 48
; AVX512-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <16 x float>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x float> undef), !alias.scope !27
; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP31]], i32 4, <16 x i1> [[TMP19]], <16 x float> poison), !alias.scope !27
; AVX512-NEXT: [[TMP32:%.*]] = sitofp <16 x i32> [[WIDE_LOAD]] to <16 x float>
; AVX512-NEXT: [[TMP33:%.*]] = sitofp <16 x i32> [[WIDE_LOAD12]] to <16 x float>
; AVX512-NEXT: [[TMP34:%.*]] = sitofp <16 x i32> [[WIDE_LOAD13]] to <16 x float>
@ -1057,7 +1057,7 @@ define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapt
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP53]]
; AVX512-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[TMP58]], i32 0
; AVX512-NEXT: [[TMP60:%.*]] = bitcast float* [[TMP59]] to <8 x float>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD22:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP60]], i32 4, <8 x i1> [[TMP57]], <8 x float> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD22:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP60]], i32 4, <8 x i1> [[TMP57]], <8 x float> poison)
; AVX512-NEXT: [[TMP61:%.*]] = sitofp <8 x i32> [[WIDE_LOAD21]] to <8 x float>
; AVX512-NEXT: [[TMP62:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD22]], [[TMP61]]
; AVX512-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP53]]
@ -1192,16 +1192,16 @@ define void @foo3(double* nocapture %A, double* nocapture readonly %B, i32* noca
; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP3]]
; AVX-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 0
; AVX-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>*
; AVX-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP25]], i32 8, <4 x i1> [[TMP16]], <4 x double> undef), !alias.scope !34
; AVX-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP25]], i32 8, <4 x i1> [[TMP16]], <4 x double> poison), !alias.scope !34
; AVX-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 4
; AVX-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>*
; AVX-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP27]], i32 8, <4 x i1> [[TMP17]], <4 x double> undef), !alias.scope !34
; AVX-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP27]], i32 8, <4 x i1> [[TMP17]], <4 x double> poison), !alias.scope !34
; AVX-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 8
; AVX-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <4 x double>*
; AVX-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP29]], i32 8, <4 x i1> [[TMP18]], <4 x double> undef), !alias.scope !34
; AVX-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP29]], i32 8, <4 x i1> [[TMP18]], <4 x double> poison), !alias.scope !34
; AVX-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 12
; AVX-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <4 x double>*
; AVX-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP31]], i32 8, <4 x i1> [[TMP19]], <4 x double> undef), !alias.scope !34
; AVX-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP31]], i32 8, <4 x i1> [[TMP19]], <4 x double> poison), !alias.scope !34
; AVX-NEXT: [[TMP32:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double>
; AVX-NEXT: [[TMP33:%.*]] = sitofp <4 x i32> [[WIDE_LOAD12]] to <4 x double>
; AVX-NEXT: [[TMP34:%.*]] = sitofp <4 x i32> [[WIDE_LOAD13]] to <4 x double>
@ -1312,16 +1312,16 @@ define void @foo3(double* nocapture %A, double* nocapture readonly %B, i32* noca
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 0
; AVX512-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP24]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP25]], i32 8, <8 x i1> [[TMP16]], <8 x double> undef), !alias.scope !38
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP25]], i32 8, <8 x i1> [[TMP16]], <8 x double> poison), !alias.scope !38
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 8
; AVX512-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP27]], i32 8, <8 x i1> [[TMP17]], <8 x double> undef), !alias.scope !38
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP27]], i32 8, <8 x i1> [[TMP17]], <8 x double> poison), !alias.scope !38
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 16
; AVX512-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP29]], i32 8, <8 x i1> [[TMP18]], <8 x double> undef), !alias.scope !38
; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP29]], i32 8, <8 x i1> [[TMP18]], <8 x double> poison), !alias.scope !38
; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 24
; AVX512-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP31]], i32 8, <8 x i1> [[TMP19]], <8 x double> undef), !alias.scope !38
; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP31]], i32 8, <8 x i1> [[TMP19]], <8 x double> poison), !alias.scope !38
; AVX512-NEXT: [[TMP32:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x double>
; AVX512-NEXT: [[TMP33:%.*]] = sitofp <8 x i32> [[WIDE_LOAD12]] to <8 x double>
; AVX512-NEXT: [[TMP34:%.*]] = sitofp <8 x i32> [[WIDE_LOAD13]] to <8 x double>
@ -1711,25 +1711,25 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s
; AVX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -3
; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE18]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE18]], <4 x double> poison), !alias.scope !44
; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -4
; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -3
; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope !44
; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
; AVX2-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -3
; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> poison), !alias.scope !44
; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -12
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -3
; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> poison), !alias.scope !44
; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP40:%.*]] = fadd <4 x double> [[REVERSE19]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX2-NEXT: [[TMP41:%.*]] = fadd <4 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@ -1855,25 +1855,25 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s
; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -7
; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE18]], <8 x double> undef), !alias.scope !58
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE18]], <8 x double> poison), !alias.scope !58
; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -7
; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> undef), !alias.scope !58
; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope !58
; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -16
; AVX512-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -7
; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> undef), !alias.scope !58
; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> poison), !alias.scope !58
; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -24
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -7
; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !58
; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> poison), !alias.scope !58
; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP40:%.*]] = fadd <8 x double> [[REVERSE19]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX512-NEXT: [[TMP41:%.*]] = fadd <8 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@ -2020,16 +2020,16 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
; AVX1-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
; AVX1-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> undef)
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> poison)
; AVX1-NEXT: [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 4
; AVX1-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> undef)
; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> poison)
; AVX1-NEXT: [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
; AVX1-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> undef)
; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> poison)
; AVX1-NEXT: [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 12
; AVX1-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> undef)
; AVX1-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> poison)
; AVX1-NEXT: [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX1-NEXT: [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX1-NEXT: [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD5]], zeroinitializer
@ -2144,16 +2144,16 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
; AVX2-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
; AVX2-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> undef)
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> poison)
; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 4
; AVX2-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> undef)
; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> poison)
; AVX2-NEXT: [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
; AVX2-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> undef)
; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> poison)
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 12
; AVX2-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> undef)
; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> poison)
; AVX2-NEXT: [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX2-NEXT: [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX2-NEXT: [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD5]], zeroinitializer
@ -2268,16 +2268,16 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
; AVX512-NEXT: [[TMP31:%.*]] = xor <8 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
; AVX512-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <8 x double*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP33]], i32 8, <8 x i1> [[TMP28]], <8 x double*> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP33]], i32 8, <8 x i1> [[TMP28]], <8 x double*> poison)
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
; AVX512-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <8 x double*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP35]], i32 8, <8 x i1> [[TMP29]], <8 x double*> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP35]], i32 8, <8 x i1> [[TMP29]], <8 x double*> poison)
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 16
; AVX512-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <8 x double*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP37]], i32 8, <8 x i1> [[TMP30]], <8 x double*> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP37]], i32 8, <8 x i1> [[TMP30]], <8 x double*> poison)
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 24
; AVX512-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <8 x double*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP39]], i32 8, <8 x i1> [[TMP31]], <8 x double*> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP39]], i32 8, <8 x i1> [[TMP31]], <8 x double*> poison)
; AVX512-NEXT: [[TMP40:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX512-NEXT: [[TMP41:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX512-NEXT: [[TMP42:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD5]], zeroinitializer
@ -2437,16 +2437,16 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
; AVX1-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
; AVX1-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> undef)
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> poison)
; AVX1-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 4
; AVX1-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> undef)
; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> poison)
; AVX1-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX1-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> undef)
; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> poison)
; AVX1-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 12
; AVX1-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> undef)
; AVX1-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> poison)
; AVX1-NEXT: [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX1-NEXT: [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX1-NEXT: [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD5]], zeroinitializer
@ -2561,16 +2561,16 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
; AVX2-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
; AVX2-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> undef)
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> poison)
; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 4
; AVX2-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> undef)
; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> poison)
; AVX2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX2-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> undef)
; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> poison)
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 12
; AVX2-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> undef)
; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> poison)
; AVX2-NEXT: [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX2-NEXT: [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX2-NEXT: [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD5]], zeroinitializer
@ -2685,16 +2685,16 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
; AVX512-NEXT: [[TMP31:%.*]] = xor <8 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
; AVX512-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP33]], i32 8, <8 x i1> [[TMP28]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP33]], i32 8, <8 x i1> [[TMP28]], <8 x i32 ()*> poison)
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX512-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP35]], i32 8, <8 x i1> [[TMP29]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP35]], i32 8, <8 x i1> [[TMP29]], <8 x i32 ()*> poison)
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 16
; AVX512-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP37]], i32 8, <8 x i1> [[TMP30]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP37]], i32 8, <8 x i1> [[TMP30]], <8 x i32 ()*> poison)
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 24
; AVX512-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP39]], i32 8, <8 x i1> [[TMP31]], <8 x i32 ()*> undef)
; AVX512-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP39]], i32 8, <8 x i1> [[TMP31]], <8 x i32 ()*> poison)
; AVX512-NEXT: [[TMP40:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX512-NEXT: [[TMP41:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX512-NEXT: [[TMP42:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD5]], zeroinitializer

View File

@ -1769,7 +1769,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O3DEFAULT-NEXT: entry:
; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O3DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O3DEFAULT-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O3DEFAULT-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O3DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
; O3DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP1]], [[TMP3]]
; O3DEFAULT-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*

View File

@ -25,7 +25,7 @@ define i32 @foo_optsize() #0 {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <64 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* [[TMP4]], i32 1, <64 x i1> [[TMP1]], <64 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* [[TMP4]], i32 1, <64 x i1> [[TMP1]], <64 x i8> poison)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to <64 x i8>*
@ -66,7 +66,7 @@ define i32 @foo_optsize() #0 {
; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]]
; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
; AUTOVF-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <32 x i8>*
; AUTOVF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* [[TMP4]], i32 1, <32 x i1> [[TMP1]], <32 x i8> undef)
; AUTOVF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* [[TMP4]], i32 1, <32 x i1> [[TMP1]], <32 x i8> poison)
; AUTOVF-NEXT: [[TMP5:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer
; AUTOVF-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AUTOVF-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to <32 x i8>*
@ -129,7 +129,7 @@ define i32 @foo_minsize() #1 {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <64 x i8>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* [[TMP4]], i32 1, <64 x i1> [[TMP1]], <64 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* [[TMP4]], i32 1, <64 x i1> [[TMP1]], <64 x i8> poison)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to <64 x i8>*
@ -170,7 +170,7 @@ define i32 @foo_minsize() #1 {
; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]]
; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
; AUTOVF-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <32 x i8>*
; AUTOVF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* [[TMP4]], i32 1, <32 x i1> [[TMP1]], <32 x i8> undef)
; AUTOVF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* [[TMP4]], i32 1, <32 x i1> [[TMP1]], <32 x i8> poison)
; AUTOVF-NEXT: [[TMP5:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer
; AUTOVF-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AUTOVF-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to <32 x i8>*

View File

@ -160,10 +160,10 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY9]] ], [ [[TMP27]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY9]] ], [ [[TMP27]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
; CHECK: pred.load.if30:
@ -196,10 +196,10 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK: pred.load.if36:
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i32> undef, i32 [[TMP46]], i32 0
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i32> poison, i32 [[TMP46]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE37]]
; CHECK: pred.load.continue37:
; CHECK-NEXT: [[TMP48:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE35]] ], [ [[TMP47]], [[PRED_LOAD_IF36]] ]
; CHECK-NEXT: [[TMP48:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE35]] ], [ [[TMP47]], [[PRED_LOAD_IF36]] ]
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1
; CHECK-NEXT: br i1 [[TMP49]], label [[PRED_LOAD_IF38:%.*]], label [[PRED_LOAD_CONTINUE39:%.*]]
; CHECK: pred.load.if38:
@ -345,7 +345,7 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
; CHECK: pred.load.if16:
@ -354,7 +354,7 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE17]]
; CHECK: pred.load.continue17:
; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ undef, [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF16]] ]
; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF16]] ]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
; CHECK: pred.load.if18:
@ -363,7 +363,7 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE19]]
; CHECK: pred.load.continue19:
; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ undef, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP14]], [[PRED_LOAD_IF18]] ]
; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP14]], [[PRED_LOAD_IF18]] ]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
; CHECK: pred.load.if20:
@ -372,7 +372,7 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE21]]
; CHECK: pred.load.continue21:
; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ undef, [[PRED_LOAD_CONTINUE19]] ], [ [[TMP18]], [[PRED_LOAD_IF20]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE19]] ], [ [[TMP18]], [[PRED_LOAD_IF20]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
@ -545,7 +545,7 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst
; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[NEXT_GEP]], align 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP4:%.*]] = phi i16 [ undef, [[VECTOR_BODY]] ], [ [[TMP3]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i16 [ poison, [[VECTOR_BODY]] ], [ [[TMP3]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK: pred.load.if11:
@ -554,7 +554,7 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst
; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[NEXT_GEP4]], align 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK: pred.load.continue12:
; CHECK-NEXT: [[TMP8:%.*]] = phi i16 [ undef, [[PRED_LOAD_CONTINUE]] ], [ [[TMP7]], [[PRED_LOAD_IF11]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP7]], [[PRED_LOAD_IF11]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
; CHECK: pred.load.if13:
@ -563,7 +563,7 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst
; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[NEXT_GEP5]], align 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.continue14:
; CHECK-NEXT: [[TMP12:%.*]] = phi i16 [ undef, [[PRED_LOAD_CONTINUE12]] ], [ [[TMP11]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP12:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE12]] ], [ [[TMP11]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK: pred.load.if15:
@ -572,7 +572,7 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst
; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[NEXT_GEP6]], align 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; CHECK: pred.load.continue16:
; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ undef, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP15]], [[PRED_LOAD_IF15]] ]
; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP15]], [[PRED_LOAD_IF15]] ]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:

View File

@ -30,7 +30,7 @@ define i32 @matrix_row_col([100 x i32]* nocapture readonly %data, i32 %i, i32 %j
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !tbaa !1
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, [[TBAA1:!tbaa !.*]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
@ -39,15 +39,15 @@ define i32 @matrix_row_col([100 x i32]* nocapture readonly %data, i32 %i, i32 %j
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP5]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP6]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP7]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP12]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP13]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP14]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP15]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP16]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP17]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP11]], align 4, [[TBAA1]]
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP12]], align 4, [[TBAA1]]
; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP13]], align 4, [[TBAA1]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP14]], align 4, [[TBAA1]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP15]], align 4, [[TBAA1]]
; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP16]], align 4, [[TBAA1]]
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP17]], align 4, [[TBAA1]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP18]], align 4, [[TBAA1]]
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP20]], i32 1
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP21]], i32 2
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP22]], i32 3
@ -60,7 +60,7 @@ define i32 @matrix_row_col([100 x i32]* nocapture readonly %data, i32 %i, i32 %j
; CHECK-NEXT: [[TMP37]] = add <8 x i32> [[TMP36]], [[TMP35]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP37]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 96
@ -76,15 +76,15 @@ define i32 @matrix_row_col([100 x i32]* nocapture readonly %data, i32 %i, i32 %j
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP40:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP40:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4, [[TBAA1]]
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, [[TBAA1]]
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP41]], [[TMP40]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4
; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !7
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]]
;
entry:
%idxprom = sext i32 %i to i64

View File

@ -21,11 +21,11 @@ define dso_local void @tail_folding_enabled(i32* noalias nocapture %A, i32* noal
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP4]], i32 4, <8 x i1> [[TMP1]], <8 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP4]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP1]], <8 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
@ -92,11 +92,11 @@ define dso_local void @tail_folding_disabled(i32* noalias nocapture %A, i32* noa
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP4]], i32 4, <8 x i1> [[TMP1]], <8 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP4]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP1]], <8 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
@ -181,11 +181,11 @@ define i32 @reduction_i32(i32* nocapture readonly %A, i32* nocapture readonly %B
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP10]], i32 4, <8 x i1> [[TMP4]], <8 x i32> undef)
; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP10]], i32 4, <8 x i1> [[TMP4]], <8 x i32> poison)
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP12]] = add <8 x i32> [[TMP11]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP12]], <8 x i32> [[VEC_PHI]]

View File

@ -513,7 +513,7 @@ define i32 @test_count_bits(i8* %test_base) {
; CHECK-NEXT: [[TMP27:%.*]] = load i8, i8* [[TMP13]], align 1
; CHECK-NEXT: [[TMP28:%.*]] = load i8, i8* [[TMP15]], align 1
; CHECK-NEXT: [[TMP29:%.*]] = load i8, i8* [[TMP17]], align 1
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i8> undef, i8 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i8> poison, i8 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i8> [[TMP30]], i8 [[TMP27]], i32 1
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i8> [[TMP31]], i8 [[TMP28]], i32 2
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i8> [[TMP32]], i8 [[TMP29]], i32 3
@ -521,7 +521,7 @@ define i32 @test_count_bits(i8* %test_base) {
; CHECK-NEXT: [[TMP35:%.*]] = load i8, i8* [[TMP21]], align 1
; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* [[TMP23]], align 1
; CHECK-NEXT: [[TMP37:%.*]] = load i8, i8* [[TMP25]], align 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> undef, i8 [[TMP34]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> poison, i8 [[TMP34]], i32 0
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 1
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i8> [[TMP39]], i8 [[TMP36]], i32 2
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i8> [[TMP40]], i8 [[TMP37]], i32 3

View File

@ -102,11 +102,11 @@ define void @vectorized1(float* noalias nocapture %A, float* noalias nocapture r
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <8 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP4]], i32 4, <8 x i1> [[TMP1]], <8 x float> undef), !llvm.access.group !6
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP4]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group !6
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <8 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP7]], i32 4, <8 x i1> [[TMP1]], <8 x float> undef), !llvm.access.group !6
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP7]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group !6
; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP6]] to <8 x float>*
; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP8]], <8 x float>* [[TMP9]], i32 4, <8 x i1> [[TMP1]]), !llvm.access.group !6

View File

@ -47,10 +47,10 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.load.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if1:
@ -145,7 +145,7 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i8>*
@ -223,10 +223,10 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.load.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if1:
@ -322,7 +322,7 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>*
@ -415,10 +415,10 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.load.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if3:
@ -524,7 +524,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[TMP6]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[TMP6]], <16 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>*
@ -622,10 +622,10 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.load.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if3:
@ -731,7 +731,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <24 x i8> @llvm.masked.load.v24i8.p0v24i8(<24 x i8>* [[TMP5]], i32 1, <24 x i1> [[TMP6]], <24 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <24 x i8> @llvm.masked.load.v24i8.p0v24i8(<24 x i8>* [[TMP5]], i32 1, <24 x i1> [[TMP6]], <24 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>*
@ -829,7 +829,7 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP12]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP14]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = load i8, i8* [[TMP16]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = insertelement <8 x i8> undef, i8 [[TMP17]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = insertelement <8 x i8> poison, i8 [[TMP17]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP18]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = insertelement <8 x i8> [[TMP26]], i8 [[TMP19]], i32 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP20]], i32 3
@ -855,7 +855,7 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read
; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = shl nuw nsw i32 [[INDEX]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP0]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP2]], i32 1, <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP2]], i32 1, <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>*
@ -928,10 +928,10 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.load.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if1:
@ -1034,7 +1034,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>*
@ -1113,10 +1113,10 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.load.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if1:
@ -1194,10 +1194,10 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP52]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = load i8, i8* [[TMP53]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> undef, i8 [[TMP54]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; DISABLED_MASKED_STRIDED: pred.load.continue16:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if17:
@ -1436,7 +1436,7 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1
@ -1542,10 +1542,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.load.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if3:
@ -1623,10 +1623,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 x i32> [[TMP52]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP54]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = load i8, i8* [[TMP55]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = insertelement <8 x i8> undef, i8 [[TMP56]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = insertelement <8 x i8> poison, i8 [[TMP56]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE18]]
; DISABLED_MASKED_STRIDED: pred.load.continue18:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE16]] ], [ [[TMP57]], [[PRED_LOAD_IF17]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = phi <8 x i8> [ poison, [[PRED_LOAD_CONTINUE16]] ], [ [[TMP57]], [[PRED_LOAD_IF17]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP59]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if19:
@ -1874,7 +1874,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], 1
@ -1982,10 +1982,10 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]]
; DISABLED_MASKED_STRIDED: pred.load.continue:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if1:
@ -2063,10 +2063,10 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP52]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = load i8, i8* [[TMP53]], align 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> undef, i8 [[TMP54]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; DISABLED_MASKED_STRIDED: pred.load.continue16:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
; DISABLED_MASKED_STRIDED: pred.load.if17:
@ -2312,7 +2312,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1

View File

@ -16,10 +16,10 @@ target triple = "x86_64-apple-macosx10.8.0"
; CHECK: [[IF0]]:
; CHECK: %[[T0:.+]] = extractelement <2 x i32> %wide.masked.load, i32 0
; CHECK: %[[T1:.+]] = sdiv i32 %[[T0]], %x
; CHECK: %[[T2:.+]] = insertelement <2 x i32> undef, i32 %[[T1]], i32 0
; CHECK: %[[T2:.+]] = insertelement <2 x i32> poison, i32 %[[T1]], i32 0
; CHECK: br label %[[CONT0]]
; CHECK: [[CONT0]]:
; CHECK: %[[T3:.+]] = phi <2 x i32> [ undef, %vector.body ], [ %[[T2]], %[[IF0]] ]
; CHECK: %[[T3:.+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[T2]], %[[IF0]] ]
; CHECK: br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
; CHECK: [[IF1]]:
; CHECK: %[[T4:.+]] = extractelement <2 x i32> %wide.masked.load, i32 1

View File

@ -407,7 +407,7 @@ for.end:
; CHECK: %next.gep11 = getelementptr i32, i32* %a, i64 %[[I2]]
; CHECK: %[[I3:.+]] = or i64 %index, 3
; CHECK: %next.gep12 = getelementptr i32, i32* %a, i64 %[[I3]]
; CHECK: %[[V0:.+]] = insertelement <4 x i32*> undef, i32* %next.gep, i32 0
; CHECK: %[[V0:.+]] = insertelement <4 x i32*> poison, i32* %next.gep, i32 0
; CHECK: %[[V1:.+]] = insertelement <4 x i32*> %[[V0]], i32* %next.gep10, i32 1
; CHECK: %[[V2:.+]] = insertelement <4 x i32*> %[[V1]], i32* %next.gep11, i32 2
; CHECK: %[[V3:.+]] = insertelement <4 x i32*> %[[V2]], i32* %next.gep12, i32 3

View File

@ -10,7 +10,7 @@ define void @can_sink_after_store(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr
; CHECK-LABEL: vector.ph:
; CHECK: %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
; CHECK-NEXT: %vector.recur.init = insertelement <4 x i32> poison, i32 %.pre, i32 3
; CHECK-NEXT: br label %vector.body
; CHECK-LABEL: vector.body:
@ -64,7 +64,7 @@ define void @sink_sdiv(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 {
; CHECK-LABEL: vector.ph:
; CHECK: %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
; CHECK-NEXT: %vector.recur.init = insertelement <4 x i32> poison, i32 %.pre, i32 3
; CHECK-NEXT: br label %vector.body
; CHECK-LABEL: vector.body:

View File

@ -14,7 +14,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
;
; CHECK-LABEL: @recurrence_1(
; CHECK: vector.ph:
; CHECK: %vector.recur.init = insertelement <4 x i32> undef, i32 %pre_load, i32 3
; CHECK: %vector.recur.init = insertelement <4 x i32> poison, i32 %pre_load, i32 3
; CHECK: vector.body:
; CHECK: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
; CHECK: [[L1]] = load <4 x i32>
@ -71,7 +71,7 @@ for.exit:
;
; CHECK-LABEL: @recurrence_2(
; CHECK: vector.ph:
; CHECK: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
; CHECK: %vector.recur.init = insertelement <4 x i32> poison, i32 %.pre, i32 3
; CHECK: vector.body:
; CHECK: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
; CHECK: [[L1]] = load <4 x i32>
@ -108,13 +108,13 @@ for.cond.cleanup.loopexit:
br label %for.cond.cleanup
for.cond.cleanup:
%minmax.0.lcssa = phi i32 [ undef, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
%minmax.0.lcssa = phi i32 [ poison, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
ret i32 %minmax.0.lcssa
scalar.body:
%0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
%indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
%minmax.028 = phi i32 [ undef, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
%minmax.028 = phi i32 [ poison, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%1 = load i32, i32* %arrayidx, align 4
%sub3 = sub nsw i32 %1, %0
@ -136,7 +136,7 @@ scalar.body:
;
; CHECK-LABEL: @recurrence_3(
; CHECK: vector.ph:
; CHECK: %vector.recur.init = insertelement <4 x i16> undef, i16 %0, i32 3
; CHECK: %vector.recur.init = insertelement <4 x i16> poison, i16 %0, i32 3
; CHECK: vector.body:
; CHECK: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
; CHECK: [[L1]] = load <4 x i16>
@ -268,7 +268,7 @@ entry:
for.cond1.preheader:
%i.016 = phi i32 [ 1, %entry ], [ %inc, %for.cond.cleanup3 ]
%e.015 = phi i32 [ undef, %entry ], [ %e.1.lcssa, %for.cond.cleanup3 ]
%e.015 = phi i32 [ poison, %entry ], [ %e.1.lcssa, %for.cond.cleanup3 ]
br label %for.cond1
for.cond.cleanup:
@ -291,7 +291,7 @@ for.cond.cleanup3:
; UNROLL-NO-IC-LABEL: @PR30183(
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> undef, i32 [[PRE_LOAD:%.*]], i32 3
; UNROLL-NO-IC: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
; UNROLL-NO-IC-NEXT: br label %vector.body
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
@ -300,7 +300,7 @@ for.cond.cleanup3:
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> undef, i32 [[TMP27]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> poison, i32 [[TMP27]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP28]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP36]], i32 [[TMP29]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP30]], i32 3
@ -308,7 +308,7 @@ for.cond.cleanup3:
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> undef, i32 [[TMP31]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> poison, i32 [[TMP31]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP32]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP33]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP42]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP34]], i32 3
@ -336,7 +336,7 @@ for.end:
; UNROLL-NO-IC-LABEL: @constant_folded_previous_value(
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 undef, i64 undef, i64 undef, i64 0>, %vector.ph ], [ <i64 1, i64 1, i64 1, i64 1>, %vector.body ]
; UNROLL-NO-IC: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, %vector.ph ], [ <i64 1, i64 1, i64 1, i64 1>, %vector.body ]
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC: br i1 {{.*}}, label %middle.block, label %vector.body
;
@ -615,8 +615,8 @@ define void @sink_dead_inst() {
; SINK-AFTER-LABEL: vector.body: ; preds = %vector.body, %vector.ph
; SINK-AFTER-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; SINK-AFTER-NEXT: %vec.ind = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; SINK-AFTER-NEXT: %vector.recur = phi <4 x i16> [ <i16 undef, i16 undef, i16 undef, i16 0>, %vector.ph ], [ %3, %vector.body ]
; SINK-AFTER-NEXT: %vector.recur2 = phi <4 x i32> [ <i32 undef, i32 undef, i32 undef, i32 -27>, %vector.ph ], [ %1, %vector.body ]
; SINK-AFTER-NEXT: %vector.recur = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, %vector.ph ], [ %3, %vector.body ]
; SINK-AFTER-NEXT: %vector.recur2 = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, %vector.ph ], [ %1, %vector.body ]
; SINK-AFTER-NEXT: %0 = add <4 x i16> %vec.ind, <i16 1, i16 1, i16 1, i16 1>
; SINK-AFTER-NEXT: %1 = zext <4 x i16> %0 to <4 x i32>
; SINK-AFTER-NEXT: %2 = shufflevector <4 x i32> %vector.recur2, <4 x i32> %1, <4 x i32> <i32 3, i32 4, i32 5, i32 6>

View File

@ -24,10 +24,10 @@ for.cond.cleanup: ; preds = %if.end
; CHECK: %[[SDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SD0:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0]], %[[SDA1]]
; CHECK: %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[SD0]], i32 0
; CHECK: %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SD0]], i32 0
; CHECK: br label %[[ESD]]
; CHECK: [[ESD]]:
; CHECK: %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ undef, %vector.body ], [ %[[SD1]], %[[CSD]] ]
; CHECK: %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[SD1]], %[[CSD]] ]
; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1
; CHECK: br i1 %[[SDEEH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]]
; CHECK: [[CSDH]]:
@ -45,10 +45,10 @@ for.cond.cleanup: ; preds = %if.end
; CHECK: %[[UDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UD0:[a-zA-Z0-9]+]] = udiv i32 %[[UDA0]], %[[UDA1]]
; CHECK: %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[UD0]], i32 0
; CHECK: %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UD0]], i32 0
; CHECK: br label %[[EUD]]
; CHECK: [[EUD]]:
; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[UD1]], %[[CUD]] ]
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UD1]], %[[CUD]] ]
; CHECK: %[[SREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[SREE]], label %[[CSR:[a-zA-Z0-9.]+]], label %[[ESR:[a-zA-Z0-9.]+]]
@ -56,10 +56,10 @@ for.cond.cleanup: ; preds = %if.end
; CHECK: %[[SRA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SRA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SR0:[a-zA-Z0-9]+]] = srem i32 %[[SRA0]], %[[SRA1]]
; CHECK: %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[SR0]], i32 0
; CHECK: %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SR0]], i32 0
; CHECK: br label %[[ESR]]
; CHECK: [[ESR]]:
; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[SR1]], %[[CSR]] ]
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[SR1]], %[[CSR]] ]
; CHECK: %[[UREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[UREE]], label %[[CUR:[a-zA-Z0-9.]+]], label %[[EUR:[a-zA-Z0-9.]+]]
@ -67,10 +67,10 @@ for.cond.cleanup: ; preds = %if.end
; CHECK: %[[URA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[URA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UR0:[a-zA-Z0-9]+]] = urem i32 %[[URA0]], %[[URA1]]
; CHECK: %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[UR0]], i32 0
; CHECK: %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UR0]], i32 0
; CHECK: br label %[[EUR]]
; CHECK: [[EUR]]:
; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[UR1]], %[[CUR]] ]
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UR1]], %[[CUR]] ]
for.body: ; preds = %if.end, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
@ -124,7 +124,7 @@ for.cond.cleanup: ; preds = %if.end
; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
; CHECK: br label %[[FI]]
; CHECK: [[FI]]:
; CHECK: %{{.*}} = phi i32 [ undef, %vector.body ], [ %[[PD]], %[[THEN]] ]
; CHECK: %{{.*}} = phi i32 [ poison, %vector.body ], [ %[[PD]], %[[THEN]] ]
for.body: ; preds = %if.end, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
@ -169,7 +169,7 @@ for.cond.cleanup: ; preds = %if.end
; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
; CHECK: br label %[[FI]]
; CHECK: [[FI]]:
; CHECK: %{{.*}} = phi i32 [ undef, %vector.body ], [ %[[PD]], %[[THEN]] ]
; CHECK: %{{.*}} = phi i32 [ poison, %vector.body ], [ %[[PD]], %[[THEN]] ]
for.body: ; preds = %if.end, %entry
@ -213,10 +213,10 @@ entry:
; CHECK: %[[T01:.+]] = add nsw i32 %[[T00]], %x
; CHECK: %[[T02:.+]] = extractelement <2 x i32> %wide.load, i32 0
; CHECK: %[[T03:.+]] = udiv i32 %[[T02]], %[[T01]]
; CHECK: %[[T04:.+]] = insertelement <2 x i32> undef, i32 %[[T03]], i32 0
; CHECK: %[[T04:.+]] = insertelement <2 x i32> poison, i32 %[[T03]], i32 0
; CHECK: br label %[[CONT0]]
; CHECK: [[CONT0]]:
; CHECK: %[[T05:.+]] = phi <2 x i32> [ undef, %vector.body ], [ %[[T04]], %[[IF0]] ]
; CHECK: %[[T05:.+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[T04]], %[[IF0]] ]
; CHECK: br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
; CHECK: [[IF1]]:
; CHECK: %[[T06:.+]] = extractelement <2 x i32> %wide.load, i32 1
@ -242,14 +242,14 @@ entry:
; UNROLL-NO-VF: %[[DIV0:.+]] = udiv i32 %[[LOAD0]], %[[ADD0]]
; UNROLL-NO-VF: br label %[[CONT0]]
; UNROLL-NO-VF: [[CONT0]]:
; UNROLL-NO-VF: phi i32 [ undef, %vector.body ], [ %[[DIV0]], %[[IF0]] ]
; UNROLL-NO-VF: phi i32 [ poison, %vector.body ], [ %[[DIV0]], %[[IF0]] ]
; UNROLL-NO-VF: br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
; UNROLL-NO-VF: [[IF1]]:
; UNROLL-NO-VF: %[[ADD1:.+]] = add nsw i32 %[[LOAD1]], %x
; UNROLL-NO-VF: %[[DIV1:.+]] = udiv i32 %[[LOAD1]], %[[ADD1]]
; UNROLL-NO-VF: br label %[[CONT1]]
; UNROLL-NO-VF: [[CONT1]]:
; UNROLL-NO-VF: phi i32 [ undef, %[[CONT0]] ], [ %[[DIV1]], %[[IF1]] ]
; UNROLL-NO-VF: phi i32 [ poison, %[[CONT0]] ], [ %[[DIV1]], %[[IF1]] ]
; UNROLL-NO-VF: br i1 {{.*}}, label %middle.block, label %vector.body
;
for.body:

View File

@ -860,7 +860,7 @@ define i64 @trunc_with_first_order_recurrence() {
; CHECK-NEXT: %vec.phi = phi <2 x i64>
; CHECK-NEXT: %vec.ind = phi <2 x i64> [ <i64 1, i64 2>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; CHECK-NEXT: %vec.ind2 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next3, %vector.body ]
; CHECK-NEXT: %vector.recur = phi <2 x i32> [ <i32 undef, i32 42>, %vector.ph ], [ %vec.ind5, %vector.body ]
; CHECK-NEXT: %vector.recur = phi <2 x i32> [ <i32 poison, i32 42>, %vector.ph ], [ %vec.ind5, %vector.body ]
; CHECK-NEXT: %vec.ind5 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next6, %vector.body ]
; CHECK-NEXT: %vec.ind7 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next8, %vector.body ]
; CHECK-NEXT: shufflevector <2 x i32> %vector.recur, <2 x i32> %vec.ind5, <2 x i32> <i32 1, i32 2>

View File

@ -57,7 +57,7 @@ define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture read
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP8]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP10]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP12]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> undef, i32 [[TMP13]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP14]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP15]], i32 2
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 3

View File

@ -32,10 +32,10 @@ define i16 @test_true_and_false_branch_equal() {
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]]
; CHECK: pred.srem.if:
; CHECK-NEXT: [[TMP7:%.*]] = srem i16 5786, [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> undef, i16 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[TMP7]], i32 0
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE]]
; CHECK: pred.srem.continue:
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_SREM_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_SREM_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4]]
; CHECK: pred.srem.if3:

View File

@ -19,10 +19,10 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -109,10 +109,10 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -145,10 +145,10 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -240,10 +240,10 @@ define i32 @reduction_sum_const(i32* noalias nocapture %A) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -334,10 +334,10 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -370,10 +370,10 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -469,10 +469,10 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -505,10 +505,10 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -601,10 +601,10 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -637,10 +637,10 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -729,10 +729,10 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -765,10 +765,10 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -857,10 +857,10 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -893,10 +893,10 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -983,10 +983,10 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -1019,10 +1019,10 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -1109,10 +1109,10 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -1145,10 +1145,10 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> undef, float [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> poison, float [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -1237,10 +1237,10 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -1273,10 +1273,10 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> undef, float [[TMP26]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> poison, float [[TMP26]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
@ -1362,10 +1362,10 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -1450,10 +1450,10 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -1634,10 +1634,10 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i8> undef, i8 [[TMP5]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP5]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
@ -1732,10 +1732,10 @@ define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i8> undef, i8 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i8> poison, i8 [[TMP7]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:

View File

@ -37,7 +37,7 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
; CHECK-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP9]]
; CHECK-NEXT: [[BIN_RDX7:%.*]] = add i32 [[TMP13]], [[BIN_RDX]]
@ -46,7 +46,7 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !2
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP2:!llvm.loop !.*]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[BIN_RDX8]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
@ -95,10 +95,10 @@ define i32 @predicated(i32* noalias nocapture %A) {
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
; CHECK: pred.load.if7:
@ -135,10 +135,10 @@ define i32 @predicated(i32* noalias nocapture %A) {
; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> undef, i32 [[TMP30]], i32 0
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.continue14:
; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE12]] ], [ [[TMP31]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE12]] ], [ [[TMP31]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK: pred.load.if15:
@ -175,10 +175,10 @@ define i32 @predicated(i32* noalias nocapture %A) {
; CHECK-NEXT: [[TMP52:%.*]] = or i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i32> undef, i32 [[TMP54]], i32 0
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i32> poison, i32 [[TMP54]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE22]]
; CHECK: pred.load.continue22:
; CHECK-NEXT: [[TMP56:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE20]] ], [ [[TMP55]], [[PRED_LOAD_IF21]] ]
; CHECK-NEXT: [[TMP56:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE20]] ], [ [[TMP55]], [[PRED_LOAD_IF21]] ]
; CHECK-NEXT: [[TMP57:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
; CHECK: pred.load.if23:
@ -215,10 +215,10 @@ define i32 @predicated(i32* noalias nocapture %A) {
; CHECK-NEXT: [[TMP76:%.*]] = or i64 [[INDEX]], 12
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP76]]
; CHECK-NEXT: [[TMP78:%.*]] = load i32, i32* [[TMP77]], align 4
; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i32> undef, i32 [[TMP78]], i32 0
; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i32> poison, i32 [[TMP78]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE30]]
; CHECK: pred.load.continue30:
; CHECK-NEXT: [[TMP80:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE28]] ], [ [[TMP79]], [[PRED_LOAD_IF29]] ]
; CHECK-NEXT: [[TMP80:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE28]] ], [ [[TMP79]], [[PRED_LOAD_IF29]] ]
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP81]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
; CHECK: pred.load.if31:
@ -264,7 +264,7 @@ define i32 @predicated(i32* noalias nocapture %A) {
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 16, i64 16, i64 16, i64 16>
; CHECK-NEXT: [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272
; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP104]], [[TMP101]]
; CHECK-NEXT: [[BIN_RDX37:%.*]] = add i32 [[TMP107]], [[BIN_RDX]]
@ -273,7 +273,7 @@ define i32 @predicated(i32* noalias nocapture %A) {
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !5
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP5:!llvm.loop !.*]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[BIN_RDX38]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]

View File

@ -284,12 +284,12 @@ define i1 @cmp_lt_gt(double %a, double %b, double %c) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[B:%.*]]
; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[FNEG]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[MUL]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>

View File

@ -8,7 +8,7 @@ target triple = "aarch64--linux-gnu"
define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
; CHECK: for.body3.lr.ph:
@ -53,7 +53,7 @@ for.end27:
define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
; CHECK: for.body3.lr.ph:

View File

@ -16,7 +16,7 @@ target triple = "aarch64--linux-gnu"
define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @gather_multiple_use(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i32 3

View File

@ -30,7 +30,7 @@ define void @PR28330(i32 %n) {
; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> poison, i1 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1
; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
@ -52,7 +52,7 @@ define void @PR28330(i32 %n) {
; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
; GATHER-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP22]], i32 3
@ -166,7 +166,7 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> poison, i1 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1
; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
@ -188,7 +188,7 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
; GATHER-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP22]], i32 3
@ -221,7 +221,7 @@ define void @PR32038(i32 %n) {
; MAX-COST: for.body:
; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> undef, i1 [[TMP2]], i32 0
; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> poison, i1 [[TMP2]], i32 0
; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1
; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[P5]], i32 2

View File

@ -60,7 +60,7 @@ define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP20]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP5]], 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
@ -152,7 +152,7 @@ define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
@ -175,7 +175,7 @@ define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP7]]
; CHECK-NEXT: [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP9]], [[TMP1]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0

View File

@ -19,13 +19,13 @@ define void @wrap_mul4(double* nocapture %Out, [2 x double]* nocapture readonly
; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[ARRAYIDX3_I]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[TEMP]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TEMP]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TEMP]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[ARRAYIDX18_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 1, i64 1
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7_I]] to <2 x double>*
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> undef, double [[TEMP2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[TEMP2]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP7]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]]
@ -44,10 +44,10 @@ define void @wrap_mul4(double* nocapture %Out, [2 x double]* nocapture readonly
; CHECK-NEXT: [[TEMP10:%.*]] = load double, double* [[ARRAYIDX47_I]], align 8
; CHECK-NEXT: [[ARRAYIDX52_I:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[A]], i64 1, i64 1
; CHECK-NEXT: [[TEMP11:%.*]] = load double, double* [[ARRAYIDX52_I]], align 8
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> undef, double [[TEMP10]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP19]], double [[TEMP10]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x double> [[TMP2]], [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> undef, double [[TEMP11]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> poison, double [[TEMP11]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x double> [[TMP22]], double [[TEMP11]], i32 1
; CHECK-NEXT: [[TMP24:%.*]] = fmul <2 x double> [[TMP7]], [[TMP23]]
; CHECK-NEXT: [[TMP25:%.*]] = fadd <2 x double> [[TMP21]], [[TMP24]]

View File

@ -203,7 +203,7 @@ define void @select_uniform_ugt_8xi8(i8* %ptr, i8 %x) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3
@ -279,7 +279,7 @@ define void @select_uniform_ugt_16xi8(i8* %ptr, i8 %x) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3
@ -443,7 +443,7 @@ define void @select_uniform_ugt_4xi16(i16* %ptr, i16 %x) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <4 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> [[TMP5]], i16 [[X]], i32 3
@ -492,7 +492,7 @@ define void @select_uniform_ult_8xi16(i16* %ptr, i16 %x) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[X]], i32 3
@ -562,7 +562,7 @@ define void @select_uniform_eq_2xi32(i32* %ptr, i32 %x) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
@ -593,7 +593,7 @@ define void @select_uniform_eq_4xi32(i32* %ptr, i32 %x) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[X]], i32 3
@ -635,7 +635,7 @@ define void @select_uniform_ne_2xi64(i64* %ptr, i64 %x) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*

View File

@ -114,7 +114,7 @@ define amdgpu_kernel void @mul_scalar_v2f16(half addrspace(3)* %a, half %scalar,
; GCN-LABEL: @mul_scalar_v2f16(
; GCN-NEXT: [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
; GCN-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
; GCN-NEXT: [[TMP3:%.*]] = insertelement <2 x half> undef, half [[SCALAR:%.*]], i32 0
; GCN-NEXT: [[TMP3:%.*]] = insertelement <2 x half> poison, half [[SCALAR:%.*]], i32 0
; GCN-NEXT: [[TMP4:%.*]] = insertelement <2 x half> [[TMP3]], half [[SCALAR]], i32 1
; GCN-NEXT: [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]]
; GCN-NEXT: [[TMP6:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
@ -197,7 +197,7 @@ define amdgpu_kernel void @test1_fabs_scalar_fma_v2f16(half addrspace(3)* %a, ha
; GCN-NEXT: [[I4:%.*]] = load half, half addrspace(3)* [[ARRAYIDX4]], align 2
; GCN-NEXT: [[TMP3:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
; GCN-NEXT: [[TMP4:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP3]], align 2
; GCN-NEXT: [[TMP5:%.*]] = insertelement <2 x half> undef, half [[I1_FABS]], i32 0
; GCN-NEXT: [[TMP5:%.*]] = insertelement <2 x half> poison, half [[I1_FABS]], i32 0
; GCN-NEXT: [[TMP6:%.*]] = insertelement <2 x half> [[TMP5]], half [[I4]], i32 1
; GCN-NEXT: [[TMP7:%.*]] = call <2 x half> @llvm.fma.v2f16(<2 x half> [[TMP2]], <2 x half> [[TMP6]], <2 x half> [[TMP4]])
; GCN-NEXT: [[TMP8:%.*]] = bitcast half addrspace(3)* [[D:%.*]] to <2 x half> addrspace(3)*

View File

@ -15,7 +15,7 @@ define void @foo() local_unnamed_addr {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4
; CHECK-NEXT: [[ARRAYIDX372_2:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[ADD277]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 3

View File

@ -10,7 +10,7 @@ define void @test() #0 {
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3

View File

@ -4,11 +4,11 @@
@global = local_unnamed_addr global [6 x double] zeroinitializer, align 16
define { i64, i64 } @patatino(double %arg) {
; CHECK-LABEL: define {{[^@]+}}@patatino(
; CHECK-LABEL: @patatino(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]

View File

@ -6,7 +6,7 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> undef, i32 5
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> undef, half [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>

View File

@ -6,7 +6,7 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> undef, i32 5
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> undef, half [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>

View File

@ -39,15 +39,15 @@ define void @Test(i32) {
; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA26]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[OP_EXTRA26]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
; CHECK-NEXT: [[TMP15]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP14]], i32 1
; CHECK-NEXT: br label [[LOOP]]
@ -96,9 +96,9 @@ define void @Test(i32) {
; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA27]], [[VAL_39]]
; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[VAL_40]], i32 0
; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_41]], i32 0
; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[VAL_41]], i32 0
; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1
; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]]
; FORCE_REDUCTION-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]]

View File

@ -4,7 +4,7 @@
define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
; CHECK-LABEL: @mainTest(
; CHECK-NEXT: bci_15.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 31, i32 undef>, i32 [[PARAM:%.*]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 31, i32 poison>, i32 [[PARAM:%.*]], i32 1
; CHECK-NEXT: br label [[BCI_15:%.*]]
; CHECK: bci_15:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
@ -16,7 +16,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP4]])
; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[V44]], i32 0
; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1
; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:

View File

@ -462,7 +462,7 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) {
; CHECK-LABEL: @add_sub_v8i32_splat(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]

View File

@ -462,7 +462,7 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) {
; CHECK-LABEL: @add_sub_v8i32_splat(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]

View File

@ -545,7 +545,7 @@ define void @PR47450(i16* nocapture readonly %p) {
; CHECK-NEXT: [[X:%.*]] = load i16, i16* [[P:%.*]], align 2
; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X]] to i32
; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i32 [[Z]], 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[S]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[S]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[S]], i32 3

View File

@ -8,7 +8,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3

View File

@ -17,11 +17,11 @@ define void @bcast_vals(i64 *%A, i64 *%B, i64 *%S) {
; CHECK-NEXT: [[B0:%.*]] = load i64, i64* [[B:%.*]], align 8
; CHECK-NEXT: [[V1:%.*]] = sub i64 [[A0]], 1
; CHECK-NEXT: [[V2:%.*]] = sub i64 [[B0]], 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> undef, i64 [[V1]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[V1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[V1]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[V1]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> undef, i64 [[V2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[V2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[V2]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP6]], i64 [[V2]], i32 3
@ -75,12 +75,12 @@ define void @bcast_vals2(i16 *%A, i16 *%B, i16 *%C, i16 *%D, i16 *%E, i32 *%S) {
; CHECK-NEXT: [[D0:%.*]] = load i16, i16* [[D:%.*]], align 8
; CHECK-NEXT: [[E0:%.*]] = load i16, i16* [[E:%.*]], align 8
; CHECK-NEXT: [[V1:%.*]] = sext i16 [[A0]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B0]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> [[TMP0]], i16 [[C0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[E0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[D0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[V1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[V1]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V1]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[V1]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[V1]], i32 3

View File

@ -13,7 +13,7 @@ define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, doub
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[G:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[G:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[G]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[TMP4]], <2 x double> <double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*

View File

@ -51,7 +51,7 @@ define void @splat(i8 %a, i8 %b, i8 %c) {
; SSE-NEXT: ret void
;
; AVX-LABEL: @splat(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
; AVX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[C]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[C]], i32 2
; AVX-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[C]], i32 3
@ -67,7 +67,7 @@ define void @splat(i8 %a, i8 %b, i8 %c) {
; AVX-NEXT: [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[C]], i32 13
; AVX-NEXT: [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[C]], i32 14
; AVX-NEXT: [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[C]], i32 15
; AVX-NEXT: [[TMP17:%.*]] = insertelement <2 x i8> undef, i8 [[A:%.*]], i32 0
; AVX-NEXT: [[TMP17:%.*]] = insertelement <2 x i8> poison, i8 [[A:%.*]], i32 0
; AVX-NEXT: [[TMP18:%.*]] = insertelement <2 x i8> [[TMP17]], i8 [[B:%.*]], i32 1
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[TMP18]], <2 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; AVX-NEXT: [[TMP19:%.*]] = xor <16 x i8> [[TMP16]], [[SHUFFLE]]
@ -129,11 +129,11 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
; SSE-NEXT: ret void
;
; AVX-LABEL: @same_opcode_on_one_side(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[C]], i32 2
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3
; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1
; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2
; AVX-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3

View File

@ -10,7 +10,7 @@ define void @reduce_compare(double* nocapture %A, i32 %n) {
; CHECK-LABEL: @reduce_compare(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[CONV]], i32 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:

View File

@ -62,10 +62,10 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
; AVX-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
; AVX-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4
; AVX-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i32 1
; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1
; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP5]], [[TMP7]]
; AVX-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer
@ -78,7 +78,7 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
; AVX-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
; AVX-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1
; AVX-NEXT: [[ADD13]] = fadd float [[TMP16]], [[TMP17]]
; AVX-NEXT: [[TMP18:%.*]] = insertelement <2 x float> undef, float [[TMP17]], i32 0
; AVX-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP17]], i32 0
; AVX-NEXT: [[TMP19:%.*]] = insertelement <2 x float> [[TMP18]], float [[ADD13]], i32 1
; AVX-NEXT: [[TMP20:%.*]] = fcmp olt <2 x float> [[TMP19]], <float 1.000000e+00, float 1.000000e+00>
; AVX-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP20]], <2 x float> [[TMP19]], <2 x float> <float 1.000000e+00, float 1.000000e+00>

View File

@ -130,7 +130,7 @@ define fastcc void @dct36(double* %inbuf) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double undef, i32 1
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP1]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*

View File

@ -95,7 +95,7 @@ define void @zot(%struct.hoge* %arg) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = load double, double* undef, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load double, double* undef, align 8
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[ARG:%.*]], i64 0, i32 1

View File

@ -30,7 +30,7 @@ define void @main() #0 {
; CHECK-NEXT: br i1 undef, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
; CHECK: cond.false66.us:
; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[ADD_I276_US]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[ADD_I276_US]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double undef, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 0.000000e+00, double 0xBFA5CC2D1960285F>
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 1.400000e+02, double 1.400000e+02>
@ -111,7 +111,7 @@ define void @_Z8radianceRK3RayiPt() #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
; CHECK: if.then38:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double undef, i32 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double undef, double poison>, double undef, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]]

View File

@ -27,7 +27,7 @@ define i32 @test(double* nocapture %G) {
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[G]], i64 2
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[TMP6]], 4.000000e+00
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[MUL11]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], <double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[G]], i64 3
@ -74,7 +74,7 @@ define i32 @foo(double* nocapture %A, i32 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 7.900000e+00, double 7.700000e+00, double 7.600000e+00, double 7.400000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> undef, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3
@ -135,7 +135,7 @@ define i32 @test2(double* nocapture %G, i32 %k) {
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[G]], i64 6
; CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = fmul double [[TMP7]], 3.000000e+00
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> undef, double [[TMP4]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP8]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], <double 1.000000e+00, double 6.000000e+00>
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, double* [[G]], i64 1
@ -147,7 +147,7 @@ define i32 @test2(double* nocapture %G, i32 %k) {
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[G]], i64 6
; CHECK-NEXT: [[TMP17:%.*]] = load double, double* [[TMP16]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = fmul double [[TMP17]], 3.000000e+00
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> undef, double [[TMP4]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP19]], double [[TMP18]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], <double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[G]], i64 3
@ -208,7 +208,7 @@ define i32 @foo4(double* nocapture %A, i32 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 7.900000e+00, double 7.900000e+00, double 7.900000e+00, double 7.900000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> undef, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3
@ -261,7 +261,7 @@ define i32 @partial_mrg(double* nocapture %A, i32 %n) {
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A]] to <2 x double>*

View File

@ -23,7 +23,7 @@ define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i3
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
@ -76,7 +76,7 @@ define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
@ -122,7 +122,7 @@ define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3

View File

@ -57,7 +57,7 @@ define void @fextr2(double* %ptr) {
; CHECK-NEXT: [[V0:%.*]] = extractelement <4 x double> [[LD]], i32 0
; CHECK-NEXT: [[V1:%.*]] = extractelement <4 x double> [[LD]], i32 1
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 5.500000e+00, double 6.600000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[P0]] to <2 x double>*

View File

@ -10,7 +10,7 @@ define i32 @fn1() {
; CHECK-LABEL: @fn1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> undef, i64* [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 11, i64 56>
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64>

View File

@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3

View File

@ -12,6 +12,20 @@ define float @f(<2 x float> %x) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret float [[ADD]]
;
; THRESH1-LABEL: @f(
; THRESH1-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; THRESH1-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; THRESH1-NEXT: ret float [[ADD]]
;
; THRESH2-LABEL: @f(
; THRESH2-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; THRESH2-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; THRESH2-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1
@ -22,13 +36,31 @@ define float @f(<2 x float> %x) {
}
define float @f_used_out_of_tree(<2 x float> %x) {
; CHECK-LABEL: @f_used_out_of_tree(
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]]
; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
; CHECK-NEXT: store float [[ADD]], float* @a, align 4
; CHECK-NEXT: ret float [[X0]]
;
; THRESH1-LABEL: @f_used_out_of_tree(
; THRESH1-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; THRESH1-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
; THRESH1-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]]
; THRESH1-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
; THRESH1-NEXT: store float [[ADD]], float* @a, align 4
; THRESH1-NEXT: ret float [[X0]]
;
; THRESH2-LABEL: @f_used_out_of_tree(
; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH2-NEXT: store float [[ADD]], float* @a
; THRESH2-NEXT: store float [[ADD]], float* @a, align 4
; THRESH2-NEXT: ret float [[TMP1]]
;
%x0 = extractelement <2 x float> %x, i32 0
@ -41,15 +73,33 @@ define float @f_used_out_of_tree(<2 x float> %x) {
}
define float @f_used_twice_in_tree(<2 x float> %x) {
; CHECK-LABEL: @f_used_twice_in_tree(
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X1]]
; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
; CHECK-NEXT: ret float [[ADD]]
;
; THRESH1-LABEL: @f_used_twice_in_tree(
; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]]
; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH1-NEXT: ret float [[ADD]]
;
; THRESH2-LABEL: @f_used_twice_in_tree(
; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]]
; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH2-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1

View File

@ -11,7 +11,7 @@ target triple = "i686-unknown-linux-gnu"
define void @vectorize_fp128(fp128 %c, fp128 %d) #0 {
; CHECK-LABEL: @vectorize_fp128(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x fp128> undef, fp128 [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x fp128> poison, fp128 [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x fp128> [[TMP0]], fp128 [[D:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x fp128> [[TMP2]], <fp128 0xL00000000000000007FFF000000000000, fp128 0xL00000000000000007FFF000000000000>

View File

@ -16,7 +16,7 @@ target triple = "i386-apple-macosx10.9.0"
define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: br label [[FOR_BODY:%.*]]

View File

@ -1200,11 +1200,11 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
define i32 @wobble(i32 %arg, i32 %bar) {
; CHECK-LABEL: @wobble(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[BAR:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
@ -1219,11 +1219,11 @@ define i32 @wobble(i32 %arg, i32 %bar) {
;
; THRESHOLD-LABEL: @wobble(
; THRESHOLD-NEXT: bb:
; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[ARG:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[BAR:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3

View File

@ -761,14 +761,14 @@ define i32 @maxi8_mutiple_uses(i32) {
; AVX-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
; AVX-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; AVX-NEXT: store i32 [[TMP15]], i32* @var, align 8
; AVX-NEXT: ret i32 [[TMP14]]
; AVX-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
; AVX-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP10]], i32 [[TMP5]]
; AVX-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[OP_EXTRA1]], [[TMP11]]
; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[OP_EXTRA1]], i32 [[TMP11]]
; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; AVX-NEXT: store i32 [[TMP14]], i32* @var, align 8
; AVX-NEXT: ret i32 [[TMP13]]
;
; THRESH-LABEL: @maxi8_mutiple_uses(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
@ -777,23 +777,23 @@ define i32 @maxi8_mutiple_uses(i32) {
; THRESH-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; THRESH-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
; THRESH-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[TMP7]], i32 0
; THRESH-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
; THRESH-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP3]], i32 1
; THRESH-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0
; THRESH-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i32 0
; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP4]], i32 1
; THRESH-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]]
; THRESH-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]]
; THRESH-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
; THRESH-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
; THRESH-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
; THRESH-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 [[TMP14]]
; THRESH-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]]
; THRESH-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]], i32 [[TMP17]]
; THRESH-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4
; THRESH-NEXT: store i32 [[TMP21]], i32* @var, align 8
; THRESH-NEXT: ret i32 [[TMP19]]
; THRESH-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
; THRESH-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP15]], i32 [[TMP14]]
; THRESH-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[OP_EXTRA1]], [[TMP16]]
; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[OP_EXTRA1]], i32 [[TMP16]]
; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
; THRESH-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 3, i32 4
; THRESH-NEXT: store i32 [[TMP20]], i32* @var, align 8
; THRESH-NEXT: ret i32 [[TMP18]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
@ -865,9 +865,9 @@ define i32 @maxi8_wrong_parent(i32) {
; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
; AVX-NEXT: ret i32 [[OP_EXTRA]]
; AVX-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
; AVX-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP13]], i32 [[TMP5]]
; AVX-NEXT: ret i32 [[OP_EXTRA1]]
;
; THRESH-LABEL: @maxi8_wrong_parent(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
@ -883,18 +883,18 @@ define i32 @maxi8_wrong_parent(i32) {
; THRESH-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; THRESH-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; THRESH-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> undef, i1 [[TMP12]], i32 0
; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> poison, i1 [[TMP12]], i32 0
; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP5]], i32 1
; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i32 0
; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP3]], i32 1
; THRESH-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> undef, i32 [[TMP8]], i32 0
; THRESH-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
; THRESH-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1
; THRESH-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]]
; THRESH-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
; THRESH-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
; THRESH-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
; THRESH-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP20]]
; THRESH-NEXT: ret i32 [[OP_EXTRA]]
; THRESH-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
; THRESH-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP21]], i32 [[TMP20]]
; THRESH-NEXT: ret i32 [[OP_EXTRA1]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4

View File

@ -11,7 +11,7 @@ define void @in_tree_user(double* nocapture %A, i32 %n) {
; CHECK-LABEL: @in_tree_user(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[CONV]], i32 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:

View File

@ -24,19 +24,19 @@ entry:
define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i8* noalias nocapture readonly %d, i8* noalias nocapture %e, i32 %w) local_unnamed_addr #1 {
; SSE-LABEL: @bar(
; SSE-NEXT: entry:
; SSE-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[W:%.*]], i32 0
; SSE-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[W:%.*]], i32 0
; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[W]], i32 1
; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[W]], i32 2
; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[W]], i32 3
; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[W]], i32 0
; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
; SSE-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[W]], i32 1
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[W]], i32 2
; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[W]], i32 3
; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> undef, i32 [[W]], i32 0
; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
; SSE-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[W]], i32 1
; SSE-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[W]], i32 2
; SSE-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[W]], i32 3
; SSE-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> undef, i32 [[W]], i32 0
; SSE-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[W]], i32 1
; SSE-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[W]], i32 2
; SSE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[W]], i32 3
@ -196,7 +196,7 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
;
; AVX512-LABEL: @bar(
; AVX512-NEXT: entry:
; AVX512-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> undef, i32 [[W:%.*]], i32 0
; AVX512-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[W:%.*]], i32 0
; AVX512-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[W]], i32 1
; AVX512-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[W]], i32 2
; AVX512-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[W]], i32 3

View File

@ -230,20 +230,20 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
@ -450,7 +450,7 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; Make sure we handle multiple trees that feed one build vector correctly.
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
; CHECK-LABEL: @multi_tree(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[Z:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[Z:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3

View File

@ -230,20 +230,20 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
@ -450,7 +450,7 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; Make sure we handle multiple trees that feed one build vector correctly.
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
; CHECK-LABEL: @multi_tree(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[Z:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[Z:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3

View File

@ -11,7 +11,7 @@ define i32 @fn1() {
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 8, i32 3

View File

@ -71,7 +71,7 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3

View File

@ -42,7 +42,7 @@ define dso_local void @j() local_unnamed_addr {
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 15
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* @a, align 4
; CHECK-NEXT: [[CONV19:%.*]] = sitofp i32 [[TMP15]] to float
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> <float -1.000000e+00, float -1.000000e+00, float undef, float undef>, float [[CONV19]], i32 2
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> <float -1.000000e+00, float -1.000000e+00, float poison, float poison>, float [[CONV19]], i32 2
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 2
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 3
; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP10]], [[TMP18]]

View File

@ -14,7 +14,7 @@ define i32 @test(double* nocapture %A, i8* nocapture %B) {
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 3, i8 3>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i8> undef, i8 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i8> poison, i8 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP5]], i8 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double>
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]

View File

@ -234,7 +234,7 @@ define void @lookahead_external_uses(double* %A, double *%B, double *%C, double
; CHECK-NEXT: [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1
; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double*> undef, double* [[A]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double*> poison, double* [[A]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double*> [[TMP0]], double* [[A]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <2 x double*> [[TMP1]], <2 x i64> <i64 0, i64 2>
; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
@ -246,9 +246,9 @@ define void @lookahead_external_uses(double* %A, double *%B, double *%C, double
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double*> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A1]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[B2]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP6]]
@ -414,7 +414,7 @@ define void @lookahead_crash(double* %A, double *%S, %Class *%Arg0) {
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[C0:%.*]] = call double @_ZN1i2ayEv(%Class* [[ARG0:%.*]])
; CHECK-NEXT: [[C1:%.*]] = call double @_ZN1i2axEv()
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[C1]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
@ -451,10 +451,10 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl
; CHECK-NEXT: [[LOADA1:%.*]] = load double, double* [[IDX1]], align 4
; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[LOADA0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[LOADA0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> undef, double [[LOADA1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[LOADA1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[LOADVEC2]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]]
@ -595,14 +595,14 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
; CHECK-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
; CHECK-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
; CHECK-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[EXTRB0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> undef, double [[EXTRA0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]]

View File

@ -28,7 +28,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) {
; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> undef, i32 [[N]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> poison, i32 [[N]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[N]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[N]], i32 2
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[N]], i32 3

View File

@ -15,31 +15,31 @@ target triple = "x86_64-unknown-linux-gnu"
define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {
; CHECK-LABEL: @PR31243_zext(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> undef, i8 [[V0:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
; CHECK-NEXT: [[TMPE4:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMPE4]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]]
; CHECK-NEXT: ret i8 [[TMP8]]
; CHECK-NEXT: [[TMP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP_6:%.*]] = load i8, i8* [[TMP_4]], align 1
; CHECK-NEXT: [[TMP_7:%.*]] = load i8, i8* [[TMP_5]], align 1
; CHECK-NEXT: [[TMP_8:%.*]] = add i8 [[TMP_6]], [[TMP_7]]
; CHECK-NEXT: ret i8 [[TMP_8]]
;
entry:
%tmp0 = zext i8 %v0 to i32
%tmp1 = zext i8 %v1 to i32
%tmp2 = or i32 %tmp0, 1
%tmp3 = or i32 %tmp1, 1
%tmp4 = getelementptr inbounds i8, i8* %ptr, i32 %tmp2
%tmp5 = getelementptr inbounds i8, i8* %ptr, i32 %tmp3
%tmp6 = load i8, i8* %tmp4
%tmp7 = load i8, i8* %tmp5
%tmp8 = add i8 %tmp6, %tmp7
ret i8 %tmp8
%tmp_0 = zext i8 %v0 to i32
%tmp_1 = zext i8 %v1 to i32
%tmp_2 = or i32 %tmp_0, 1
%tmp_3 = or i32 %tmp_1, 1
%tmp_4 = getelementptr inbounds i8, i8* %ptr, i32 %tmp_2
%tmp_5 = getelementptr inbounds i8, i8* %ptr, i32 %tmp_3
%tmp_6 = load i8, i8* %tmp_4
%tmp_7 = load i8, i8* %tmp_5
%tmp_8 = add i8 %tmp_6, %tmp_7
ret i8 %tmp_8
}
; When computing minimum sizes, if we cannot prove the sign bit is zero, we
@ -71,7 +71,7 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {
;
; AVX-LABEL: @PR31243_sext(
; AVX-NEXT: entry:
; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> undef, i8 [[V0:%.*]], i32 0
; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i32 0
; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i32 1
; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16>

View File

@ -15,7 +15,7 @@ target triple = "x86_64-apple-macosx10.7.0"
define i32 @foo(i32* nocapture %A, i32 %n) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i32 [[N:%.*]], 5
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP1]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 3

View File

@ -10,7 +10,7 @@ define void @shuffle_operands1(double * noalias %from, double * noalias %to, dou
; CHECK-LABEL: @shuffle_operands1(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[V1:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
@ -36,7 +36,7 @@ define void @vecload_vs_broadcast(double * noalias %from, double * noalias %to,
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[P]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
@ -72,7 +72,7 @@ define void @vecload_vs_broadcast2(double * noalias %from, double * noalias %to,
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[P]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
@ -108,7 +108,7 @@ define void @vecload_vs_broadcast3(double * noalias %from, double * noalias %to,
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[P]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
@ -145,9 +145,9 @@ define void @shuffle_preserve_broadcast4(double * noalias %from, double * noalia
; CHECK-NEXT: [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
; CHECK-NEXT: [[V0_1:%.*]] = load double, double* [[FROM]], align 4
; CHECK-NEXT: [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0_2]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[V0_1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
@ -221,9 +221,9 @@ define void @shuffle_preserve_broadcast6(double * noalias %from, double * noalia
; CHECK-NEXT: [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
; CHECK-NEXT: [[V0_1:%.*]] = load double, double* [[FROM]], align 4
; CHECK-NEXT: [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0_1]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[V0_2]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[P]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
@ -276,7 +276,7 @@ define void @good_load_order() {
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[ARRAYIDX5]] to <4 x float>*

View File

@ -13,11 +13,11 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 {
; CHECK: if.end:
; CHECK-NEXT: [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef
; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[SHR15]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[SHR15]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SUB14]], i32 1
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], <i32 0, i32 -1, i32 -5, i32 -9>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[SHR15]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 undef, i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 3

View File

@ -164,7 +164,7 @@ define float @foo3(float* nocapture readonly %A) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP11]] = extractelement <2 x float> [[SHUFFLE1]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP11]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13]] = extractelement <2 x float> [[SHUFFLE1]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP13]], i32 1
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP8]], i32 2
@ -247,7 +247,7 @@ define float @sort_phi_type(float* nocapture readonly %A) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3

View File

@ -14,7 +14,7 @@ define void @Rf_GReset() {
; CHECK-LABEL: @Rf_GReset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[TMP0]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
; CHECK: if.then:
@ -55,7 +55,7 @@ define void @Rf_GReset_unary_fneg() {
; CHECK-LABEL: @Rf_GReset_unary_fneg(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[TMP0]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fneg <2 x double> [[TMP1]]
; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
; CHECK: if.then:

View File

@ -12,7 +12,7 @@ define void @test(double* %i1, double* %i2, double* %o) {
; CHECK-NEXT: [[I1_0:%.*]] = load double, double* [[I1:%.*]], align 16
; CHECK-NEXT: [[I1_GEP1:%.*]] = getelementptr double, double* [[I1]], i64 1
; CHECK-NEXT: [[I1_1:%.*]] = load double, double* [[I1_GEP1]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[I1_0]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I1_0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[I1_1]], i32 1
; CHECK-NEXT: br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
; CHECK: then:
@ -20,7 +20,7 @@ define void @test(double* %i1, double* %i2, double* %o) {
; CHECK-NEXT: [[I2_0:%.*]] = load double, double* [[I2_GEP0]], align 16
; CHECK-NEXT: [[I2_GEP1:%.*]] = getelementptr inbounds double, double* [[I2]], i64 1
; CHECK-NEXT: [[I2_1:%.*]] = load double, double* [[I2_GEP1]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[I2_0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[I2_0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[I2_1]], i32 1
; CHECK-NEXT: br label [[END]]
; CHECK: end:

View File

@ -38,7 +38,7 @@ define void @_ZN1C10SwitchModeEv() local_unnamed_addr #0 comdat align 2 {
; AVX-NEXT: [[TMP0:%.*]] = bitcast i64* [[FOO_1]] to <2 x i64>*
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; AVX-NEXT: [[BAR5:%.*]] = load i64, i64* undef, align 8
; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> undef, i64 [[OR_1]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[OR_1]], i32 0
; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[BAR5]], i32 1
; AVX-NEXT: [[TMP4:%.*]] = and <2 x i64> [[TMP3]], [[TMP1]]
; AVX-NEXT: [[BAR3:%.*]] = getelementptr inbounds [[CLASS_2:%.*]], %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 0
@ -103,14 +103,14 @@ define void @pr35497() local_unnamed_addr #0 {
; AVX-NEXT: [[ADD:%.*]] = add i64 undef, undef
; AVX-NEXT: store i64 [[ADD]], i64* undef, align 1
; AVX-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5
; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i32 1
; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> <i64 undef, i64 poison>, i64 [[TMP0]], i32 1
; AVX-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], <i64 2, i64 2>
; AVX-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 20, i64 20>
; AVX-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4
; AVX-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer
; AVX-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> undef, i64 [[TMP5]], i32 0
; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0
; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1
; AVX-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], <i64 2, i64 2>
; AVX-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], <i64 20, i64 20>
@ -118,7 +118,7 @@ define void @pr35497() local_unnamed_addr #0 {
; AVX-NEXT: [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>*
; AVX-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1
; AVX-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; AVX-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> undef, i64 [[TMP11]], i32 0
; AVX-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i32 0
; AVX-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP5]], i32 1
; AVX-NEXT: [[TMP14:%.*]] = lshr <2 x i64> [[TMP13]], <i64 6, i64 6>
; AVX-NEXT: [[TMP15:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP14]]

View File

@ -9,7 +9,7 @@ define void @store_i32(i32* nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i32(
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4, [[TBAA0:!tbaa !.*]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 15, i32 15, i32 15, i32 15>
@ -54,7 +54,7 @@ define void @store_i8(i8* nocapture %0, i32 %1, i32 %2) {
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP0:%.*]] to <4 x i8>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1, [[TBAA4:!tbaa !.*]]
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[TMP5]] to <4 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1:%.*]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]], <i32 15, i32 15, i32 15, i32 15>
@ -186,7 +186,7 @@ define void @store_i64(i64* nocapture %0, i32 %1, i32 %2) {
; AVX2-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
; AVX2-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP0:%.*]] to <4 x i64>*
; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i64>, <4 x i64>* [[TMP5]], align 8, [[TBAA5:!tbaa !.*]]
; AVX2-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP9:%.*]] = mul <4 x i64> [[TMP6]], [[TMP8]]
; AVX2-NEXT: [[TMP10:%.*]] = lshr <4 x i64> [[TMP9]], <i64 15, i64 15, i64 15, i64 15>

View File

@ -89,7 +89,7 @@ define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture rea
; AVX-NEXT: ret void
;
; AVX2-LABEL: @gather_load_2(
; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1:%.*]], i32 0
; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1:%.*]], i32 0
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x i32*> [[TMP4]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
; AVX2-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0:!tbaa !.*]]
@ -99,7 +99,7 @@ define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture rea
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @gather_load_2(
; AVX512-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1:%.*]], i32 0
; AVX512-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1:%.*]], i32 0
; AVX512-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x i32*> [[TMP4]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
; AVX512-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0:!tbaa !.*]]
@ -219,7 +219,7 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea
; AVX2-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; AVX2-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr i32, <4 x i32*> [[TMP7]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX2-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP8]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0]]
@ -248,7 +248,7 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea
; AVX512-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; AVX512-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr i32, <4 x i32*> [[TMP7]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX512-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP8]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0]]
@ -398,7 +398,7 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
;
; AVX2-LABEL: @gather_load_4(
; AVX2-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
; AVX2-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[T1:%.*]], i32 0
; AVX2-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> poison, i32* [[T1:%.*]], i32 0
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP3:%.*]] = getelementptr i32, <4 x i32*> [[TMP2]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX2-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
@ -427,7 +427,7 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
;
; AVX512-LABEL: @gather_load_4(
; AVX512-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
; AVX512-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[T1:%.*]], i32 0
; AVX512-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> poison, i32* [[T1:%.*]], i32 0
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP3:%.*]] = getelementptr i32, <4 x i32*> [[TMP2]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX512-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
@ -505,7 +505,7 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x float*> undef, float* [[TMP1]], i32 0
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x float*> poison, float* [[TMP1]], i32 0
; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x float*> [[TMP6]], float* [[TMP3]], i32 1
; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x float*> [[TMP7]], float* [[TMP4]], i32 2
; SSE-NEXT: [[TMP9:%.*]] = insertelement <4 x float*> [[TMP8]], float* [[TMP5]], i32 3
@ -534,7 +534,7 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
@ -559,7 +559,7 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX2-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX2-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
@ -584,7 +584,7 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX512-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX512-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX512-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3

View File

@ -89,7 +89,7 @@ define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture rea
; AVX-NEXT: ret void
;
; AVX2-LABEL: @gather_load_2(
; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1:%.*]], i32 0
; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1:%.*]], i32 0
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x i32*> [[TMP4]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
; AVX2-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0:!tbaa !.*]]
@ -99,7 +99,7 @@ define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture rea
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @gather_load_2(
; AVX512-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1:%.*]], i32 0
; AVX512-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1:%.*]], i32 0
; AVX512-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x i32*> [[TMP4]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
; AVX512-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0:!tbaa !.*]]
@ -219,7 +219,7 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea
; AVX2-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; AVX2-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr i32, <4 x i32*> [[TMP7]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX2-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP8]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0]]
@ -248,7 +248,7 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea
; AVX512-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; AVX512-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr i32, <4 x i32*> [[TMP7]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX512-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP8]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0]]
@ -398,7 +398,7 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
;
; AVX2-LABEL: @gather_load_4(
; AVX2-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
; AVX2-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[T1:%.*]], i32 0
; AVX2-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> poison, i32* [[T1:%.*]], i32 0
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP3:%.*]] = getelementptr i32, <4 x i32*> [[TMP2]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX2-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
@ -427,7 +427,7 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
;
; AVX512-LABEL: @gather_load_4(
; AVX512-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
; AVX512-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[T1:%.*]], i32 0
; AVX512-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> poison, i32* [[T1:%.*]], i32 0
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP3:%.*]] = getelementptr i32, <4 x i32*> [[TMP2]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX512-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
@ -505,7 +505,7 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x float*> undef, float* [[TMP1]], i32 0
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x float*> poison, float* [[TMP1]], i32 0
; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x float*> [[TMP6]], float* [[TMP3]], i32 1
; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x float*> [[TMP7]], float* [[TMP4]], i32 2
; SSE-NEXT: [[TMP9:%.*]] = insertelement <4 x float*> [[TMP8]], float* [[TMP5]], i32 3
@ -534,7 +534,7 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
@ -559,7 +559,7 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX2-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX2-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
@ -584,7 +584,7 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX512-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX512-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX512-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3

View File

@ -88,12 +88,12 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[B:%.*]]
; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[FNEG]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[FNEG]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[C:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[C]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[MUL]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[MUL]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
@ -137,12 +137,12 @@ define i1 @fcmp_lt(double %a, double %b, double %c) {
; CHECK-LABEL: @fcmp_lt(
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[B:%.*]]
; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[FNEG]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[FNEG]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[C]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[MUL]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[MUL]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = fdiv <2 x double> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>

View File

@ -11,13 +11,13 @@ define void @hoge() {
; CHECK-NEXT: ret void
; CHECK: bb2:
; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> undef, i16 [[T]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[TMP0]], i16 undef, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
; CHECK-NEXT: [[SHUFFLE5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE5]], <i32 15, i32 31, i32 47, i32 undef>
; CHECK-NEXT: [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE10]], <i32 15, i32 31, i32 47, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef
; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63
@ -26,17 +26,17 @@ define void @hoge() {
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49, i32 -33, i32 -33, i32 -17>
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], undef
; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i32 [[OP_EXTRA]], undef
; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[TMP12]], i32 [[OP_EXTRA]], i32 undef
; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[OP_EXTRA1]], undef
; CHECK-NEXT: [[OP_EXTRA2:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA1]], i32 undef
; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i32 [[OP_EXTRA2]], undef
; CHECK-NEXT: [[OP_EXTRA3:%.*]] = select i1 [[TMP14]], i32 [[OP_EXTRA2]], i32 undef
; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA3]], undef
; CHECK-NEXT: [[OP_EXTRA4:%.*]] = select i1 [[TMP15]], i32 [[OP_EXTRA3]], i32 undef
; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA4]]
; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP10]], undef
; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP10]], i32 undef
; CHECK-NEXT: [[OP_EXTRA2:%.*]] = icmp slt i32 [[OP_EXTRA1]], undef
; CHECK-NEXT: [[OP_EXTRA3:%.*]] = select i1 [[OP_EXTRA2]], i32 [[OP_EXTRA1]], i32 undef
; CHECK-NEXT: [[OP_EXTRA4:%.*]] = icmp slt i32 [[OP_EXTRA3]], undef
; CHECK-NEXT: [[OP_EXTRA5:%.*]] = select i1 [[OP_EXTRA4]], i32 [[OP_EXTRA3]], i32 undef
; CHECK-NEXT: [[OP_EXTRA6:%.*]] = icmp slt i32 [[OP_EXTRA5]], undef
; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[OP_EXTRA6]], i32 [[OP_EXTRA5]], i32 undef
; CHECK-NEXT: [[OP_EXTRA8:%.*]] = icmp slt i32 [[OP_EXTRA7]], undef
; CHECK-NEXT: [[OP_EXTRA9:%.*]] = select i1 [[OP_EXTRA8]], i32 [[OP_EXTRA7]], i32 undef
; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA9]]
; CHECK-NEXT: unreachable
;
bb:

View File

@ -19,7 +19,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
; CHECK-NEXT: [[ARRAYIDX_I_I7_5_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 5
; CHECK-NEXT: [[ARRAYIDX_I_I7_6_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6
; CHECK-NEXT: [[ARRAYIDX_I_I7_7_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3
@ -32,7 +32,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
; CHECK-NEXT: [[ARRAYIDX_I_I7_9_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9
; CHECK-NEXT: [[ARRAYIDX_I_I7_10_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10
; CHECK-NEXT: [[ARRAYIDX_I_I7_11_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3
@ -43,7 +43,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
; CHECK-NEXT: [[SHR_13_I_I:%.*]] = lshr i32 [[CONV31_I]], 14
; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14
; CHECK-NEXT: [[SHR_14_I_I:%.*]] = lshr i32 [[CONV31_I]], 15
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP16]], i32 1
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP9]], i32 1

View File

@ -11,7 +11,7 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a,
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i64 [[I]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3

View File

@ -21,10 +21,10 @@ define void @test_supernode_add(double* %Aarray, double* %Barray, double *%Carra
; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; ENABLED-NEXT: [[C1:%.*]] = load double, double* [[IDXC1]], align 8
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
; ENABLED-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT: [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
@ -80,10 +80,10 @@ define void @test_supernode_addsub(double* %Aarray, double* %Barray, double *%Ca
; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; ENABLED-NEXT: [[C1:%.*]] = load double, double* [[IDXC1]], align 8
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
; ENABLED-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
; ENABLED-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT: [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
@ -220,11 +220,11 @@ define void @supernode_scheduling(double* %Aarray, double* %Barray, double *%Car
; ENABLED-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[C]], i32 0
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B1]], i32 1
; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
; ENABLED-NEXT: [[D:%.*]] = load double, double* [[IDXD]], align 8
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[D]], i32 1
; ENABLED-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT: [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
@ -286,14 +286,14 @@ define void @supernode_scheduling_cross_block(double* %Aarray, double* %Barray,
; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; ENABLED-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
; ENABLED-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], <double 2.000000e+00, double 2.000000e+00>
; ENABLED-NEXT: br label [[BB:%.*]]
; ENABLED: bb:
; ENABLED-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
; ENABLED-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[A1]], i32 1
; ENABLED-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
; ENABLED-NEXT: [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*

View File

@ -225,7 +225,7 @@ define void @store_splat(float*, float) {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 2
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> undef, float [[TMP1:%.*]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i32 2
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP1]], i32 3

View File

@ -47,7 +47,7 @@ define void @n() local_unnamed_addr #0 {
; CHECK-NEXT: [[TMP31:%.*]] = icmp slt i32 [[SUB]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[SUB]]
; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[NEG]], i32 [[SUB]]
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP30]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i32 0
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 1
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP30]], i32 2
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP30]], i32 3
@ -58,198 +58,198 @@ define void @n() local_unnamed_addr #0 {
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = icmp slt i32 [[TMP41]], [[TMP32]]
; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP41]], i32 [[TMP32]]
; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[TMP43]], [[B_0]]
; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP44]], i32 [[TMP43]], i32 [[B_0]]
; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP43]], [[B_0]]
; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP43]], i32 [[B_0]]
; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]]
; CHECK-NEXT: [[TMP45:%.*]] = icmp slt i32 [[SUB_1_1]], 0
; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[SUB_1_1]], 0
; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]]
; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP46]], [[OP_EXTRA]]
; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[TMP44]]
; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP46]], i32 [[OP_EXTRA]]
; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP45]], [[OP_EXTRA1]]
; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[OP_EXTRA]]
; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP45]], i32 [[OP_EXTRA1]]
; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]]
; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_2_1]], 0
; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_2_1]], 0
; CHECK-NEXT: [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]]
; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP48]], [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]]
; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP48]], i32 [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP47]], i32 [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]]
; CHECK-NEXT: [[TMP49:%.*]] = icmp slt i32 [[SUB_3_1]], 0
; CHECK-NEXT: [[TMP48:%.*]] = icmp slt i32 [[SUB_3_1]], 0
; CHECK-NEXT: [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]]
; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP48]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP49]], [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]]
; CHECK-NEXT: [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32
; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP50]], i32 [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP49]], i32 [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]]
; CHECK-NEXT: [[TMP51:%.*]] = icmp slt i32 [[SUB_222]], 0
; CHECK-NEXT: [[TMP50:%.*]] = icmp slt i32 [[SUB_222]], 0
; CHECK-NEXT: [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]]
; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[NEG_223]], i32 [[SUB_222]]
; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP52]], [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP52]], i32 [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[NEG_223]], i32 [[SUB_222]]
; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP51]], [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP51]], i32 [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]]
; CHECK-NEXT: [[TMP53:%.*]] = icmp slt i32 [[SUB_1_2]], 0
; CHECK-NEXT: [[TMP52:%.*]] = icmp slt i32 [[SUB_1_2]], 0
; CHECK-NEXT: [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]]
; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[TMP55:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP53]], [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[TMP54:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP53]], i32 [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]]
; CHECK-NEXT: [[TMP56:%.*]] = icmp slt i32 [[SUB_2_2]], 0
; CHECK-NEXT: [[TMP55:%.*]] = icmp slt i32 [[SUB_2_2]], 0
; CHECK-NEXT: [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]]
; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[TMP58:%.*]] = or i1 [[CMP12_2_2]], [[TMP55]]
; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP57]], i32 [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP56]], [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[TMP57:%.*]] = or i1 [[CMP12_2_2]], [[TMP54]]
; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP56]], i32 [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]]
; CHECK-NEXT: [[TMP59:%.*]] = icmp slt i32 [[SUB_3_2]], 0
; CHECK-NEXT: [[TMP58:%.*]] = icmp slt i32 [[SUB_3_2]], 0
; CHECK-NEXT: [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]]
; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[CMP12_3_2]], [[TMP58]]
; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP61]], i32 2, i32 [[SPEC_SELECT_3_1]]
; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP60]], i32 [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP59]], [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[CMP12_3_2]], [[TMP57]]
; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP60]], i32 2, i32 [[SPEC_SELECT_3_1]]
; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP59]], i32 [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]]
; CHECK-NEXT: [[TMP62:%.*]] = icmp slt i32 [[SUB_328]], 0
; CHECK-NEXT: [[TMP61:%.*]] = icmp slt i32 [[SUB_328]], 0
; CHECK-NEXT: [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]]
; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[NEG_329]], i32 [[SUB_328]]
; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP63]], [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP63]], i32 [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[NEG_329]], i32 [[SUB_328]]
; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP62]], [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP62]], i32 [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]]
; CHECK-NEXT: [[TMP64:%.*]] = icmp slt i32 [[SUB_1_3]], 0
; CHECK-NEXT: [[TMP63:%.*]] = icmp slt i32 [[SUB_1_3]], 0
; CHECK-NEXT: [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]]
; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP63]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP64]], [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[TMP65:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP64]], i32 [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]]
; CHECK-NEXT: [[TMP67:%.*]] = icmp slt i32 [[SUB_2_3]], 0
; CHECK-NEXT: [[TMP66:%.*]] = icmp slt i32 [[SUB_2_3]], 0
; CHECK-NEXT: [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]]
; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[TMP69:%.*]] = or i1 [[CMP12_2_3]], [[TMP66]]
; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP68]], i32 [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP67]], [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[TMP68:%.*]] = or i1 [[CMP12_2_3]], [[TMP65]]
; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP67]], i32 [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]]
; CHECK-NEXT: [[TMP70:%.*]] = icmp slt i32 [[SUB_3_3]], 0
; CHECK-NEXT: [[TMP69:%.*]] = icmp slt i32 [[SUB_3_3]], 0
; CHECK-NEXT: [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]]
; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP71]], [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[CMP12_3_3]], [[TMP69]]
; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP72]], i32 3, i32 [[SPEC_SELECT_3_2]]
; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP71]], i32 [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[TMP70:%.*]] = select i1 [[TMP69]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP70]], [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[TMP71:%.*]] = or i1 [[CMP12_3_3]], [[TMP68]]
; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP71]], i32 3, i32 [[SPEC_SELECT_3_2]]
; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP70]], i32 [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]]
; CHECK-NEXT: [[TMP73:%.*]] = icmp slt i32 [[SUB_4]], 0
; CHECK-NEXT: [[TMP72:%.*]] = icmp slt i32 [[SUB_4]], 0
; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]]
; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[NEG_4]], i32 [[SUB_4]]
; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP74]], [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP74]], i32 [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[TMP73:%.*]] = select i1 [[TMP72]], i32 [[NEG_4]], i32 [[SUB_4]]
; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP73]], [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP73]], i32 [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]]
; CHECK-NEXT: [[TMP75:%.*]] = icmp slt i32 [[SUB_1_4]], 0
; CHECK-NEXT: [[TMP74:%.*]] = icmp slt i32 [[SUB_1_4]], 0
; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]]
; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_1_4]], i32 [[SUB_1_4]]
; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[TMP77:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[NEG_1_4]], i32 [[SUB_1_4]]
; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP75]], [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[TMP76:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP75]], i32 [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]]
; CHECK-NEXT: [[TMP78:%.*]] = icmp slt i32 [[SUB_2_4]], 0
; CHECK-NEXT: [[TMP77:%.*]] = icmp slt i32 [[SUB_2_4]], 0
; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]]
; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_2_4]], i32 [[SUB_2_4]]
; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[TMP80:%.*]] = or i1 [[CMP12_2_4]], [[TMP77]]
; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP79]], i32 [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[NEG_2_4]], i32 [[SUB_2_4]]
; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP78]], [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[TMP79:%.*]] = or i1 [[CMP12_2_4]], [[TMP76]]
; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP78]], i32 [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]]
; CHECK-NEXT: [[TMP81:%.*]] = icmp slt i32 [[SUB_3_4]], 0
; CHECK-NEXT: [[TMP80:%.*]] = icmp slt i32 [[SUB_3_4]], 0
; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]]
; CHECK-NEXT: [[TMP82:%.*]] = select i1 [[TMP81]], i32 [[NEG_3_4]], i32 [[SUB_3_4]]
; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP82]], [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[TMP83:%.*]] = or i1 [[CMP12_3_4]], [[TMP80]]
; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP83]], i32 4, i32 [[SPEC_SELECT_3_3]]
; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP82]], i32 [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[NEG_3_4]], i32 [[SUB_3_4]]
; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP81]], [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[TMP82:%.*]] = or i1 [[CMP12_3_4]], [[TMP79]]
; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP82]], i32 4, i32 [[SPEC_SELECT_3_3]]
; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP81]], i32 [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]]
; CHECK-NEXT: [[TMP84:%.*]] = icmp slt i32 [[SUB_5]], 0
; CHECK-NEXT: [[TMP83:%.*]] = icmp slt i32 [[SUB_5]], 0
; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]]
; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP84]], i32 [[NEG_5]], i32 [[SUB_5]]
; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP85]], [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP85]], i32 [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[NEG_5]], i32 [[SUB_5]]
; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP84]], [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP84]], i32 [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]]
; CHECK-NEXT: [[TMP86:%.*]] = icmp slt i32 [[SUB_1_5]], 0
; CHECK-NEXT: [[TMP85:%.*]] = icmp slt i32 [[SUB_1_5]], 0
; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]]
; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_1_5]], i32 [[SUB_1_5]]
; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[TMP88:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[TMP86:%.*]] = select i1 [[TMP85]], i32 [[NEG_1_5]], i32 [[SUB_1_5]]
; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP86]], [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[TMP87:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP86]], i32 [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]]
; CHECK-NEXT: [[TMP89:%.*]] = icmp slt i32 [[SUB_2_5]], 0
; CHECK-NEXT: [[TMP88:%.*]] = icmp slt i32 [[SUB_2_5]], 0
; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]]
; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_2_5]], i32 [[SUB_2_5]]
; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[TMP91:%.*]] = or i1 [[CMP12_2_5]], [[TMP88]]
; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP90]], i32 [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[TMP89:%.*]] = select i1 [[TMP88]], i32 [[NEG_2_5]], i32 [[SUB_2_5]]
; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP89]], [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[TMP90:%.*]] = or i1 [[CMP12_2_5]], [[TMP87]]
; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP89]], i32 [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]]
; CHECK-NEXT: [[TMP92:%.*]] = icmp slt i32 [[SUB_3_5]], 0
; CHECK-NEXT: [[TMP91:%.*]] = icmp slt i32 [[SUB_3_5]], 0
; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]]
; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[NEG_3_5]], i32 [[SUB_3_5]]
; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP93]], [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[TMP94:%.*]] = or i1 [[CMP12_3_5]], [[TMP91]]
; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP94]], i32 5, i32 [[SPEC_SELECT_3_4]]
; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP93]], i32 [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], i32 [[NEG_3_5]], i32 [[SUB_3_5]]
; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP92]], [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[TMP93:%.*]] = or i1 [[CMP12_3_5]], [[TMP90]]
; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP93]], i32 5, i32 [[SPEC_SELECT_3_4]]
; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP92]], i32 [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]]
; CHECK-NEXT: [[TMP95:%.*]] = icmp slt i32 [[SUB_6]], 0
; CHECK-NEXT: [[TMP94:%.*]] = icmp slt i32 [[SUB_6]], 0
; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]]
; CHECK-NEXT: [[TMP96:%.*]] = select i1 [[TMP95]], i32 [[NEG_6]], i32 [[SUB_6]]
; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP96]], [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP96]], i32 [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[NEG_6]], i32 [[SUB_6]]
; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP95]], [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP95]], i32 [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]]
; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[SUB_1_6]], 0
; CHECK-NEXT: [[TMP96:%.*]] = icmp slt i32 [[SUB_1_6]], 0
; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]]
; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_1_6]], i32 [[SUB_1_6]]
; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[TMP97:%.*]] = select i1 [[TMP96]], i32 [[NEG_1_6]], i32 [[SUB_1_6]]
; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP97]], [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[TMP98:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP97]], i32 [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]]
; CHECK-NEXT: [[TMP100:%.*]] = icmp slt i32 [[SUB_2_6]], 0
; CHECK-NEXT: [[TMP99:%.*]] = icmp slt i32 [[SUB_2_6]], 0
; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]]
; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_2_6]], i32 [[SUB_2_6]]
; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[TMP102:%.*]] = or i1 [[CMP12_2_6]], [[TMP99]]
; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP101]], i32 [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[TMP100:%.*]] = select i1 [[TMP99]], i32 [[NEG_2_6]], i32 [[SUB_2_6]]
; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP100]], [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[TMP101:%.*]] = or i1 [[CMP12_2_6]], [[TMP98]]
; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP100]], i32 [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]]
; CHECK-NEXT: [[TMP103:%.*]] = icmp slt i32 [[SUB_3_6]], 0
; CHECK-NEXT: [[TMP102:%.*]] = icmp slt i32 [[SUB_3_6]], 0
; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]]
; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP103]], i32 [[NEG_3_6]], i32 [[SUB_3_6]]
; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP104]], [[SPEC_SELECT8_2_6]]
; CHECK-NEXT: [[TMP105:%.*]] = or i1 [[CMP12_3_6]], [[TMP102]]
; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP105]], i32 6, i32 [[SPEC_SELECT_3_5]]
; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP104]], i32 [[SPEC_SELECT8_2_6]]
; CHECK-NEXT: [[TMP103:%.*]] = select i1 [[TMP102]], i32 [[NEG_3_6]], i32 [[SUB_3_6]]
; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP103]], [[SPEC_SELECT8_2_6]]
; CHECK-NEXT: [[TMP104:%.*]] = or i1 [[CMP12_3_6]], [[TMP101]]
; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP104]], i32 6, i32 [[SPEC_SELECT_3_5]]
; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP103]], i32 [[SPEC_SELECT8_2_6]]
; CHECK-NEXT: [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]]
; CHECK-NEXT: [[TMP106:%.*]] = icmp slt i32 [[SUB_7]], 0
; CHECK-NEXT: [[TMP105:%.*]] = icmp slt i32 [[SUB_7]], 0
; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]]
; CHECK-NEXT: [[TMP107:%.*]] = select i1 [[TMP106]], i32 [[NEG_7]], i32 [[SUB_7]]
; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP107]], [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP107]], i32 [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[TMP106:%.*]] = select i1 [[TMP105]], i32 [[NEG_7]], i32 [[SUB_7]]
; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP106]], [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP106]], i32 [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]]
; CHECK-NEXT: [[TMP108:%.*]] = icmp slt i32 [[SUB_1_7]], 0
; CHECK-NEXT: [[TMP107:%.*]] = icmp slt i32 [[SUB_1_7]], 0
; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]]
; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_1_7]], i32 [[SUB_1_7]]
; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[TMP108:%.*]] = select i1 [[TMP107]], i32 [[NEG_1_7]], i32 [[SUB_1_7]]
; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP108]], [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[TMP109:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP108]], i32 [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]]
; CHECK-NEXT: [[TMP111:%.*]] = icmp slt i32 [[SUB_2_7]], 0
; CHECK-NEXT: [[TMP110:%.*]] = icmp slt i32 [[SUB_2_7]], 0
; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]]
; CHECK-NEXT: [[TMP112:%.*]] = select i1 [[TMP111]], i32 [[NEG_2_7]], i32 [[SUB_2_7]]
; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP112]], [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[TMP113:%.*]] = or i1 [[CMP12_2_7]], [[TMP110]]
; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP112]], i32 [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[TMP111:%.*]] = select i1 [[TMP110]], i32 [[NEG_2_7]], i32 [[SUB_2_7]]
; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP111]], [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[TMP112:%.*]] = or i1 [[CMP12_2_7]], [[TMP109]]
; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP111]], i32 [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]]
; CHECK-NEXT: [[TMP114:%.*]] = icmp slt i32 [[SUB_3_7]], 0
; CHECK-NEXT: [[TMP113:%.*]] = icmp slt i32 [[SUB_3_7]], 0
; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]]
; CHECK-NEXT: [[TMP115:%.*]] = select i1 [[TMP114]], i32 [[NEG_3_7]], i32 [[SUB_3_7]]
; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP115]], [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[TMP116:%.*]] = or i1 [[CMP12_3_7]], [[TMP113]]
; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP116]], i32 7, i32 [[SPEC_SELECT_3_6]]
; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP115]], i32 [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[TMP114:%.*]] = select i1 [[TMP113]], i32 [[NEG_3_7]], i32 [[SUB_3_7]]
; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP114]], [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[TMP115:%.*]] = or i1 [[CMP12_3_7]], [[TMP112]]
; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP115]], i32 7, i32 [[SPEC_SELECT_3_6]]
; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP114]], i32 [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1

View File

@ -11,7 +11,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb279:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> undef, float undef, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float undef, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float undef, i32 1
; CHECK-NEXT: br label [[BB283:%.*]]
; CHECK: bb283:

View File

@ -11,7 +11,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb279:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> undef, float undef, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float undef, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float undef, i32 1
; CHECK-NEXT: br label [[BB283:%.*]]
; CHECK: bb283:

Some files were not shown because too many files have changed in this diff Show More