[LoopVectorize] Extract the last lane from a uniform store
Changes VPReplicateRecipe to extract the last lane from an unconditional,
uniform store instruction. collectLoopUniforms will also add stores to the
list of uniform instructions where Legal->isUniformMemOp is true.

setCostBasedWideningDecision now sets the widening decision for all uniform
memory ops to Scalarize, where previously GatherScatter may have been chosen
for scalable stores.

This fixes an assert ("Cannot yet scalarize uniform stores") in
setCostBasedWideningDecision when we have a loop containing a uniform i1
store and a scalable VF, which we cannot create a scatter for.

Reviewed By: sdesmalen, david-arm, fhahn

Differential Revision: https://reviews.llvm.org/D112725
commit 0d748b4d32 (parent 092cee5f7f)
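For context, here is a minimal sketch of the kind of loop this patch targets
(plain C++, names invented for illustration, not taken from the patch): the
store address is loop-invariant, so the scalar loop's net effect is that only
the final iteration's value survives in memory.

    // Hypothetical example: a "uniform store", i.e. a store to the same
    // loop-invariant address on every iteration.
    void uniform_store(short *dst, const short *src, long n) {
      for (long i = 0; i < n; ++i)
        *dst = src[i]; // last write wins: memory ends up holding src[n-1]
    }

With this change the vectorizer keeps the wide load of src but, instead of
scattering all lanes to the same address, extracts the last lane and performs
a single scalar store, as the @inv_store_i16 test below checks.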
@@ -1770,6 +1770,7 @@ private:
   DenseMap<ElementCount, ScalarCostsTy> InstsToScalarize;

   /// Holds the instructions known to be uniform after vectorization.
+  /// Entries in Uniforms may demand either the first or last lane.
   /// The data is collected per VF.
   DenseMap<ElementCount, SmallPtrSet<Instruction *, 4>> Uniforms;

@@ -5409,9 +5410,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
     assert(WideningDecision != CM_Unknown &&
            "Widening decision should be ready at this moment");

-    // A uniform memory op is itself uniform. We exclude uniform stores
-    // here as they demand the last lane, not the first one.
-    if (isa<LoadInst>(I) && Legal->isUniformMemOp(*I)) {
+    // A uniform memory op is itself uniform.
+    if (Legal->isUniformMemOp(*I)) {
       assert(WideningDecision == CM_Scalarize);
       return true;
     }
@@ -5436,7 +5436,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
   SetVector<Value *> HasUniformUse;

   // Scan the loop for instructions which are either a) known to have only
-  // lane 0 demanded or b) are uses which demand only lane 0 of their operand.
+  // lane 0 or the last lane demanded or b) are uses which demand only
+  // lane 0 of their operand.
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
@@ -5468,10 +5469,15 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
      if (!Ptr)
        continue;

-      // A uniform memory op is itself uniform. We exclude uniform stores
-      // here as they demand the last lane, not the first one.
-      if (isa<LoadInst>(I) && Legal->isUniformMemOp(I))
-        addToWorklistIfAllowed(&I);
+      // A uniform memory op is itself uniform. Load instructions are added
+      // to the worklist as they demand the first lane. Since store instructions
+      // demand the last lane, we instead add these to Uniforms only.
+      if (Legal->isUniformMemOp(I)) {
+        if (isa<LoadInst>(I))
+          addToWorklistIfAllowed(&I);
+        else if (!isOutOfScope(&I) && !isScalarWithPredication(&I))
+          Uniforms[VF].insert(&I);
+      }

      if (isUniformDecision(&I, VF)) {
        assert(isVectorizedMemAccessUse(&I, Ptr) && "consistency check");
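The first-lane versus last-lane distinction drawn by the new comments can be
seen with a small standalone check (hypothetical values, not from the patch):
a uniform load reads the same value in every lane, so lane 0 suffices, while
a uniform store must preserve the value of the last lane to match the scalar
loop's last-write-wins semantics.

    #include <cassert>

    int main() {
      int dst = 0;
      const int src[4] = {10, 20, 30, 40}; // one vector iteration, VF = 4
      for (int i = 0; i < 4; ++i)
        dst = src[i];        // uniform store: every lane writes one place
      assert(dst == src[3]); // only the last lane's value is observable
      return 0;
    }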
@@ -7490,17 +7496,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
      // relying on instcombine to remove them.
      // Load: Scalar load + broadcast
      // Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract
-      InstructionCost Cost;
-      if (isa<StoreInst>(&I) && VF.isScalable() &&
-          isLegalGatherOrScatter(&I)) {
-        Cost = getGatherScatterCost(&I, VF);
-        setWideningDecision(&I, VF, CM_GatherScatter, Cost);
-      } else {
-        assert((isa<LoadInst>(&I) || !VF.isScalable()) &&
-               "Cannot yet scalarize uniform stores");
-        Cost = getUniformMemOpCost(&I, VF);
-        setWideningDecision(&I, VF, CM_Scalarize, Cost);
-      }
+      InstructionCost Cost = getUniformMemOpCost(&I, VF);
+      setWideningDecision(&I, VF, CM_Scalarize, Cost);
      continue;
    }

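A paraphrase of the cost comment in the hunk above, as a hedged sketch
(illustrative only, not the LLVM implementation of getUniformMemOpCost): a
scalarized uniform store costs one scalar store, plus one lane extract when
the stored value varies across the loop.

    // Names here are invented for illustration.
    unsigned uniformStoreCost(unsigned ScalarStoreCost,
                              unsigned ExtractLaneCost,
                              bool IsLoopInvariantStoreValue) {
      return ScalarStoreCost +
             (IsLoopInvariantStoreValue ? 0u : ExtractLaneCost);
    }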
@@ -9858,6 +9855,16 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
     return;
   }

+  // If the instruction is a store to a uniform address, we only need to
+  // generate the last lane for the last UF part.
+  Instruction *I = getUnderlyingInstr();
+  if (State.VF.isVector() && IsUniform && isa<StoreInst>(I)) {
+    VPLane Lane = VPLane::getLastLaneForVF(State.VF);
+    State.ILV->scalarizeInstruction(
+        I, this, *this, VPIteration(State.UF - 1, Lane), IsPredicated, State);
+    return;
+  }
+
   // Generate scalar instances for all VF lanes of all UF parts, unless the
   // instruction is uniform in which case generate only the first lane for each
   // of the UF parts.
@@ -9866,9 +9873,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
          "Can't scalarize a scalable vector");
   for (unsigned Part = 0; Part < State.UF; ++Part)
     for (unsigned Lane = 0; Lane < EndLane; ++Lane)
-      State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this,
-                                      VPIteration(Part, Lane), IsPredicated,
-                                      State);
+      State.ILV->scalarizeInstruction(I, this, *this, VPIteration(Part, Lane),
+                                      IsPredicated, State);
 }

 void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
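A rough scalar-code equivalent of what VPReplicateRecipe::execute now emits
for a uniform store, sketched under the assumption of a single vector body
with a runtime vector factor (names are illustrative): the stored value of
lane VF-1 of the final part feeds one scalar store per vector iteration,
instead of a masked scatter.

    #include <cstddef>

    // Semantics of the vectorized loop for *dst = src[i] with runtime
    // vector factor VF (e.g. vscale * 4); illustrative names only.
    void vectorized_uniform_store(short *dst, const short *src,
                                  std::size_t n, std::size_t VF) {
      std::size_t i = 0;
      for (; i + VF <= n; i += VF)
        *dst = src[i + VF - 1]; // extract lane VF-1, one scalar store
      for (; i < n; ++i)        // scalar remainder loop
        *dst = src[i];
    }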
@@ -4,12 +4,35 @@ target triple = "aarch64-unknown-linux-gnu"

 define void @inv_store_i16(i16* noalias %dst, i16* noalias readonly %src, i64 %N) #0 {
 ; CHECK-LABEL: @inv_store_i16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK:         %[[TMP1:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %dst, i32 0
-; CHECK-NEXT:    %[[SPLAT_PTRS:.*]] = shufflevector <vscale x 4 x i16*> %[[TMP1]], <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK:         %[[VECLOAD:.*]] = load <vscale x 4 x i16>, <vscale x 4 x i16>* %{{.*}}, align 2
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %[[VECLOAD]], <vscale x 4 x i16*> %[[SPLAT_PTRS]], i32 2
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[SRC:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <vscale x 4 x i16>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP7]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i32 [[TMP8]], 4
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_LOAD]], i32 [[TMP10]]
+; CHECK-NEXT:    store i16 [[TMP11]], i16* [[DST:%.*]], align 2
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+;
 entry:
   br label %for.body14

@@ -59,6 +82,98 @@ for.end:                                          ; preds = %for.inc, %entry
   ret void
 }

+define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) #0 {
+; CHECK-LABEL: @uniform_store_i1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i64, i64* [[START:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64*> poison, i64* [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i64, i64* [[START]], <vscale x 2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i64, i64* [[START]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i64, i64* [[START]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i64, i64* [[NEXT_GEP2]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, <vscale x 2 x i64*> [[NEXT_GEP]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <vscale x 2 x i64*> [[TMP13]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], 2
+; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP16]], 1
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 [[TMP17]]
+; CHECK-NEXT:    store i1 [[TMP18]], i1* [[DST:%.*]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP20:%.*]] = mul i64 [[TMP19]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %first.sroa = phi i64* [ %incdec.ptr, %for.body ], [ %start, %entry ]
+  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i64 %iv, 1
+  %0 = load i64, i64* %first.sroa
+  %incdec.ptr = getelementptr inbounds i64, i64* %first.sroa, i64 1
+  %cmp.not = icmp eq i64* %incdec.ptr, %start
+  store i1 %cmp.not, i1* %dst
+  %cmp = icmp ult i64 %iv, %N
+  br i1 %cmp, label %for.body, label %end, !llvm.loop !6
+
+end:
+  ret void
+}
+
+; Ensure conditional i1 stores do not vectorize
+define void @cond_store_i1(i1* noalias %dst, i8* noalias %start, i32 %cond, i64 %N) #0 {
+; CHECK-LABEL: @cond_store_i1(
+; CHECK-NOT:   vector.body
+;
+entry:
+  br label %for.body
+
+for.body:
+  %first.sroa = phi i8* [ %incdec.ptr, %if.end ], [ null, %entry ]
+  %incdec.ptr = getelementptr inbounds i8, i8* %first.sroa, i64 1
+  %0 = load i8, i8* %incdec.ptr
+  %tobool.not = icmp eq i8 %0, 10
+  br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:
+  %cmp.store = icmp eq i8* %start, %incdec.ptr
+  store i1 %cmp.store, i1* %dst
+  br label %if.end
+
+if.end:
+  %cmp.not = icmp eq i8* %incdec.ptr, %start
+  br i1 %cmp.not, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
 attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }

 !0 = distinct !{!0, !1, !2, !3, !4, !5}
@@ -68,3 +183,6 @@ attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }
 !4 = !{!"llvm.loop.vectorize.enable", i1 true}
 !5 = !{!"llvm.loop.interleave.count", i32 1}

+!6 = distinct !{!6, !1, !7, !3, !4, !5}
+!7 = !{!"llvm.loop.vectorize.width", i32 2}
+
@@ -1,5 +1,5 @@
-; RUN: opt -S -loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 < %s | FileCheck %s
-; RUN: opt -S -passes=loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 -force-vector-width=4 < %s | FileCheck %s
+; RUN: opt -S -passes=loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 -force-vector-width=4 < %s | FileCheck %s

 define i32 @main(i32 %arg, i8** nocapture readnone %arg1) #0 {
 ;CHECK: vector.body:
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s

 ; This is a bugpoint reduction of a test from PR43582:
 ; https://bugs.llvm.org/show_bug.cgi?id=43582
@@ -62,25 +62,11 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 {
 ; CHECK-NEXT:    [[TMP23:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = or <4 x i32> [[TMP22]], zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = or <4 x i32> [[TMP23]], zeroinitializer
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP25]], i32 3
 ; CHECK-NEXT:    store i32 [[TMP26]], i32* undef, align 4, !tbaa [[TBAA4:![0-9]+]]
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1
-; CHECK-NEXT:    store i32 [[TMP27]], i32* undef, align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
-; CHECK-NEXT:    store i32 [[TMP28]], i32* undef, align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
-; CHECK-NEXT:    store i32 [[TMP29]], i32* undef, align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i32> [[TMP25]], i32 0
-; CHECK-NEXT:    store i32 [[TMP30]], i32* undef, align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x i32> [[TMP25]], i32 1
-; CHECK-NEXT:    store i32 [[TMP31]], i32* undef, align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <4 x i32> [[TMP25]], i32 2
-; CHECK-NEXT:    store i32 [[TMP32]], i32* undef, align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x i32> [[TMP25]], i32 3
-; CHECK-NEXT:    store i32 [[TMP33]], i32* undef, align 4, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[SW_EPILOG:%.*]], label [[SCALAR_PH]]
@@ -91,11 +77,11 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 {
 ; CHECK-NEXT:    [[P_359:%.*]] = phi i8* [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[CONV70:%.*]] = zext i8 [[X]] to i32
 ; CHECK-NEXT:    [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24
-; CHECK-NEXT:    [[TMP35:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]]
-; CHECK-NEXT:    [[CONV73:%.*]] = zext i8 [[TMP35]] to i32
+; CHECK-NEXT:    [[TMP28:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]]
+; CHECK-NEXT:    [[CONV73:%.*]] = zext i8 [[TMP28]] to i32
 ; CHECK-NEXT:    [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16
 ; CHECK-NEXT:    [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]]
+; CHECK-NEXT:    [[TMP29:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    [[SHL78:%.*]] = shl nuw nsw i32 undef, 8
 ; CHECK-NEXT:    [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]]
 ; CHECK-NEXT:    [[CONV81:%.*]] = zext i8 undef to i32
@@ -84,17 +84,11 @@ define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 {
 ; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP23:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP23]], i32 0
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3
 ; CHECK-NEXT:    store i32 [[TMP24]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1
-; CHECK-NEXT:    store i32 [[TMP25]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2
-; CHECK-NEXT:    store i32 [[TMP26]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3
-; CHECK-NEXT:    store i32 [[TMP27]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]]
@@ -207,21 +207,6 @@ define void @uniform_store_uniform_value(i32* align(4) %addr) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
 ; CHECK-NEXT:    store i32 0, i32* [[ADDR:%.*]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -286,43 +271,29 @@ define void @uniform_store_varying_value(i32* align(4) %addr) {
 ; CHECK-NEXT:    [[STEP_ADD5:%.*]] = add <4 x i32> [[VEC_IND4]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[STEP_ADD6:%.*]] = add <4 x i32> [[STEP_ADD5]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[STEP_ADD7:%.*]] = add <4 x i32> [[STEP_ADD6]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[VEC_IND4]], i32 0
-; CHECK-NEXT:    store i32 [[TMP16]], i32* [[ADDR:%.*]], align 4
-; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32> [[VEC_IND4]], i32 1
-; CHECK-NEXT:    store i32 [[TMP17]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i32> [[VEC_IND4]], i32 2
-; CHECK-NEXT:    store i32 [[TMP18]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i32> [[VEC_IND4]], i32 3
-; CHECK-NEXT:    store i32 [[TMP19]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32> [[STEP_ADD5]], i32 0
-; CHECK-NEXT:    store i32 [[TMP20]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i32> [[STEP_ADD5]], i32 1
-; CHECK-NEXT:    store i32 [[TMP21]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i32> [[STEP_ADD5]], i32 2
-; CHECK-NEXT:    store i32 [[TMP22]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[STEP_ADD5]], i32 3
-; CHECK-NEXT:    store i32 [[TMP23]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[STEP_ADD6]], i32 0
-; CHECK-NEXT:    store i32 [[TMP24]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[STEP_ADD6]], i32 1
-; CHECK-NEXT:    store i32 [[TMP25]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[STEP_ADD6]], i32 2
-; CHECK-NEXT:    store i32 [[TMP26]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[STEP_ADD6]], i32 3
-; CHECK-NEXT:    store i32 [[TMP27]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i32> [[STEP_ADD7]], i32 0
-; CHECK-NEXT:    store i32 [[TMP28]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i32> [[STEP_ADD7]], i32 1
-; CHECK-NEXT:    store i32 [[TMP29]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i32> [[STEP_ADD7]], i32 2
-; CHECK-NEXT:    store i32 [[TMP30]], i32* [[ADDR]], align 4
-; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x i32> [[STEP_ADD7]], i32 3
-; CHECK-NEXT:    store i32 [[TMP31]], i32* [[ADDR]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 0
+; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP16]], 1
+; CHECK-NEXT:    [[TMP19:%.*]] = add i32 [[TMP16]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP16]], 3
+; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TMP16]], 4
+; CHECK-NEXT:    [[TMP22:%.*]] = add i32 [[TMP16]], 5
+; CHECK-NEXT:    [[TMP23:%.*]] = add i32 [[TMP16]], 6
+; CHECK-NEXT:    [[TMP24:%.*]] = add i32 [[TMP16]], 7
+; CHECK-NEXT:    [[TMP25:%.*]] = add i32 [[TMP16]], 8
+; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[TMP16]], 9
+; CHECK-NEXT:    [[TMP27:%.*]] = add i32 [[TMP16]], 10
+; CHECK-NEXT:    [[TMP28:%.*]] = add i32 [[TMP16]], 11
+; CHECK-NEXT:    [[TMP29:%.*]] = add i32 [[TMP16]], 12
+; CHECK-NEXT:    [[TMP30:%.*]] = add i32 [[TMP16]], 13
+; CHECK-NEXT:    [[TMP31:%.*]] = add i32 [[TMP16]], 14
+; CHECK-NEXT:    [[TMP32:%.*]] = add i32 [[TMP16]], 15
+; CHECK-NEXT:    store i32 [[TMP32]], i32* [[ADDR:%.*]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4>
 ; CHECK-NEXT:    [[VEC_IND_NEXT9]] = add <4 x i32> [[STEP_ADD7]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; CHECK-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
+; CHECK-NEXT:    br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4097, 4096
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -412,21 +383,6 @@ define void @uniform_copy(i32* %A, i32* %B) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A]], align 4, !alias.scope !12
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4, !alias.scope !12
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A]], align 4, !alias.scope !12
-; CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP7]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP7]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
-; CHECK-NEXT:    store i32 [[TMP7]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
 ; CHECK-NEXT:    store i32 [[TMP7]], i32* [[B]], align 4, !alias.scope !15, !noalias !12
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
@@ -45,13 +45,11 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK:       pred.srem.continue4:
 ; CHECK-NEXT:    [[TMP13:%.*]] = phi <2 x i16> [ [[TMP9]], [[PRED_SREM_CONTINUE]] ], [ [[TMP12]], [[PRED_SREM_IF3]] ]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i16> <i16 5786, i16 5786>, <2 x i16> [[TMP13]]
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
 ; CHECK-NEXT:    store i16 [[TMP14]], i16* @v_39, align 1
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
-; CHECK-NEXT:    store i16 [[TMP15]], i16* @v_39, align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
-; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 12, 12
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -74,7 +72,7 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK-NEXT:    store i16 [[COND6]], i16* @v_39, align 1
 ; CHECK-NEXT:    [[INC7]] = add nsw i16 [[I_07]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[INC7]], 111
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RV:%.*]] = load i16, i16* @v_39, align 1
 ; CHECK-NEXT:    ret i16 [[RV]]
@@ -48,8 +48,6 @@ define void @f() {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    store i32 0, i32* @f.e, align 1, !alias.scope !0, !noalias !3
-; CHECK-NEXT:    store i32 0, i32* @f.e, align 1, !alias.scope !0, !noalias !3
-; CHECK-NEXT:    store i8 10, i8* [[TMP0]], align 1
 ; CHECK-NEXT:    store i8 10, i8* [[TMP0]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 500
@@ -55,13 +55,11 @@ define i16 @test(i16** %arg, i64 %N) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, <2 x i16>* [[TMP6]], align 2, !alias.scope !0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[L_2]], i64 0
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1
 ; CHECK-NEXT:    store i16 [[TMP8]], i16* [[TMP7]], align 2, !alias.scope !3, !noalias !0
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT:    store i16 [[TMP9]], i16* [[TMP7]], align 2, !alias.scope !3, !noalias !0
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]