From 86bb7df6e6ea6cfbba3b13b82c0c48eb1c45d198 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 2 May 2022 09:58:35 +0100 Subject: [PATCH] [CostModel][X86] getScalarizationOverhead - handle vXi1 extracts with MOVMSK (pre-AVX512) We can quickly extract multiple elements of a bool vector using MOVMSK ops - since we don't know what generated the vXi1, I've been optimistic and assumed we can use PMOVMSKB to extract the maximum number of bools with a single op. The MOVMSK pattern isn't great for extract+insert round trips as vXi1 type legalization can interfere with this a lot - so this relies on us remaining good at using getScalarizationOverhead properly (and tagging both Insert and Extract modes) for those round trip cases. The AVX512 KMOV codegen for bool extraction is a bit of a mess so for now I've not included that - the per-element cost is a lot more accurate for current codegen. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 17 +- .../CostModel/X86/intrinsic-cost-kinds.ll | 4 +- .../X86/masked-intrinsic-cost-inseltpoison.ll | 256 +++--- .../CostModel/X86/masked-intrinsic-cost.ll | 256 +++--- .../CostModel/X86/shuffle-replication-i1.ll | 781 ++++++++++-------- 5 files changed, 755 insertions(+), 559 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 7f4ca3a687ba..a0bde8dc58a3 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3833,10 +3833,21 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, } } - // TODO: Use default extraction for now, but we should investigate extending this - // to handle repeated subvector extraction. - if (Extract) + if (Extract) { + // vXi1 can be efficiently extracted with MOVMSK. + // TODO: AVX512 predicate mask handling. + // NOTE: This doesn't work well for roundtrip scalarization. 
+ if (!Insert && Ty->getScalarSizeInBits() == 1 && !ST->hasAVX512()) { + unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements(); + unsigned MaxElts = ST->hasAVX2() ? 32 : 16; + unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts; + return MOVMSKCost; + } + + // TODO: Use default extraction for now, but we should investigate extending + // this to handle repeated subvector extraction. Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract); + } return Cost; } diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index 185930955235..d0c987fdbf35 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -281,7 +281,7 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { ; THRU-LABEL: 'maskedgather' -; THRU-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedgather' @@ -302,7 +302,7 @@ define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) { ; THRU-LABEL: 'maskedscatter' -; THRU-NEXT: Cost Model: Found an estimated cost of 92 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> 
%va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedscatter' diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 36d027bb6956..0dc3f986ae69 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -882,85 +882,139 @@ define i32 @masked_expandload() { define i32 @masked_compressstore() { ; SSE2-LABEL: 'masked_compressstore' -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: 
Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.compressstore.v16i8(<16 
x i8> undef, i8* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_compressstore' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void 
@llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> 
undef, i32* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; -; AVX-LABEL: 'masked_compressstore' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void 
@llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; AVX1-LABEL: 'masked_compressstore' +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for 
instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_compressstore' 
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found 
an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; 
SKL-LABEL: 'masked_compressstore' +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; 
SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 
0 ; ; AVX512-LABEL: 'masked_compressstore' ; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) @@ -1242,19 +1296,19 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) { ; SSE2-LABEL: 'test_gather_2f64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; SSE42-LABEL: 'test_gather_2f64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; AVX1-LABEL: 'test_gather_2f64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; AVX2-LABEL: 
'test_gather_2f64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; SKL-LABEL: 'test_gather_2f64' @@ -1271,19 +1325,19 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost 
of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32' @@ -1383,25 +1437,25 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < ; SSE2-LABEL: 'test_gather_16f32_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_var_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an 
estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1427,25 +1481,25 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3 ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1532,7 +1586,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask 
to <16 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_16i32' @@ -1540,7 +1594,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_scatter_16i32' @@ -1548,7 +1602,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_scatter_16i32' @@ -1556,7 +1610,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKL-LABEL: 'test_scatter_16i32' @@ -1564,7 +1618,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SKL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_16i32' @@ -1586,15 +1640,15 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { ; SSE2-LABEL: 'test_scatter_8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; 
AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_8i32' @@ -1607,15 +1661,15 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SSE2-LABEL: 'test_scatter_4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; KNL-LABEL: 'test_scatter_4i32' @@ -1634,25 +1688,25 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) ; 
SSE2-LABEL: 'test_gather_4f32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x 
float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32' diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index a6fb78f6c7e4..02a279e68aee 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -1266,85 +1266,139 @@ define i32 @masked_expandload() { define i32 @masked_compressstore() { ; SSE2-LABEL: 'masked_compressstore' -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, 
double* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> 
undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated 
cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void 
@llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_compressstore' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> 
undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost 
of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void 
@llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; -; AVX-LABEL: 'masked_compressstore' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> 
undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for 
instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; AVX1-LABEL: 'masked_compressstore' +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* 
undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_compressstore' +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void 
@llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* 
undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_compressstore' +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> 
undef, i16* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_compressstore' ; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) @@ -1626,19 +1680,19 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) { ; SSE2-LABEL: 'test_gather_2f64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; SSE42-LABEL: 'test_gather_2f64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> 
@llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; AVX1-LABEL: 'test_gather_2f64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; AVX2-LABEL: 'test_gather_2f64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; SKL-LABEL: 'test_gather_2f64' @@ -1655,19 +1709,19 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> 
@llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32' @@ -1767,25 +1821,25 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < ; SSE2-LABEL: 'test_gather_16f32_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_var_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call 
<16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1811,25 +1865,25 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3 ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' 
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> 
@llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1916,7 +1970,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_16i32' @@ -1924,7 +1978,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%imask = bitcast i16 %mask to <16 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_scatter_16i32' @@ -1932,7 +1986,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_scatter_16i32' @@ -1940,7 +1994,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, 
<16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKL-LABEL: 'test_scatter_16i32' @@ -1948,7 +2002,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SKL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_16i32' @@ -1970,15 +2024,15 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { ; SSE2-LABEL: 'test_scatter_8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> 
%a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_8i32' @@ -1991,15 +2045,15 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SSE2-LABEL: 'test_scatter_4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for 
instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; KNL-LABEL: 'test_scatter_4i32' @@ -2018,25 +2072,25 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) ; SSE2-LABEL: 'test_gather_4f32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 
x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll index afb73f47eda8..da38a103bd55 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll @@ -4,8 +4,8 @@ ; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 ; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 ; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 ; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 ; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQVEC512 @@ -17,70 +17,81 @@ define void @replication_i1_stride2() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride2' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = 
shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride2' -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 
x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride2' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> 
zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride2' -; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; SSE41-NEXT: Cost 
Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride2' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> -; SSE42-NEXT: Cost Model: Found an 
estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride2' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf32 = shufflevector <32 x i1> 
undef, <32 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride2' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer @@ -183,70 +194,81 @@ define void @replication_i1_stride2() nounwind "min-legal-vector-width"="256" { define void 
@replication_i1_stride3() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost 
of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride3' -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: 
%vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride3' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 
x i1> undef, <128 x i1> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride3' -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x 
i1> poison, <48 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> 
poison, <6 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> 
poison, <384 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride3' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride3' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated 
cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride3' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 412 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride3' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer @@ -349,70 +371,81 @@ define void @replication_i1_stride3() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride4' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x 
i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride4' -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride4' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride4' -; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> -; SSE41-NEXT: Cost Model: Found an 
estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride4' -; SSE42-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost 
of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride4' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x 
i1> undef, <4 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 552 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, 
<512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride4' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer @@ -515,70 +548,81 @@ define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride5' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1728 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride5' -; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1728 for 
instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride5' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: 
%vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1728 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride5' -; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for 
instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 648 for 
instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride5' -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 81 for instruction: 
%vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride5' -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride5' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> 
poison, <5 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 344 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 688 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride5' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 342 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride5' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer @@ -681,70 +725,81 @@ define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride6() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride6' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for 
instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride6' -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf32 = 
shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride6' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf4 = 
shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret void ; ; SSE41-LABEL: 'replication_i1_stride6' -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = 
shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride6' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = 
shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride6' -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 944 for instruction: %vf128 = shufflevector <128 x i1> undef, 
<128 x i1> poison, <768 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride6' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 412 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride6' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost 
Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride6' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer @@ -847,70 +902,81 @@ define void @replication_i1_stride6() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride7() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride7' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf64 
= shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride7' -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 
x i1> undef, <8 x i1> poison, <56 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride7' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x 
i1> poison, <7 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x 
i1> undef, <64 x i1> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride7' -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> 
undef, <8 x i1> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride7' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 
x i1> poison, <7 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride7' -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x 
i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 540 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1080 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride7' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride7' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX2-NEXT: Cost Model: Found 
an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 239 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 478 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 956 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride7' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer @@ -1013,70 +1079,81 @@ define void @replication_i1_stride7() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride8() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 
x i1> undef, <16 x i1> poison, <128 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride8' -; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> 
undef, <2 x i1> poison, <16 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; SSE3-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> 
undef, <128 x i1> poison, <1024 x i32> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, 
<16 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride8' -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; SSE41-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> 
undef, <2 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> 
poison, <512 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride8' -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x 
i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1216 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1096 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride8' +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 273 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 546 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1092 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride8' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer