From 12c629ec6c590ad71f3707735df5931dfb6a2496 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 9 Apr 2020 19:55:51 +0100 Subject: [PATCH] [CostModel][X86] Add shuffle costs for some common sub-128bit vectors v2i8/v4i8/v8i8 + v2i16/v4i16 all show up in vectorizer code and by just using the legalized types (v16i8/v8i16) we're highly exaggerating the actual cost of the shuffle. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 36 +++++++++++++++++++ .../CostModel/X86/shuffle-broadcast.ll | 10 +++--- .../CostModel/X86/shuffle-insert_subvector.ll | 12 +++---- .../Analysis/CostModel/X86/shuffle-reverse.ll | 8 ++--- .../CostModel/X86/shuffle-single-src.ll | 10 +++--- .../CostModel/X86/shuffle-transpose.ll | 10 +++--- .../Analysis/CostModel/X86/shuffle-two-src.ll | 10 +++--- 7 files changed, 66 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index cca9cdc250fc..ff57da589b9b 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -949,6 +949,42 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, } } + // Handle some common (illegal) sub-vector types as they are often very cheap + // to shuffle even on targets without PSHUFB. + EVT VT = TLI->getValueType(DL, Tp); + if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 && + !ST->hasSSSE3()) { + static const CostTblEntry SSE2SubVectorShuffleTbl[] = { + {TTI::SK_Broadcast, MVT::v4i16, 1}, // pshuflw + {TTI::SK_Broadcast, MVT::v2i16, 1}, // pshuflw + {TTI::SK_Broadcast, MVT::v8i8, 2}, // punpck/pshuflw + {TTI::SK_Broadcast, MVT::v4i8, 2}, // punpck/pshuflw + {TTI::SK_Broadcast, MVT::v2i8, 1}, // punpck + + {TTI::SK_Reverse, MVT::v4i16, 1}, // pshuflw + {TTI::SK_Reverse, MVT::v2i16, 1}, // pshuflw + {TTI::SK_Reverse, MVT::v4i8, 3}, // punpck/pshuflw/packus + {TTI::SK_Reverse, MVT::v2i8, 1}, // punpck + + {TTI::SK_PermuteTwoSrc, MVT::v4i16, 2}, // punpck/pshuflw + {TTI::SK_PermuteTwoSrc, MVT::v2i16, 2}, // punpck/pshuflw + {TTI::SK_PermuteTwoSrc, MVT::v8i8, 7}, // punpck/pshuflw + {TTI::SK_PermuteTwoSrc, MVT::v4i8, 4}, // punpck/pshuflw + {TTI::SK_PermuteTwoSrc, MVT::v2i8, 2}, // punpck + + {TTI::SK_PermuteSingleSrc, MVT::v4i16, 1}, // pshuflw + {TTI::SK_PermuteSingleSrc, MVT::v2i16, 1}, // pshuflw + {TTI::SK_PermuteSingleSrc, MVT::v8i8, 5}, // punpck/pshuflw + {TTI::SK_PermuteSingleSrc, MVT::v4i8, 3}, // punpck/pshuflw + {TTI::SK_PermuteSingleSrc, MVT::v2i8, 1}, // punpck + }; + + if (ST->hasSSE2()) + if (const auto *Entry = + CostTableLookup(SSE2SubVectorShuffleTbl, Kind, VT.getSimpleVT())) + return Entry->Cost; + } + // We are going to permute multiple sources and the result will be in multiple // destinations. Providing an accurate cost only for splits where the element // type remains the same. diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll index 450655d48aa5..1f0c879b989a 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll @@ -178,8 +178,8 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer @@ -243,9 +243,9 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll index 29ee1c0778e9..208dd3d1a01a 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll @@ -649,7 +649,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -1009,13 +1009,13 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE2-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll index 4ed58cf9a2fd..df5a8388afb3 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll @@ -178,8 +178,8 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> @@ -259,8 +259,8 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll index 5fe38e8d9dfb..1caab2985be9 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -219,8 +219,8 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> @@ -319,9 +319,9 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll index f3ce1589cc48..53f7c1ccdc75 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll @@ -154,8 +154,8 @@ define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i3 define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256, <32 x i16> %a512, <32 x i16> %b512) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> @@ -235,9 +235,9 @@ define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16 define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll index 9451cb92629a..038ba2c64164 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll @@ -205,8 +205,8 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <2 x i16> %src32_1, <4 x i16> %src64_1, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> @@ -305,9 +305,9 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32>