[CostModel][X86] Update CTPOP costs

With the bdver2 model updates, many of the AVX1 costs were far too high - it also helped expose some cost mismatches for Atom/Silvermont
This commit is contained in:
Simon Pilgrim 2022-09-10 17:57:20 +01:00
parent 4994f87ca1
commit 10edf88458
4 changed files with 56 additions and 98 deletions

View File

@ -3428,14 +3428,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTLZ, MVT::v8i32, { 38 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTLZ, MVT::v16i16, { 30 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTLZ, MVT::v32i8, { 20 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v4i64, { 16, 19, 19, 28 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v2i64, { 7, 9, 10, 14 } },
{ ISD::CTPOP, MVT::v8i32, { 24, 27, 27, 36 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v4i32, { 11, 12, 14, 18 } },
{ ISD::CTPOP, MVT::v16i16, { 20, 23, 22, 31 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v8i16, { 9, 11, 11, 15 } },
{ ISD::CTPOP, MVT::v32i8, { 14, 17, 16, 25 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v16i8, { 6, 7, 8, 12 } },
{ ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
{ ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
{ ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
{ ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
{ ISD::CTTZ, MVT::v4i64, { 22 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTTZ, MVT::v8i32, { 30 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTTZ, MVT::v16i16, { 26 } }, // 2 x 128-bit Op + extract/insert
@ -3519,10 +3519,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTLZ, MVT::v4i32, { 18 } },
{ ISD::CTLZ, MVT::v8i16, { 14 } },
{ ISD::CTLZ, MVT::v16i8, { 9 } },
{ ISD::CTPOP, MVT::v2i64, { 7, 19, 12, 18 } },
{ ISD::CTPOP, MVT::v4i32, { 11, 24, 16, 22 } },
{ ISD::CTPOP, MVT::v8i16, { 9, 18, 14, 20 } },
{ ISD::CTPOP, MVT::v16i8, { 6, 12, 10, 16 } },
{ ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
{ ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
{ ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
{ ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
{ ISD::CTTZ, MVT::v2i64, { 10 } },
{ ISD::CTTZ, MVT::v4i32, { 14 } },
{ ISD::CTTZ, MVT::v8i16, { 12 } },

View File

@ -95,7 +95,7 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v2i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v2i64'
@ -132,7 +132,7 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v4i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v4i64'
@ -169,7 +169,7 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i64'
@ -206,7 +206,7 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v4i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v4i32'
@ -243,7 +243,7 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i32'
@ -280,7 +280,7 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v16i32'
@ -317,7 +317,7 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i16'
@ -354,7 +354,7 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v16i16'
@ -391,7 +391,7 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v32i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v32i16'
@ -428,7 +428,7 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v16i8'
@ -465,7 +465,7 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v32i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v32i8'
@ -502,7 +502,7 @@ define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v64i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v64i8'

View File

@ -91,7 +91,7 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v2i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v2i64'
@ -128,11 +128,11 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v4i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v4i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v4i64'
@ -165,11 +165,11 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v8i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i64'
@ -202,11 +202,11 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v4i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v4i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v4i32'
@ -239,11 +239,11 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v8i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i32'
@ -276,11 +276,11 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v16i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v16i32'
@ -313,11 +313,11 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v8i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i16'
@ -350,11 +350,11 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v16i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v16i16'
@ -387,11 +387,11 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v32i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v32i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v32i16'
@ -424,7 +424,7 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v16i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i8'
@ -461,11 +461,11 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v32i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v32i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v32i8'
@ -498,11 +498,11 @@ define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v64i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v64i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v64i8'

View File

@ -143,26 +143,11 @@ define void @ctpop_4i32() #0 {
; SSE42-NEXT: store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
; SSE42-NEXT: ret void
;
; AVX1-LABEL: @ctpop_4i32(
; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
; AVX1-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
; AVX1-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
; AVX1-NEXT: [[CTPOP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD0]])
; AVX1-NEXT: [[CTPOP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD1]])
; AVX1-NEXT: [[CTPOP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD2]])
; AVX1-NEXT: [[CTPOP3:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD3]])
; AVX1-NEXT: store i32 [[CTPOP0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
; AVX1-NEXT: store i32 [[CTPOP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
; AVX1-NEXT: store i32 [[CTPOP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
; AVX1-NEXT: store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctpop_4i32(
; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
; AVX2-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
; AVX2-NEXT: ret void
; AVX-LABEL: @ctpop_4i32(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
; AVX-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
; AVX-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
; AVX-NEXT: ret void
;
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
@ -216,38 +201,11 @@ define void @ctpop_8i32() #0 {
; SSE42-NEXT: store i32 [[CTPOP7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
; SSE42-NEXT: ret void
;
; AVX1-LABEL: @ctpop_8i32(
; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
; AVX1-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
; AVX1-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
; AVX1-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
; AVX1-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
; AVX1-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
; AVX1-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
; AVX1-NEXT: [[CTPOP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD0]])
; AVX1-NEXT: [[CTPOP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD1]])
; AVX1-NEXT: [[CTPOP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD2]])
; AVX1-NEXT: [[CTPOP3:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD3]])
; AVX1-NEXT: [[CTPOP4:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD4]])
; AVX1-NEXT: [[CTPOP5:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD5]])
; AVX1-NEXT: [[CTPOP6:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD6]])
; AVX1-NEXT: [[CTPOP7:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD7]])
; AVX1-NEXT: store i32 [[CTPOP0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
; AVX1-NEXT: store i32 [[CTPOP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
; AVX1-NEXT: store i32 [[CTPOP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
; AVX1-NEXT: store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
; AVX1-NEXT: store i32 [[CTPOP4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
; AVX1-NEXT: store i32 [[CTPOP5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
; AVX1-NEXT: store i32 [[CTPOP6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
; AVX1-NEXT: store i32 [[CTPOP7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctpop_8i32(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
; AVX2-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> [[TMP1]])
; AVX2-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
; AVX2-NEXT: ret void
; AVX-LABEL: @ctpop_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> [[TMP1]])
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
; AVX-NEXT: ret void
;
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2