From a5f2fdca993eca0ae85e90f987fbbfb3b4aee626 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 14 Nov 2021 18:41:37 +0300 Subject: [PATCH] [X86][Costmodel] `trunc v16i32 to v32i16` can appear after legalization, cost is same as for `trunc v16i32 to v16i16` This was noticed in D113609, hopefully it unblocks that patch. There are likely other similar problems. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D113842 --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 3 +- llvm/test/Analysis/CostModel/X86/arith-fix.ll | 12 +++--- .../Analysis/CostModel/X86/arith-overflow.ll | 12 +++--- llvm/test/Analysis/CostModel/X86/trunc.ll | 40 +++++++++---------- 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 00e67df38fa8..601c38d3a23a 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1667,7 +1667,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, // vpmovdb { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 2 }, // vpmovdb { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 }, // vpmovdb - { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, // vpmovdb + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, // vpmovdw + { ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, 2 }, // vpmovdw { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 2 }, // vpmovqb { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1 }, // vpshufb { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 }, // vpmovqb diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll index f9aaa427751a..80bd544d3ec5 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll @@ -120,7 +120,7 @@ define i32 @smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -139,7 +139,7 @@ define i32 @smul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -158,7 +158,7 @@ define i32 @smul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -354,7 +354,7 @@ define i32 @umul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -373,7 +373,7 @@ define i32 @umul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -392,7 +392,7 @@ define i32 @umul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index 9a32f22503a4..64fed4bd57c6 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1076,7 +1076,7 @@ define i32 @smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1095,7 +1095,7 @@ define i32 @smul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1114,7 +1114,7 @@ define i32 @smul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1314,7 +1314,7 @@ define i32 @umul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1333,7 +1333,7 @@ define i32 @umul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1352,7 +1352,7 @@ define i32 @umul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll index 2d7041f3c67f..7de75e26fcba 100644 --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -441,26 +441,26 @@ define i32 @trunc_vXi16() { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 318 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 460 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 552 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 644 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 920 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1104 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1288 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'trunc_vXi16'