From 309f1e4ac8cca1ba1f0e28eeae8e2926dc387d04 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 16 Nov 2021 09:49:42 +0000 Subject: [PATCH] [ARM] Add datalayout to costmodel tests. NFC This adds a sensible datalayout to the ARM cost model tests, to prevent the costs reported being incorrect for the size of pointers. --- .../Analysis/CostModel/ARM/arith-overflow.ll | 26 +- .../test/Analysis/CostModel/ARM/arith-ssat.ll | 2 + .../test/Analysis/CostModel/ARM/arith-usat.ll | 2 + llvm/test/Analysis/CostModel/ARM/arith.ll | 2 + llvm/test/Analysis/CostModel/ARM/cast.ll | 8 +- llvm/test/Analysis/CostModel/ARM/cast_ldst.ll | 466 +++++++++--------- llvm/test/Analysis/CostModel/ARM/cmps.ll | 42 +- .../Analysis/CostModel/ARM/control-flow.ll | 2 + llvm/test/Analysis/CostModel/ARM/divrem.ll | 2 + llvm/test/Analysis/CostModel/ARM/fparith.ll | 2 + llvm/test/Analysis/CostModel/ARM/freeshift.ll | 2 + llvm/test/Analysis/CostModel/ARM/gep.ll | 2 + .../test/Analysis/CostModel/ARM/immediates.ll | 2 + .../CostModel/ARM/intrinsic-cost-kinds.ll | 2 + .../test/Analysis/CostModel/ARM/load_store.ll | 2 + llvm/test/Analysis/CostModel/ARM/logicalop.ll | 2 + llvm/test/Analysis/CostModel/ARM/mve-abs.ll | 2 + .../CostModel/ARM/mve-active_lane_mask.ll | 2 + llvm/test/Analysis/CostModel/ARM/mve-cmp.ll | 2 + .../CostModel/ARM/mve-gather-scatter-cost.ll | 4 +- .../test/Analysis/CostModel/ARM/mve-minmax.ll | 2 + .../CostModel/ARM/mve-vecreduce-add.ll | 2 + .../test/Analysis/CostModel/ARM/reduce-add.ll | 2 + .../test/Analysis/CostModel/ARM/reduce-and.ll | 2 + llvm/test/Analysis/CostModel/ARM/reduce-or.ll | 2 + .../Analysis/CostModel/ARM/reduce-smax.ll | 2 + .../Analysis/CostModel/ARM/reduce-smin.ll | 2 + .../Analysis/CostModel/ARM/reduce-umax.ll | 2 + .../Analysis/CostModel/ARM/reduce-umin.ll | 2 + llvm/test/Analysis/CostModel/ARM/select.ll | 2 + llvm/test/Analysis/CostModel/ARM/shuffle.ll | 2 + .../CostModel/ARM/target-intrinsics.ll | 2 + .../LoopVectorize/ARM/arm-ieee-vectorize.ll | 2 + .../ARM/mve-gather-scatter-tailpred.ll | 53 +- .../LoopVectorize/ARM/mve-shiftcost.ll | 2 +- .../ARM/prefer-tail-loop-folding.ll | 2 + .../ARM/tail-fold-multiple-icmps.ll | 4 +- .../LoopVectorize/ARM/tail-folding-allowed.ll | 2 + .../ARM/tail-folding-loop-hint.ll | 2 + .../ARM/tail-folding-not-allowed.ll | 128 +++-- .../ARM/tail-folding-prefer-flag.ll | 2 + .../ARM/tail-folding-reductions-allowed.ll | 2 + 42 files changed, 446 insertions(+), 353 deletions(-) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll index 1d7fd05fd1f2..a88893d2e675 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll @@ -6,6 +6,8 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) declare {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64>, <2 x i64>) declare {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x i64>, <4 x i64>) @@ -665,9 +667,9 @@ define i32 @smul(i32 %arg) { ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -722,9 +724,9 @@ define i32 @smul(i32 %arg) { ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -823,9 +825,9 @@ define i32 @umul(i32 %arg) { ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) @@ -880,9 +882,9 @@ define i32 @umul(i32 %arg) { ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll index 7769d425b302..b8678fcc7abd 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll @@ -6,6 +6,8 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + declare i64 @llvm.sadd.sat.i64(i64, i64) declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) declare <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64>, <4 x i64>) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll index 9baec1e7d25f..253345b0086d 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll @@ -6,6 +6,8 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + declare i64 @llvm.uadd.sat.i64(i64, i64) declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>) declare <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64>, <4 x i64>) diff --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll index f1f11e2484ac..453749c11d5d 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith.ll @@ -7,6 +7,8 @@ ; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=thumbv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @i1() { ; CHECK-LABEL: 'i1' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll index bf2488eec97b..700af7f896c6 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -9,6 +9,8 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @casts() { ; -- scalars -- ; CHECK-NEON-RECIP-LABEL: 'casts' @@ -124,7 +126,7 @@ define i32 @casts() { ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tv4i32i8 = trunc <4 x i32> undef to <4 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tv4i32i8 = trunc <4 x i32> undef to <4 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tv4i32i16 = trunc <4 x i32> undef to <4 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tv4i16i8 = trunc <4 x i16> undef to <4 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tv8i32i8 = trunc <8 x i32> undef to <8 x i8> @@ -1700,7 +1702,7 @@ define i32 @casts() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tv4i32i8 = trunc <4 x i32> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tv4i32i8 = trunc <4 x i32> undef to <4 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tv4i32i16 = trunc <4 x i32> undef to <4 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tv4i16i8 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tv8i32i8 = trunc <8 x i32> undef to <8 x i8> @@ -3276,7 +3278,7 @@ define i32 @casts() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tv4i32i8 = trunc <4 x i32> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tv4i32i8 = trunc <4 x i32> undef to <4 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tv4i32i16 = trunc <4 x i32> undef to <4 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tv4i16i8 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tv8i32i8 = trunc <8 x i32> undef to <8 x i8> diff --git a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll index afa616260b3e..e6e10718530a 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll @@ -9,6 +9,8 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @load_extends() { ; CHECK-NEON-RECIP-LABEL: 'load_extends' ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -17,12 +19,12 @@ define i32 @load_extends() { ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -84,12 +86,12 @@ define i32 @load_extends() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -151,12 +153,12 @@ define i32 @load_extends() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -218,12 +220,12 @@ define i32 @load_extends() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -285,12 +287,12 @@ define i32 @load_extends() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -352,12 +354,12 @@ define i32 @load_extends() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -419,12 +421,12 @@ define i32 @load_extends() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -486,12 +488,12 @@ define i32 @load_extends() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -553,12 +555,12 @@ define i32 @load_extends() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32 @@ -697,20 +699,20 @@ define i32 @store_trunc() { ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1 @@ -728,17 +730,17 @@ define i32 @store_trunc() { ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'store_trunc' @@ -753,13 +755,13 @@ define i32 @store_trunc() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> @@ -783,17 +785,17 @@ define i32 @store_trunc() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'store_trunc' @@ -808,21 +810,21 @@ define i32 @store_trunc() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1 @@ -838,17 +840,17 @@ define i32 @store_trunc() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'store_trunc' @@ -863,21 +865,21 @@ define i32 @store_trunc() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1 @@ -893,17 +895,17 @@ define i32 @store_trunc() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'store_trunc' @@ -917,20 +919,20 @@ define i32 @store_trunc() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1 @@ -948,17 +950,17 @@ define i32 @store_trunc() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'store_trunc' @@ -973,7 +975,7 @@ define i32 @store_trunc() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> @@ -1003,17 +1005,17 @@ define i32 @store_trunc() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'store_trunc' @@ -1028,21 +1030,21 @@ define i32 @store_trunc() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1 @@ -1058,17 +1060,17 @@ define i32 @store_trunc() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'store_trunc' @@ -1083,21 +1085,21 @@ define i32 @store_trunc() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1 @@ -1113,17 +1115,17 @@ define i32 @store_trunc() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'store_trunc' @@ -1137,8 +1139,8 @@ define i32 @store_trunc() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> @@ -1146,7 +1148,7 @@ define i32 @store_trunc() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> @@ -1168,17 +1170,17 @@ define i32 @store_trunc() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %i816 = trunc i16 undef to i8 @@ -1247,10 +1249,10 @@ define i32 @load_fpextends() { ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1275,10 +1277,10 @@ define i32 @load_fpextends() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1303,10 +1305,10 @@ define i32 @load_fpextends() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1331,10 +1333,10 @@ define i32 @load_fpextends() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1359,10 +1361,10 @@ define i32 @load_fpextends() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1387,10 +1389,10 @@ define i32 @load_fpextends() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1415,10 +1417,10 @@ define i32 @load_fpextends() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1443,10 +1445,10 @@ define i32 @load_fpextends() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1471,10 +1473,10 @@ define i32 @load_fpextends() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double @@ -1546,10 +1548,10 @@ define i32 @load_fptrunc() { ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'load_fptrunc' @@ -1571,10 +1573,10 @@ define i32 @load_fptrunc() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fptrunc' @@ -1596,10 +1598,10 @@ define i32 @load_fptrunc() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'load_fptrunc' @@ -1621,10 +1623,10 @@ define i32 @load_fptrunc() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'load_fptrunc' @@ -1646,10 +1648,10 @@ define i32 @load_fptrunc() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'load_fptrunc' @@ -1671,10 +1673,10 @@ define i32 @load_fptrunc() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'load_fptrunc' @@ -1696,10 +1698,10 @@ define i32 @load_fptrunc() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'load_fptrunc' @@ -1721,10 +1723,10 @@ define i32 @load_fptrunc() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'load_fptrunc' @@ -1746,10 +1748,10 @@ define i32 @load_fptrunc() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %i1632 = fptrunc float undef to half @@ -2312,20 +2314,20 @@ define i32 @maskedstore_trunc() { ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) @@ -2356,13 +2358,13 @@ define i32 @maskedstore_trunc() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> @@ -2399,21 +2401,21 @@ define i32 @maskedstore_trunc() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) @@ -2442,21 +2444,21 @@ define i32 @maskedstore_trunc() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) @@ -2484,20 +2486,20 @@ define i32 @maskedstore_trunc() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) @@ -2528,7 +2530,7 @@ define i32 @maskedstore_trunc() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> @@ -2571,21 +2573,21 @@ define i32 @maskedstore_trunc() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) @@ -2614,21 +2616,21 @@ define i32 @maskedstore_trunc() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) @@ -2656,8 +2658,8 @@ define i32 @maskedstore_trunc() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8> @@ -2665,7 +2667,7 @@ define i32 @maskedstore_trunc() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16> diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll index bda8763ef18d..53831026cda8 100644 --- a/llvm/test/Analysis/CostModel/ARM/cmps.ll +++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll @@ -8,6 +8,8 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @cmps() { ; CHECK-MVE-RECIP-LABEL: 'cmps' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef @@ -24,7 +26,7 @@ define i32 @cmps() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a11 = fcmp oge <4 x float> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %q = icmp eq <4 x i32*> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q = icmp eq <4 x i32*> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'cmps' @@ -42,7 +44,7 @@ define i32 @cmps() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %q = icmp eq <4 x i32*> undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x i32*> undef, undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'cmps' @@ -60,7 +62,7 @@ define i32 @cmps() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %q = icmp eq <4 x i32*> undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x i32*> undef, undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'cmps' @@ -78,7 +80,7 @@ define i32 @cmps() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q = icmp eq <4 x i32*> undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x i32*> undef, undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'cmps' @@ -182,9 +184,9 @@ define void @minmax() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %c6 = icmp slt <4 x i32*> undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c6 = icmp slt <4 x i32*> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'minmax' @@ -197,9 +199,9 @@ define void @minmax() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %c6 = icmp slt <4 x i32*> undef, undef -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x i32*> undef, undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-RECIP-LABEL: 'minmax' @@ -212,9 +214,9 @@ define void @minmax() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %c6 = icmp slt <4 x i32*> undef, undef -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x i32*> undef, undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-RECIP-LABEL: 'minmax' @@ -227,9 +229,9 @@ define void @minmax() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c6 = icmp slt <4 x i32*> undef, undef -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-SIZE-LABEL: 'minmax' @@ -242,7 +244,7 @@ define void @minmax() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -257,7 +259,7 @@ define void @minmax() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -272,7 +274,7 @@ define void @minmax() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -289,7 +291,7 @@ define void @minmax() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %c1 = icmp slt i8 undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/control-flow.ll b/llvm/test/Analysis/CostModel/ARM/control-flow.ll index d2d649d4df49..99651ee902eb 100644 --- a/llvm/test/Analysis/CostModel/ARM/control-flow.ll +++ b/llvm/test/Analysis/CostModel/ARM/control-flow.ll @@ -12,6 +12,8 @@ ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-MVE-THROUGHPUT ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=armv8a -mattr=+neon | FileCheck %s --check-prefix=CHECK-NEON-THROUGHPUT +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @simple_loop_cost(i32 %N) { ; CHECK-T1-SIZE-LABEL: 'simple_loop_cost' ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll index 06f6cce33463..dec91be1b603 100644 --- a/llvm/test/Analysis/CostModel/ARM/divrem.ll +++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll @@ -5,6 +5,8 @@ ; RUN: opt -cost-model -analyze -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE ; RUN: opt -cost-model -analyze -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @i8() { ; CHECK-NEON-LABEL: 'i8' ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i8 undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/fparith.ll b/llvm/test/Analysis/CostModel/ARM/fparith.ll index 3403ab25e490..5201afdbc7fd 100644 --- a/llvm/test/Analysis/CostModel/ARM/fparith.ll +++ b/llvm/test/Analysis/CostModel/ARM/fparith.ll @@ -2,6 +2,8 @@ ; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE ; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @f32() { ; CHECK-MVE-LABEL: 'f32' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/freeshift.ll b/llvm/test/Analysis/CostModel/ARM/freeshift.ll index dbaf90f81068..67f7088f97f5 100644 --- a/llvm/test/Analysis/CostModel/ARM/freeshift.ll +++ b/llvm/test/Analysis/CostModel/ARM/freeshift.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @shl(i32 %a, i32 %b) { ; CHECK-LABEL: 'shl' ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = shl i32 %a, 3 diff --git a/llvm/test/Analysis/CostModel/ARM/gep.ll b/llvm/test/Analysis/CostModel/ARM/gep.ll index 6d24f3d799fb..5390dd1b048e 100644 --- a/llvm/test/Analysis/CostModel/ARM/gep.ll +++ b/llvm/test/Analysis/CostModel/ARM/gep.ll @@ -7,6 +7,8 @@ ; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-T32 ; RUN: opt -cost-model -analyze -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-A32 +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @testi8(i8* %a, i32 %i) { ; CHECK-V6M-LABEL: 'testi8' ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0 diff --git a/llvm/test/Analysis/CostModel/ARM/immediates.ll b/llvm/test/Analysis/CostModel/ARM/immediates.ll index 7be89cc9cf6f..692b64967b1a 100644 --- a/llvm/test/Analysis/CostModel/ARM/immediates.ll +++ b/llvm/test/Analysis/CostModel/ARM/immediates.ll @@ -6,6 +6,8 @@ ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-T2-THROUGHPUT +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @const_costs() { ; CHECK-T1-SIZE-LABEL: 'const_costs' ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll index b8a6a9569c9a..a5630ecb181e 100644 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -4,6 +4,8 @@ ; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE ; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + ; Test a cross-section of intrinsics for various cost-kinds. ; Other test files may check for accuracy of a particular intrinsic ; across subtargets or types. This is just a sanity check using an diff --git a/llvm/test/Analysis/CostModel/ARM/load_store.ll b/llvm/test/Analysis/CostModel/ARM/load_store.ll index 52f6e3cccac6..a8d3bd347fea 100644 --- a/llvm/test/Analysis/CostModel/ARM/load_store.ll +++ b/llvm/test/Analysis/CostModel/ARM/load_store.ll @@ -8,6 +8,8 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=CHECK-V8-SIZE ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @stores() { ; CHECK-NOVEC-LABEL: 'stores' ; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4 diff --git a/llvm/test/Analysis/CostModel/ARM/logicalop.ll b/llvm/test/Analysis/CostModel/ARM/logicalop.ll index ef12258d4824..7bf94456a47b 100644 --- a/llvm/test/Analysis/CostModel/ARM/logicalop.ll +++ b/llvm/test/Analysis/CostModel/ARM/logicalop.ll @@ -8,6 +8,8 @@ ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-SIZE ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @op() { ; Logical and/or - select's cost must be equivalent to that of binop ; CHECK-MVE-RECIP-LABEL: 'op' diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll index ace2d1bee070..a08ea0faaab9 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll @@ -2,6 +2,8 @@ ; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + declare i64 @llvm.abs.i64(i64, i1) declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1) declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1) diff --git a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll index ae23b3e3c449..c2550040c4b3 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + ; Note that these instructions like this (not in a look that could be tail ; predicated) should not really be free. We currently assume that all active ; lane masks are free. diff --git a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll index c2af5e1cacb5..0cb234241105 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll @@ -2,6 +2,8 @@ ; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVE ; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @icmp() { ; CHECK-LABEL: 'icmp' ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2i8 = icmp slt <2 x i8> undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll index c368991faea6..d555f468bc79 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @masked_gather() { ; CHECK-LABEL: 'masked_gather' ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef) @@ -28,7 +30,7 @@ define i32 @masked_gather() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> undef, i32 1, <4 x i1> undef, <4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32p = call <4 x i32*> @llvm.masked.gather.v4p0i32.v4p0p0i32(<4 x i32**> undef, i32 4, <4 x i1> undef, <4 x i32*> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32p = call <4 x i32*> @llvm.masked.gather.v4p0i32.v4p0p0i32(<4 x i32**> undef, i32 4, <4 x i1> undef, <4 x i32*> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll index b075ba754904..15702657570a 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll @@ -4,6 +4,8 @@ ; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.-none-eabimain -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=MVE-RECIP,MVEF-RECIP ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=MVE-SIZE,MVEF-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + declare i64 @llvm.smin.i64(i64, i64) declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>) declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>) diff --git a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll index c670d6f6baee..06e9a185d32a 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @add_i8() { ; CHECK-LABEL: 'add_i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll index 94bd13e39df5..a11cc123c83d 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll @@ -4,6 +4,8 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @reduce_i64(i32 %arg) { ; V8M-RECIP-LABEL: 'reduce_i64' ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-and.ll b/llvm/test/Analysis/CostModel/ARM/reduce-and.ll index 2bd23dd27719..66f2232f7b8b 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-and.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -cost-model -cost-kind=throughput -analyze | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @reduce_i1(i32 %arg) { ; CHECK-LABEL: 'reduce_i1' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-or.ll b/llvm/test/Analysis/CostModel/ARM/reduce-or.ll index c29f0cfd609d..a746653972e5 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-or.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -cost-model -cost-kind=throughput -analyze | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @reduce_i1(i32 %arg) { ; CHECK-LABEL: 'reduce_i1' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll index 5a72f3be05cc..642c4baf59de 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll @@ -3,6 +3,8 @@ ; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON ; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll index cd4da5435436..58f0296cc405 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll @@ -3,6 +3,8 @@ ; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON ; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll index 0447e3826ca5..0dc7f7fa5d9a 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll @@ -3,6 +3,8 @@ ; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON ; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll index 62510ae70f2b..68ad1d381863 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll @@ -3,6 +3,8 @@ ; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON ; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll index 173904d9e60b..05515e6943aa 100644 --- a/llvm/test/Analysis/CostModel/ARM/select.ll +++ b/llvm/test/Analysis/CostModel/ARM/select.ll @@ -8,6 +8,8 @@ ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-SIZE ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @selects() { ; Scalar values ; CHECK-MVE-RECIP-LABEL: 'selects' diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll index 642f68fd2b98..4a7674c5dc6b 100644 --- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll +++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll @@ -2,6 +2,8 @@ ; RUN: opt < %s -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE ; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @broadcast() { ; CHECK-MVE-LABEL: 'broadcast' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer diff --git a/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll b/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll index bb2cdde7422f..9f8e5bca8b3d 100644 --- a/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll @@ -4,6 +4,8 @@ ; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-THUMB2-LAT ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @intrinsics() { ; CHECK-THUMB2-RECIP-LABEL: 'intrinsics' ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll index 8a8be764604a..ec3c6c79c347 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll @@ -4,6 +4,8 @@ ; RUN: opt -mtriple armv7-unknwon-darwin -loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=DARWIN ; REQUIRES: asserts +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + ; Testing the ability of the loop vectorizer to tell when SIMD is safe or not ; regarding IEEE 754 standard. ; On Linux, we only want the vectorizer to work when -ffast-math flag is set, diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index aac7e3751716..82e36d9af912 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-vectorize -force-vector-width=4 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -tail-predication=force-enabled -S %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @test_stride1_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: @test_stride1_4i32( ; CHECK-NEXT: entry: @@ -73,56 +75,43 @@ define void @test_stride-1_4i32(i32* readonly %data, i32* noalias nocapture %dst ; CHECK-LABEL: @test_stride-1_4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP0]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 2, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 2, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 true, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i32 [[TMP8]], -1 -; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i32 [[TMP9]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 -3 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[TMP0]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -3 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> , [[REVERSE]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP18]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[REVERSE]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], -1 ; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP20]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP12]] ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]] ; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll index 15c8483685b1..e08c3b141683 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll @@ -15,7 +15,7 @@ target triple = "thumbv8.1m.main-none-none-eabi" define void @test([101 x i32] *%src, i32 %N) #0 { entry: br label %for.body386 - + for.body386: ; preds = %entry, %l77 %add387 = phi i32 [ %inc532, %l77 ], [ 0, %entry ] %arrayidx388 = getelementptr inbounds [101 x i32], [101 x i32]* %src, i32 0, i32 %add387 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll index 3a8b17d2f8c6..0c514171f1a0 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll @@ -50,6 +50,8 @@ ; RUN: -enable-arm-maskedldst=true -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @prefer_folding(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 { ; CHECK-LABEL: prefer_folding( ; PREFER-FOLDING: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll index 27a469142235..26f24fa6d243 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll @@ -27,7 +27,7 @@ define arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocaptur ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]]) @@ -55,7 +55,7 @@ define arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocaptur ; CHECK-NEXT: [[COND9]] = select i1 [[CMP4]], i32 [[TMP9]], i32 [[MIN_028]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[I_029]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; entry: %cmp26.not = icmp eq i32 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll index ed12afca9b8d..4cb9e47e75a6 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll @@ -1,6 +1,8 @@ ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp -loop-vectorize -tail-predication=enabled -S < %s | \ ; RUN: FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + ; Test that ARMTTIImpl::preferPredicateOverEpilogue triggers tail-folding. define dso_local void @f1(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) { diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll index be29b742e939..41622bde6531 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll @@ -1,6 +1,8 @@ ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp -loop-vectorize -tail-predication=enabled -S < %s | \ ; RUN: FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + ; Check that loop hint predicate.enable loop can overrule the TTI hook. For ; this test case, the TTI hook rejects tail-predication: ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index cdbc01c8a136..3753811ad65f 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -3,6 +3,8 @@ ; RUN: -tail-predication=enabled -loop-vectorize -S < %s | \ ; RUN: FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define void @trunc_not_allowed_different_vec_elemns(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i16* noalias nocapture %D) #0 { ; CHECK-LABEL: @trunc_not_allowed_different_vec_elemns( ; CHECK-NEXT: entry: @@ -272,59 +274,47 @@ for.body: define void @strides_different_direction(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) #0 { ; CHECK-LABEL: @strides_different_direction( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 430) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[N]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP0]], [[N]] -; CHECK-NEXT: [[TMP4:%.*]] = select i1 true, i1 [[TMP2]], i1 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]] -; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = sub nsw i32 [[N]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 -3 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 [[N:%.*]], [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 -3 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 431, 428 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 428, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 428, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[N]], [[I_09]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[SUB]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_09]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD3]] = add nuw nsw i32 [[I_09]], 1 @@ -553,24 +543,62 @@ define dso_local void @select_not_allowed(i32* noalias nocapture %A, i32* noalia ; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32*> poison, i32* [[C:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32*> [[BROADCAST_SPLATINSERT]], <4 x i32*> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32*> poison, i32* [[B:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32*> [[BROADCAST_SPLATINSERT1]], <4 x i32*> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32*> [[BROADCAST_SPLAT]], <4 x i32*> [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, <4 x i32*> [[TMP8]], <4 x i32> [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP9]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i32 [[I_011]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: [[C_B:%.*]] = select i1 [[TOBOOL_NOT]], i32* [[C:%.*]], i32* [[B:%.*]] +; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i32 [[I_011]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP14]], 0 +; CHECK-NEXT: [[C_B:%.*]] = select i1 [[TOBOOL_NOT]], i32* [[C]], i32* [[B]] ; CHECK-NEXT: [[COND_IN:%.*]] = getelementptr inbounds i32, i32* [[C_B]], i32 [[I_011]] ; CHECK-NEXT: [[COND:%.*]] = load i32, i32* [[COND_IN]], align 4 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_011]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_011]] ; CHECK-NEXT: store i32 [[COND]], i32* [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; entry: %cmp10 = icmp sgt i32 %N, 0 @@ -624,7 +652,7 @@ define i32 @i32_smin_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -642,7 +670,7 @@ define i32 @i32_smin_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -694,7 +722,7 @@ define i32 @i32_smax_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -712,7 +740,7 @@ define i32 @i32_smax_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -764,7 +792,7 @@ define i32 @i32_umin_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -782,7 +810,7 @@ define i32 @i32_umin_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -834,7 +862,7 @@ define i32 @i32_umax_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -852,7 +880,7 @@ define i32 @i32_umax_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll index 605d82c71f66..96ef73e34d22 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll @@ -5,6 +5,8 @@ ; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | \ ; RUN: FileCheck -check-prefix=PREDFLAG %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + ; This test has a loop hint "predicate.predicate" set to false, so shouldn't ; get tail-folded, except with -prefer-predicate-over-epilog which then ; overrules this. diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll index 67ee95af0d09..ce04378d1d71 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reductions-allowed.ll @@ -7,6 +7,8 @@ ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp -loop-vectorize -tail-predication=force-enabled-no-reductions -S < %s | \ ; RUN: FileCheck %s -check-prefixes=COMMON,NORED +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + ; Check that this reduction is allowed, except when reductions are disable on ; the command line. ;