[SVE][Analysis] Instruction costs for ops on scalable vectors

The following operations have no associated cost when applied to scalable vectors and, as a consequence, can trigger a crash when AArch64TTIImpl::getCastInstrCost() is called: fptrunc, trunc, fpext and fpto(u,s)i. This patch adds costs for these operations together with the relevant regression tests. Differential Revision: https://reviews.llvm.org/D98934
2021-03-17 12:00:31 +00:00 · 2021-03-17 12:00:31 +00:00 · 19601a4c6c
parent 66a8247eb6
commit 19601a4c6c
5 changed files with 297 additions and 5 deletions
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -391,6 +391,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    { ISD::TRUNCATE, MVT::v8i8,  MVT::v8i32,  3 },
    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },

+    // Truncations on nxvmiN
+    { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1 },
+    { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 1 },
+    { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 2 },
+    { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1 },
+    { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3 },
+    { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5 },
+    { ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1 },
+    { ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1 },
+    { ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 2 },
+    { ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 3 },
+    { ISD::TRUNCATE, MVT::nxv8i32, MVT::nxv8i64, 6 },
+
    // The number of shll instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
@ -472,6 +489,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8,  MVT::v4f32, 2 },

+    // Lowering scalable vectors: source and destination element widths match.
+    { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
+    { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
+    { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
+
+
+    // Complex, from nxv2f32: legal type is nxv2i32 (no cost) or nxv2i64 (1 ext).
+    { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
+    { ISD::FP_TO_SINT, MVT::nxv2i8,  MVT::nxv2f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv2i8,  MVT::nxv2f32, 1 },
+
    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
@ -479,6 +513,75 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f64, 2 },
+
+    // Complex, from nxv2f64: legal type is nxv2i32, 1 narrowing => ~2.
+    { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv2i8,  MVT::nxv2f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv2i8,  MVT::nxv2f64, 2 },
+
+    // Complex, from nxv4f32: legal type is nxv4i16, 1 narrowing => ~2.
+    { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv4i8,  MVT::nxv4f32, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i8,  MVT::nxv4f32, 2 },
+
+    // Complex, from nxv8f64: legal type is nxv8i32, 1 narrowing => ~2.
+    { ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv8i8,  MVT::nxv8f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv8i8,  MVT::nxv8f64, 2 },
+
+    // Complex, from nxv4f64: legal type is nxv4i32, 1 narrowing => ~2.
+    { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv4i8,  MVT::nxv4f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv4i8,  MVT::nxv4f64, 2 },
+
+    // Complex, from nxv8f32: legal type is nxv8i32 (no cost) or nxv8i64 (1 ext).
+    { ISD::FP_TO_SINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
+    { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f32, 3 },
+    { ISD::FP_TO_SINT, MVT::nxv8i8,  MVT::nxv8f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
+    { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f32, 1 },
+    { ISD::FP_TO_UINT, MVT::nxv8i8,  MVT::nxv8f32, 1 },
+
+    // Truncate from nxvmf32 to nxvmf16.
+    { ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f32, 1 },
+    { ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1 },
+    { ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3 },
+
+    // Truncate from nxvmf64 to nxvmf16.
+    { ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1 },
+    { ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3 },
+    { ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7 },
+
+    // Truncate from nxvmf64 to nxvmf32.
+    { ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1 },
+    { ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3 },
+    { ISD::FP_ROUND, MVT::nxv8f32, MVT::nxv8f64, 6 },
+
+    // Extend from nxvmf16 to nxvmf32.
+    { ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
+    { ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
+    { ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
+
+    // Extend from nxvmf16 to nxvmf64.
+    { ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
+    { ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
+    { ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
+
+    // Extend from nxvmf32 to nxvmf64.
+    { ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
+    { ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
+    { ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
+
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
@ -0,0 +1,33 @@
+; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve_fpext() {
+  ;CHECK-LABEL: 'sve_fpext'
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+  %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
+  %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
+  %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+
+  %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
+  %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
+  %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+
+  %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+  %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+  %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+
+  ret void
+}
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
@ -0,0 +1,91 @@
+; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -o - -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve-fptoi() {
+  ;CHECK-LABEL: 'sve-fptoi'
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction:   %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction:   %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
+
+  %nv2f32_to_si8  = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
+  %nv2f32_to_ui8  = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
+  %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+  %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+  %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
+  %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
+
+  %nv2f64_to_si8  = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
+  %nv2f64_to_ui8  = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
+  %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
+  %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
+  %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
+  %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
+
+  %nv4f32_to_si8  = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
+  %nv4f32_to_ui8  = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
+  %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+  %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+  %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
+  %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
+
+  %nv4f64_to_si8  = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
+  %nv4f64_to_ui8  = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
+  %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
+  %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
+  %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
+  %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
+
+  %nv8f32_to_si8  = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
+  %nv8f32_to_ui8  = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
+  %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+  %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+  %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
+  %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
+
+  %nv8f64_to_si8  = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
+  %nv8f64_to_ui8  = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
+  %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
+  %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+  %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
+  %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
+
+  ret void
+}
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
@ -0,0 +1,33 @@
+; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s  2>%t| FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve_fptruncs() {
+  ;CHECK-LABEL: 'sve_fptruncs'
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction:   %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+  %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
+  %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
+  %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
+
+  %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
+  %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
+  %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
+
+  %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+  %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+  %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+
+  ret void
+}
--- a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
@ -4,11 +4,43 @@
 ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
 ; WARN-NOT: warning

-; CHECK: Found an estimated cost of 0 for instruction:   %0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
+define void @sve_truncs() {
+  ;CHECK-LABEL: 'sve_truncs'
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction:   %trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+  ;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
+  %trunc_v2i16_to_i1  = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
+  %trunc_v2i32_to_i1  = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
+  %trunc_v2i64_to_i1  = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+
+  %trunc_v4i16_to_i1  = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
+  %trunc_v4i32_to_i1  = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+  %trunc_v4i64_to_i1  = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+
+  %trunc_v8i16_to_i1  = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+  %trunc_v8i32_to_i1  = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+  %trunc_v8i64_to_i1  = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+
+  %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
+  %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+
+  %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
+  %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+
+  %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x  8 x i16>
+  %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x  8 x i32>

-define void @trunc_nxv2i64_to_nxv2i32(<vscale x 2 x i32>* %ptr, <vscale x 2 x i64> %v) {
-entry:
-  %0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
-  store <vscale x 2 x i32> %0, <vscale x 2 x i32>* %ptr
  ret void
 }