[SVE][Analysis]Instruction costs for ops on scalable-vec

The following operations have no associated cost when
applied to scalable vectors and, as a consequence, can
trigger a crash when AArch64TTIImpl::getCastInstrCost()
is called:
- fptrunc
- trunc
- fpext
- fpto(u,s)i

This patch adds costs for these operations, together with
the relevant regression tests.

Differential Revision: https://reviews.llvm.org/D98934
This commit is contained in:
Nashe Mncube 2021-03-17 12:00:31 +00:00
parent 66a8247eb6
commit 19601a4c6c
5 changed files with 297 additions and 5 deletions

View File

@ -391,6 +391,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
// Truncations on nxvmiN
{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1 },
{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 1 },
{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 1 },
{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 1 },
{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 1 },
{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 2 },
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1 },
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3 },
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5 },
{ ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1 },
{ ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1 },
{ ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1 },
{ ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 2 },
{ ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 3 },
{ ISD::TRUNCATE, MVT::nxv8i32, MVT::nxv8i64, 6 },
// The number of shll instructions for the extension.
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
@ -472,6 +489,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
// Lowering of scalable vector types.
{ ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
{ ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
{ ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
{ ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
{ ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
{ ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
// Complex, from nxv2f32 legal type is nxv2i32 (no cost) or nxv2i64 (1 ext)
{ ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
{ ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
{ ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
{ ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
{ ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
{ ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
// Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
@ -479,6 +513,75 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
// Complex, from nxv2f64: legal type is nxv2i32, 1 narrowing => ~2.
{ ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
{ ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
{ ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
// Complex, from nxv4f32 legal type is nxv4i16, 1 narrowing => ~2
{ ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
{ ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
{ ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
{ ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
// Complex, from nxv8f64: legal type is nxv8i32, 1 narrowing => ~2.
{ ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
{ ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
{ ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
// Complex, from nxv4f64: legal type is nxv4i32, 1 narrowing => ~2.
{ ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
{ ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
{ ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
{ ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
// Complex, from nxv8f32: legal type is nxv8i32 (no cost) or nxv8i64 (1 ext).
{ ISD::FP_TO_SINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
{ ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f32, 3 },
{ ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
{ ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
{ ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f32, 1 },
{ ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
// Truncate from nxvmf32 to nxvmf16.
{ ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f32, 1 },
{ ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1 },
{ ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3 },
// Truncate from nxvmf64 to nxvmf16.
{ ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1 },
{ ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3 },
{ ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7 },
// Truncate from nxvmf64 to nxvmf32.
{ ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1 },
{ ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3 },
{ ISD::FP_ROUND, MVT::nxv8f32, MVT::nxv8f64, 6 },
// Extend from nxvmf16 to nxvmf32.
{ ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
{ ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
{ ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
// Extend from nxvmf16 to nxvmf64.
{ ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
{ ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
{ ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
// Extend from nxvmf32 to nxvmf64.
{ ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
{ ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
{ ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
};
if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,

View File

@ -0,0 +1,33 @@
; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
; Cost-model test for fpext on scalable vectors. The function takes no
; arguments and returns void; every conversion uses an undef operand.
; The FileCheck expectations at the top of the body pin the estimated cost
; reported for the identically named instruction further down, and they are
; listed in the same order as the instructions themselves.
define void @sve_fpext() {
;CHECK-LABEL: 'sve_fpext'
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
; half -> float, for 2, 4 and 8 elements per vscale unit.
%nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
%nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
%nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
; half -> double, for 2, 4 and 8 elements per vscale unit.
%nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
%nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
%nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
; float -> double, for 2, 4 and 8 elements per vscale unit.
%nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
%nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
%nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
ret void
}

View File

@ -0,0 +1,91 @@
; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -o - -S < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
; Cost-model test for fptosi/fptoui on scalable vectors. The function takes
; no arguments and returns void; every conversion uses an undef operand.
; Sources are float and double vectors with 2, 4 and 8 elements per vscale
; unit; destinations are i8, i32 and i64 vectors of the same element count,
; each exercised in a signed and an unsigned variant.
; The FileCheck expectations at the top of the body pin the estimated cost
; reported for the identically named instruction further down, and they are
; listed in the same order as the instructions themselves.
define void @sve-fptoi() {
;CHECK-LABEL: 'sve-fptoi'
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
; Source: <vscale x 2 x float>.
%nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
%nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
%nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
%nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
%nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
%nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
; Source: <vscale x 2 x double>.
%nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
%nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
%nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
%nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
%nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
%nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
; Source: <vscale x 4 x float>.
%nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
%nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
%nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
%nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
%nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
%nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
; Source: <vscale x 4 x double>.
%nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
%nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
%nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
%nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
%nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
%nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
; Source: <vscale x 8 x float>.
%nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
%nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
%nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
%nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
%nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
%nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
; Source: <vscale x 8 x double>.
%nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
%nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
%nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
%nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
%nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
%nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
ret void
}

View File

@ -0,0 +1,33 @@
; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>%t| FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
; Cost-model test for fptrunc on scalable vectors. The function takes no
; arguments and returns void; every conversion uses an undef operand.
; The FileCheck expectations at the top of the body pin the estimated cost
; reported for the identically named instruction further down, and they are
; listed in the same order as the instructions themselves.
define void @sve_fptruncs() {
;CHECK-LABEL: 'sve_fptruncs'
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
;CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
; float -> half, for 2, 4 and 8 elements per vscale unit.
%nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
%nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
%nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
; double -> half, for 2, 4 and 8 elements per vscale unit.
%nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
%nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
%nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
; double -> float, for 2, 4 and 8 elements per vscale unit.
%nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
%nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
%nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
ret void
}

View File

@ -4,11 +4,43 @@
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
; CHECK: Found an estimated cost of 0 for instruction: %0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
define void @sve_truncs() {
;CHECK-LABEL: 'sve_truncs'
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
%trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
%trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
%trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
%trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
%trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
%trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
%trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
%trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
%trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
%trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
%trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
%trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
%trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
%trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
%trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
define void @trunc_nxv2i64_to_nxv2i32(<vscale x 2 x i32>* %ptr, <vscale x 2 x i64> %v) {
entry:
%0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
store <vscale x 2 x i32> %0, <vscale x 2 x i32>* %ptr
ret void
}