forked from OSchip/llvm-project
[SVE][Analysis] Instruction costs for ops on scalable vectors
The following operations have no associated cost when applied to scalable vectors and, as a consequence, can trigger a crash when a call is made to AArch64TTIImpl::getCastInstrCost(): fptrunc, trunc, fpext, and fpto(u,s)i. This patch adds costs for these operations and relevant regression tests. Differential Revision: https://reviews.llvm.org/D98934
This commit is contained in:
parent
66a8247eb6
commit
19601a4c6c
|
@ -391,6 +391,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
|
||||
|
||||
// Truncations on scalable integer vectors (nxv<M>i<N>).
|
||||
{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 2 },
|
||||
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5 },
|
||||
{ ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 2 },
|
||||
{ ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::nxv8i32, MVT::nxv8i64, 6 },
|
||||
|
||||
// The number of shll instructions for the extension.
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
|
@ -472,6 +489,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
|
||||
|
||||
// Lowering scalable vectors: FP-to-int conversions between equal-width element types.
|
||||
{ ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
|
||||
|
||||
|
||||
// Complex, from nxv2f32: legal type is nxv2i32 (no cost) or nxv2i64 (1 ext).
|
||||
{ ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
|
||||
|
||||
// Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
|
||||
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
|
||||
|
@ -479,6 +513,75 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
|
||||
|
||||
// Complex, from nxv2f64: legal type is nxv2i32, 1 narrowing => ~2.
|
||||
{ ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
|
||||
|
||||
// Complex, from nxv4f32: legal type is nxv4i16, 1 narrowing => ~2.
|
||||
{ ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
|
||||
|
||||
// Complex, from nxv8f64: legal type is nxv8i32, 1 narrowing => ~2.
|
||||
{ ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
|
||||
|
||||
// Complex, from nxv4f64: legal type is nxv4i32, 1 narrowing => ~2.
|
||||
{ ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
|
||||
|
||||
// Complex, from nxv8f32: legal type is nxv8i32 (no cost) or nxv8i64 (1 ext).
|
||||
{ ISD::FP_TO_SINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f32, 3 },
|
||||
{ ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
|
||||
|
||||
// Truncate from nxvmf32 to nxvmf16.
|
||||
{ ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f32, 1 },
|
||||
{ ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1 },
|
||||
{ ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3 },
|
||||
|
||||
// Truncate from nxvmf64 to nxvmf16.
|
||||
{ ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1 },
|
||||
{ ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3 },
|
||||
{ ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7 },
|
||||
|
||||
// Truncate from nxvmf64 to nxvmf32.
|
||||
{ ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1 },
|
||||
{ ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3 },
|
||||
{ ISD::FP_ROUND, MVT::nxv8f32, MVT::nxv8f64, 6 },
|
||||
|
||||
// Extend from nxvmf16 to nxvmf32.
|
||||
{ ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
|
||||
{ ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
|
||||
{ ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
|
||||
|
||||
// Extend from nxvmf16 to nxvmf64.
|
||||
{ ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
|
||||
{ ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
|
||||
{ ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
|
||||
|
||||
// Extend from nxvmf32 to nxvmf64.
|
||||
{ ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
|
||||
{ ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
|
||||
{ ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
|
||||
|
||||
};
|
||||
|
||||
if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define void @sve_fpext() {
|
||||
;CHECK-LABEL: 'sve_fpext'
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
|
||||
%nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
|
||||
%nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
|
||||
%nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
|
||||
|
||||
%nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
|
||||
%nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
|
||||
%nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
|
||||
|
||||
%nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
|
||||
%nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
|
||||
%nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
|
||||
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -o - -S < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define void @sve-fptoi() {
|
||||
;CHECK-LABEL: 'sve-fptoi'
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
|
||||
|
||||
%nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
|
||||
%nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
|
||||
%nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
|
||||
%nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
|
||||
%nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
|
||||
%nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
|
||||
|
||||
%nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
|
||||
%nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
|
||||
%nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
|
||||
%nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
|
||||
%nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
|
||||
%nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
|
||||
|
||||
%nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
|
||||
%nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
|
||||
%nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
|
||||
%nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
|
||||
%nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
|
||||
%nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
|
||||
|
||||
%nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
|
||||
%nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
|
||||
%nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
|
||||
%nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
|
||||
%nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
|
||||
%nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
|
||||
|
||||
%nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
|
||||
%nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
|
||||
%nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
|
||||
%nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
|
||||
%nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
|
||||
%nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
|
||||
|
||||
%nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
|
||||
%nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
|
||||
%nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
|
||||
%nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
|
||||
%nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
|
||||
%nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
|
||||
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>%t| FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define void @sve_fptruncs() {
|
||||
;CHECK-LABEL: 'sve_fptruncs'
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
|
||||
%nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
|
||||
%nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
|
||||
%nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
|
||||
|
||||
%nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
|
||||
%nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
|
||||
%nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
|
||||
|
||||
%nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
|
||||
%nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
|
||||
%nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
|
||||
|
||||
ret void
|
||||
}
|
|
@ -4,11 +4,43 @@
|
|||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
; CHECK: Found an estimated cost of 0 for instruction: %0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
|
||||
define void @sve_truncs() {
|
||||
;CHECK-LABEL: 'sve_truncs'
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
|
||||
;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
|
||||
%trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
|
||||
%trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
|
||||
%trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
|
||||
|
||||
%trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
|
||||
%trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
|
||||
%trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
|
||||
|
||||
%trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
|
||||
%trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
|
||||
%trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
|
||||
|
||||
%trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
|
||||
%trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
|
||||
|
||||
%trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
|
||||
%trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
|
||||
|
||||
%trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
|
||||
%trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
|
||||
|
||||
define void @trunc_nxv2i64_to_nxv2i32(<vscale x 2 x i32>* %ptr, <vscale x 2 x i64> %v) {
|
||||
entry:
|
||||
%0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
|
||||
store <vscale x 2 x i32> %0, <vscale x 2 x i32>* %ptr
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue