Implement correct cost for SVE bitcasts

Some SVE bitcasts are known to be simplified away, so their cost is zero.

Reviewed By: david-arm, sdesmalen

Differential Revision: https://reviews.llvm.org/D118019
2022-01-26 13:33:38 +00:00 · 2022-01-26 13:33:38 +00:00 · 2feddb37b4
parent ecf7a0e338
commit 2feddb37b4
2 changed files with 64 additions and 2 deletions
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -1590,6 +1590,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
    { ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
    { ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},

+    // Bitcasts from integer to float (destination type precedes source)
+    { ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0 },
+    { ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0 },
+    { ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0 },
+
+    // Bitcasts from float to integer (destination type precedes source)
+    { ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0 },
+    { ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0 },
+    { ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0 },
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
--- a/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll
@ -1,8 +1,61 @@
 ; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -cost-model -analyze < %s | FileCheck %s

-; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x double> %a to <vscale x 2 x i64>
+; Integer to float bitcasts

-define <vscale x 2 x i64> @foo(<vscale x 2 x double> %a, i32 %x) {
+define <vscale x 2 x double> @test_nxv2f64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: test_nxv2f64
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x i64> %a to <vscale x 2 x double>
+  %b = bitcast <vscale x 2 x i64> %a to <vscale x 2 x double>
+  ret <vscale x 2 x double> %b
+}
+
+define <vscale x 2 x half> @test_nxv2f16(<vscale x 2 x i16> %a) {
+; CHECK-LABEL: test_nxv2f16
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x i16> %a to <vscale x 2 x half>
+  %b = bitcast <vscale x 2 x i16> %a to <vscale x 2 x half>
+  ret <vscale x 2 x half> %b
+}
+
+define <vscale x 4 x half> @test_nxv4f16(<vscale x 4 x i16> %a) {
+; CHECK-LABEL: test_nxv4f16
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 4 x i16> %a to <vscale x 4 x half>
+  %b = bitcast <vscale x 4 x i16> %a to <vscale x 4 x half>
+  ret <vscale x 4 x half> %b
+}
+
+define <vscale x 2 x float> @test_nxv2f32(<vscale x 2 x i32> %a) {
+; CHECK-LABEL: test_nxv2f32
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x i32> %a to <vscale x 2 x float>
+  %b = bitcast <vscale x 2 x i32> %a to <vscale x 2 x float>
+  ret <vscale x 2 x float> %b
+}
+
+; Float to integer bitcasts
+
+define <vscale x 2 x i64> @test_nxv2i64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: test_nxv2i64
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x double> %a to <vscale x 2 x i64>
  %b = bitcast <vscale x 2 x double> %a to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %b
 }
+
+define <vscale x 2 x i16> @test_nxv2i16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: test_nxv2i16
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x half> %a to <vscale x 2 x i16>
+  %b = bitcast <vscale x 2 x half> %a to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %b
+}
+
+define <vscale x 4 x i16> @test_nxv4i16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: test_nxv4i16
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 4 x half> %a to <vscale x 4 x i16>
+  %b = bitcast <vscale x 4 x half> %a to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %b
+}
+
+define <vscale x 2 x i32> @test_nxv2i32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: test_nxv2i32
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x float> %a to <vscale x 2 x i32>
+  %b = bitcast <vscale x 2 x float> %a to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %b
+}