[AArch64] Add some float -> int -> float conversion patterns

This adds some conversion match patterns for which we want to keep the int values in FP registers using the corresponding NEON instructions (not the FP instructions) to avoid more costly int <-> fp register transfers. Differential Revision: https://reviews.llvm.org/D98956
2021-03-19 14:16:17 +00:00 · 2021-03-19 14:16:17 +00:00 · 7515e81e8c
parent 02b51e5316
commit 7515e81e8c
2 changed files with 95 additions and 0 deletions
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@ -4746,6 +4746,27 @@ def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
 def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

+// Some float -> int -> float conversion patterns for which we want to keep the
+// int values in FP registers using the corresponding NEON instructions to
+// avoid more costly int <-> fp register transfers.
+let Predicates = [HasNEON] in {
+def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
+          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
+def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
+          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
+def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
+          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
+def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
+          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
+
+let Predicates = [HasFullFP16] in {
+def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
+          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
+def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
+          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
+}
+}
+
 // If an integer is about to be converted to a floating point value,
 // just load it on the floating point unit.
 // Here are the patterns for 8 and 16-bits to float.
--- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -o - %s  -mattr=+neon,+fullfp16 | FileCheck %s
+
+define double @t1(double %x) {
+; CHECK-LABEL: t1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    scvtf d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %conv = fptosi double %x to i64
+  %conv1 = sitofp i64 %conv to double
+  ret double %conv1
+}
+
+define float @t2(float %x) {
+; CHECK-LABEL: t2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    scvtf s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i32
+  %conv1 = sitofp i32 %conv to float
+  ret float %conv1
+}
+
+define half @t3(half %x)  {
+; CHECK-LABEL: t3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs h0, h0
+; CHECK-NEXT:    scvtf h0, h0
+; CHECK-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i32
+  %conv1 = sitofp i32 %conv to half
+  ret half %conv1
+}
+
+define double @t4(double %x) {
+; CHECK-LABEL: t4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ucvtf d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %conv = fptoui double %x to i64
+  %conv1 = uitofp i64 %conv to double
+  ret double %conv1
+}
+
+define float @t5(float %x) {
+; CHECK-LABEL: t5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ucvtf s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %conv = fptoui float %x to i32
+  %conv1 = uitofp i32 %conv to float
+  ret float %conv1
+}
+
+define half @t6(half %x)  {
+; CHECK-LABEL: t6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu h0, h0
+; CHECK-NEXT:    ucvtf h0, h0
+; CHECK-NEXT:    ret
+entry:
+  %conv = fptoui half %x to i32
+  %conv1 = uitofp i32 %conv to half
+  ret half %conv1
+}