[RISCV] Add scalable vector fcmp ISel patterns
Original patch by @rogfer01.

All ordered comparisons except ONE are supported natively, and all unordered
comparisons except UNE are expanded into sequences involving explicit NaN
checks and mask arithmetic.

Additionally, we expand GT,OGT,GE,OGE to their swapped-operand versions, and
pattern-match those back to the "original", swapping operands once more. This
way we catch both operations and both "vf" and "fv" forms with fewer patterns.

Also add support for floating-point splat_vector, with an optimization for
splatting fpimm0.

Authored-by: Roger Ferrer Ibanez <rofirrim@gmail.com>
Co-Authored-by: Fraser Cormack <fraser@codeplay.com>

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D94242
This commit is contained in:
parent 195728c75a
commit 37b41bd087
@@ -374,6 +374,48 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
    // We must custom-lower SPLAT_VECTOR vXi64 on RV32
    if (!Subtarget.is64Bit())
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    if (Subtarget.hasStdExtZfh()) {
      for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t,
                      RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t,
                      RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }

    if (Subtarget.hasStdExtF()) {
      for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t,
                      RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t,
                      RISCVVMVTs::vfloat32m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }

    if (Subtarget.hasStdExtD()) {
      for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t,
                      RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }
  }

  // Function alignments.
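For intuition, here is a minimal scalar C++ sketch of the identities the
Expand entries above rely on (an illustration, not code from this patch): an
unordered comparison is the corresponding ordered comparison OR-ed with an
explicit NaN check, and GT/GE are LT/LE with the operands swapped.

    #include <cmath>

    // Native on RVV: ordered equality (vmfeq); false if either input is NaN.
    bool oeq(float a, float b) { return a == b; }

    // Expanded: unordered equality = explicit NaN check OR ordered equality.
    // On RVV the NaN check is itself a compare plus mask arithmetic;
    // std::isnan stands in for it here.
    bool ueq(float a, float b) {
      return std::isnan(a) || std::isnan(b) || oeq(a, b);
    }

    // Expanded for pattern-count reasons: ogt(a, b) is olt with operands
    // swapped, so one set of "less-than" patterns covers both directions.
    bool ogt(float a, float b) { return b < a; }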
@@ -192,6 +192,45 @@ multiclass VPatIntegerSetCCSDNode_VX_VI<CondCode cc,
                                        SplatPat_simm5, simm5, swap>;
}

multiclass VPatFPSetCCSDNode_VV<CondCode cc, string instruction_name> {
  foreach fvti = AllFloatVectors in
    def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
                                (fvti.Vector fvti.RegClass:$rs2),
                                cc)),
              (!cast<Instruction>(instruction_name#"_VV_"#fvti.LMul.MX)
                   fvti.RegClass:$rs1, fvti.RegClass:$rs2, VLMax, fvti.SEW)>;
}

multiclass VPatFPSetCCSDNode_VF<CondCode cc, string instruction_name> {
  foreach fvti = AllFloatVectors in
    def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
                                (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)),
                                cc)),
              (!cast<Instruction>(instruction_name#"_VF_"#fvti.LMul.MX)
                   fvti.RegClass:$rs1,
                   ToFPR32<fvti.Scalar, fvti.ScalarRegClass, "rs2">.ret,
                   VLMax, fvti.SEW)>;
}

multiclass VPatFPSetCCSDNode_FV<CondCode cc,
                                string swapped_op_instruction_name> {
  foreach fvti = AllFloatVectors in
    def : Pat<(fvti.Mask (setcc (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)),
                                (fvti.Vector fvti.RegClass:$rs1),
                                cc)),
              (!cast<Instruction>(swapped_op_instruction_name#"_VF_"#fvti.LMul.MX)
                   fvti.RegClass:$rs1,
                   ToFPR32<fvti.Scalar, fvti.ScalarRegClass, "rs2">.ret,
                   VLMax, fvti.SEW)>;
}

multiclass VPatFPSetCCSDNode_VV_VF_FV<CondCode cc,
                                      string inst_name,
                                      string swapped_op_inst_name> {
  defm : VPatFPSetCCSDNode_VV<cc, inst_name>;
  defm : VPatFPSetCCSDNode_VF<cc, inst_name>;
  defm : VPatFPSetCCSDNode_FV<cc, swapped_op_inst_name>;
}
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
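As a rough C++ analogy (illustrative only, not the TableGen above), the three
multiclasses cover the three shapes a floating-point setcc can take, and show
why the "fv" form selects the swapped-operand instruction:

    // vv: vector-lane CC vector-lane       -> e.g. vmflt.vv
    bool vv_lt(float v_lane, float w_lane) { return v_lane < w_lane; }

    // vf: vector-lane CC splatted scalar   -> e.g. vmflt.vf
    bool vf_lt(float v_lane, float x) { return v_lane < x; }

    // fv: splatted scalar CC vector-lane; swapping the operands turns it
    // into a vf form with the swapped CC, so LT's fv pattern selects
    // vmfgt.vf:  x < v_lane  <=>  v_lane > x
    bool fv_lt(float x, float v_lane) { return v_lane > x; }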
@@ -299,6 +338,23 @@ foreach mti = AllMasks in {
} // Predicates = [HasStdExtV]

let Predicates = [HasStdExtV, HasStdExtF] in {

// 14.11. Vector Floating-Point Compare Instructions
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETEQ,  "PseudoVMFEQ", "PseudoVMFEQ">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOEQ, "PseudoVMFEQ", "PseudoVMFEQ">;

defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETNE,  "PseudoVMFNE", "PseudoVMFNE">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETUNE, "PseudoVMFNE", "PseudoVMFNE">;

defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETLT,  "PseudoVMFLT", "PseudoVMFGT">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOLT, "PseudoVMFLT", "PseudoVMFGT">;

defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETLE,  "PseudoVMFLE", "PseudoVMFGE">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;

} // Predicates = [HasStdExtV, HasStdExtF]

//===----------------------------------------------------------------------===//
// Vector Splats
//===----------------------------------------------------------------------===//
@@ -333,3 +389,16 @@ foreach vti = AllIntegerVectors in {
  }
}
} // Predicates = [HasStdExtV, IsRV32]

let Predicates = [HasStdExtV, HasStdExtF] in {
foreach fvti = AllFloatVectors in {
  def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)),
            (!cast<Instruction>("PseudoVFMV_V_F_"#fvti.LMul.MX)
                ToFPR32<fvti.Scalar, fvti.ScalarRegClass, "rs1">.ret,
                VLMax, fvti.SEW)>;

  def : Pat<(fvti.Vector (splat_vector (fvti.Scalar fpimm0))),
            (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
                0, VLMax, fvti.SEW)>;
}
} // Predicates = [HasStdExtV, HasStdExtF]
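The second pattern is the fpimm0 optimization from the commit message: a
splat of +0.0 can use the integer vmv.v.i with immediate 0 rather than moving
a scalar through an FP register, because +0.0 is all-zero bits in IEEE 754. A
small C++ check of that assumption (illustration only):

    #include <cstdint>
    #include <cstring>

    bool plus_zero_is_all_zero_bits() {
      double d = 0.0;    // +0.0, the constant fpimm0 matches
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof(bits));
      return bits == 0;  // true: an integer zero splat produces the same
    }                    // bytes as vfmv.v.f of +0.0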
(Diffs of two additional files suppressed because they are too large.)
@@ -0,0 +1,109 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+experimental-zfh,+experimental-v -target-abi ilp32d -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefix=RV32V
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+experimental-zfh,+experimental-v -target-abi lp64d -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefix=RV64V
define <vscale x 8 x half> @vsplat_nxv8f16(half %f) {
; RV32V-LABEL: vsplat_nxv8f16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    # kill: def $f10_h killed $f10_h def $f10_f
; RV32V-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
; RV32V-NEXT:    vfmv.v.f v16, fa0
; RV32V-NEXT:    ret
;
; RV64V-LABEL: vsplat_nxv8f16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    # kill: def $f10_h killed $f10_h def $f10_f
; RV64V-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
; RV64V-NEXT:    vfmv.v.f v16, fa0
; RV64V-NEXT:    ret
  %head = insertelement <vscale x 8 x half> undef, half %f, i32 0
  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %splat
}
define <vscale x 8 x half> @vsplat_zero_nxv8f16() {
; RV32V-LABEL: vsplat_zero_nxv8f16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
; RV32V-NEXT:    vmv.v.i v16, 0
; RV32V-NEXT:    ret
;
; RV64V-LABEL: vsplat_zero_nxv8f16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
; RV64V-NEXT:    vmv.v.i v16, 0
; RV64V-NEXT:    ret
  %head = insertelement <vscale x 8 x half> undef, half zeroinitializer, i32 0
  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %splat
}

define <vscale x 8 x float> @vsplat_nxv8f32(float %f) {
; RV32V-LABEL: vsplat_nxv8f32:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
; RV32V-NEXT:    vfmv.v.f v16, fa0
; RV32V-NEXT:    ret
;
; RV64V-LABEL: vsplat_nxv8f32:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
; RV64V-NEXT:    vfmv.v.f v16, fa0
; RV64V-NEXT:    ret
  %head = insertelement <vscale x 8 x float> undef, float %f, i32 0
  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x float> %splat
}

define <vscale x 8 x float> @vsplat_zero_nxv8f32() {
; RV32V-LABEL: vsplat_zero_nxv8f32:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
; RV32V-NEXT:    vmv.v.i v16, 0
; RV32V-NEXT:    ret
;
; RV64V-LABEL: vsplat_zero_nxv8f32:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
; RV64V-NEXT:    vmv.v.i v16, 0
; RV64V-NEXT:    ret
  %head = insertelement <vscale x 8 x float> undef, float zeroinitializer, i32 0
  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x float> %splat
}

define <vscale x 8 x double> @vsplat_nxv8f64(double %f) {
; RV32V-LABEL: vsplat_nxv8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
; RV32V-NEXT:    vfmv.v.f v16, fa0
; RV32V-NEXT:    ret
;
; RV64V-LABEL: vsplat_nxv8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
; RV64V-NEXT:    vfmv.v.f v16, fa0
; RV64V-NEXT:    ret
  %head = insertelement <vscale x 8 x double> undef, double %f, i32 0
  %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x double> %splat
}

define <vscale x 8 x double> @vsplat_zero_nxv8f64() {
; RV32V-LABEL: vsplat_zero_nxv8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
; RV32V-NEXT:    vmv.v.i v16, 0
; RV32V-NEXT:    ret
;
; RV64V-LABEL: vsplat_zero_nxv8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
; RV64V-NEXT:    vmv.v.i v16, 0
; RV64V-NEXT:    ret
  %head = insertelement <vscale x 8 x double> undef, double zeroinitializer, i32 0
  %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x double> %splat
}
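In each test above, the insertelement/shufflevector pair with a
zeroinitializer mask is the canonical IR idiom for a splat; it is what lowers
to the SPLAT_VECTOR node handled by the patterns in this patch. In plain C++
terms the idiom amounts to (a sketch, with a runtime length standing in for
vscale):

    #include <cstddef>
    #include <vector>

    // Every lane receives the same scalar, as vfmv.v.f (or vmv.v.i 0 for
    // the +0.0 case) does; n stands in for the scalable vector length.
    std::vector<float> splat(float f, std::size_t n) {
      return std::vector<float>(n, f);
    }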