[RISCV] Add scalable vector fcmp ISel patterns

Original patch by @rogfer01.

All ordered comparisons except ONE are supported natively, as is the
unordered comparison UNE. The rest (ONE and the remaining unordered
comparisons) are expanded into sequences involving explicit NaN checks
and mask arithmetic.
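
For example, UEQ holds exactly when the operands compare ordered-equal or
either operand is NaN, so the expansion amounts to OR-ing two masks. A
hand-written IR sketch of that mask arithmetic (illustrative, not the
literal legalizer output):

  define <vscale x 4 x i1> @ueq_via_masks(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
    ; "a ueq b" == "a oeq b" OR "either of a, b is NaN"
    %oeq = fcmp oeq <vscale x 4 x float> %a, %b
    %uno = fcmp uno <vscale x 4 x float> %a, %b
    %res = or <vscale x 4 x i1> %oeq, %uno
    ret <vscale x 4 x i1> %res
  }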

Additionally, we expand GT, OGT, GE, and OGE to their swapped-operand
versions (LT, OLT, LE, OLE), and pattern-match those back to the
"original" instructions, swapping operands once more. This way we catch
both condition codes and both the "vf" and "fv" forms with fewer
patterns.
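
For instance, an OGT comparison with the splat on the left ("fv" form) is
legalized to an OLT with swapped operands, which the FV pattern then
selects as vmflt.vf. A sketch (the function name is illustrative, and the
expected instruction choice follows the pattern table; exact codegen may
differ):

  define <vscale x 2 x i1> @fv_ogt(<vscale x 2 x double> %v, double %s) {
    %head = insertelement <vscale x 2 x double> undef, double %s, i32 0
    %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
    ; splat OGT vector is expanded to vector OLT splat and is expected
    ; to select vmflt.vf
    %cmp = fcmp ogt <vscale x 2 x double> %splat, %v
    ret <vscale x 2 x i1> %cmp
  }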

Also add support for floating-point splat_vector, with an optimization for
splatting fpimm0.
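
Note that fpimm0 matches only an exact +0.0, so only positive-zero splats
fold to vmv.v.i; a splat of -0.0 is expected to materialize the constant
in an FPR and use vfmv.v.f instead. A hedged sketch (not part of this
commit's tests):

  define <vscale x 8 x float> @vsplat_negzero_nxv8f32() {
    ; -0.0 does not match fpimm0: expect vfmv.v.f rather than vmv.v.i
    %head = insertelement <vscale x 8 x float> undef, float -0.0, i32 0
    %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
    ret <vscale x 8 x float> %splat
  }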

Authored-by: Roger Ferrer Ibanez <rofirrim@gmail.com>
Co-Authored-by: Fraser Cormack <fraser@codeplay.com>

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D94242
commit 37b41bd087
parent 195728c75a
Author: Fraser Cormack
Date:   2021-01-06 11:25:25 +00:00

5 changed files with 5716 additions and 0 deletions


@@ -374,6 +374,48 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
    // We must custom-lower SPLAT_VECTOR vXi64 on RV32
    if (!Subtarget.is64Bit())
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    if (Subtarget.hasStdExtZfh()) {
      for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t,
                      RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t,
                      RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }

    if (Subtarget.hasStdExtF()) {
      for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t,
                      RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t,
                      RISCVVMVTs::vfloat32m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }

    if (Subtarget.hasStdExtD()) {
      for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t,
                      RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t}) {
        setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);
      }
    }
  }

  // Function alignments.


@@ -192,6 +192,45 @@ multiclass VPatIntegerSetCCSDNode_VX_VI<CondCode cc,
                                  SplatPat_simm5, simm5, swap>;
}
multiclass VPatFPSetCCSDNode_VV<CondCode cc, string instruction_name> {
  foreach fvti = AllFloatVectors in
    def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
                                (fvti.Vector fvti.RegClass:$rs2),
                                cc)),
              (!cast<Instruction>(instruction_name#"_VV_"#fvti.LMul.MX)
                  fvti.RegClass:$rs1, fvti.RegClass:$rs2, VLMax, fvti.SEW)>;
}

multiclass VPatFPSetCCSDNode_VF<CondCode cc, string instruction_name> {
  foreach fvti = AllFloatVectors in
    def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
                                (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)),
                                cc)),
              (!cast<Instruction>(instruction_name#"_VF_"#fvti.LMul.MX)
                  fvti.RegClass:$rs1,
                  ToFPR32<fvti.Scalar, fvti.ScalarRegClass, "rs2">.ret,
                  VLMax, fvti.SEW)>;
}

multiclass VPatFPSetCCSDNode_FV<CondCode cc, string swapped_op_instruction_name> {
  foreach fvti = AllFloatVectors in
    def : Pat<(fvti.Mask (setcc (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)),
                                (fvti.Vector fvti.RegClass:$rs1),
                                cc)),
              (!cast<Instruction>(swapped_op_instruction_name#"_VF_"#fvti.LMul.MX)
                  fvti.RegClass:$rs1,
                  ToFPR32<fvti.Scalar, fvti.ScalarRegClass, "rs2">.ret,
                  VLMax, fvti.SEW)>;
}

multiclass VPatFPSetCCSDNode_VV_VF_FV<CondCode cc,
                                      string inst_name,
                                      string swapped_op_inst_name> {
  defm : VPatFPSetCCSDNode_VV<cc, inst_name>;
  defm : VPatFPSetCCSDNode_VF<cc, inst_name>;
  defm : VPatFPSetCCSDNode_FV<cc, swapped_op_inst_name>;
}
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -299,6 +338,23 @@ foreach mti = AllMasks in {
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
// 14.11. Vector Floating-Point Compare Instructions
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETNE, "PseudoVMFNE", "PseudoVMFNE">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETUNE, "PseudoVMFNE", "PseudoVMFNE">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETLT, "PseudoVMFLT", "PseudoVMFGT">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOLT, "PseudoVMFLT", "PseudoVMFGT">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETLE, "PseudoVMFLE", "PseudoVMFGE">;
defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
} // Predicates = [HasStdExtV, HasStdExtF]
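
As a concrete consequence of the OLE line above, the scalar may sit on
either side of the comparison: the "vf" form keeps vmfle.vf while the
"fv" form selects the swapped vmfge.vf. A sketch of both forms (function
names are illustrative; the expected instructions follow the pattern
table):

  define <vscale x 4 x i1> @ole_vf(<vscale x 4 x float> %v, float %s) {
    %head = insertelement <vscale x 4 x float> undef, float %s, i32 0
    %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
    ; vector OLE splat: expected to select vmfle.vf
    %cmp = fcmp ole <vscale x 4 x float> %v, %splat
    ret <vscale x 4 x i1> %cmp
  }

  define <vscale x 4 x i1> @ole_fv(<vscale x 4 x float> %v, float %s) {
    %head = insertelement <vscale x 4 x float> undef, float %s, i32 0
    %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
    ; splat OLE vector: expected to select the swapped form, vmfge.vf
    %cmp = fcmp ole <vscale x 4 x float> %splat, %v
    ret <vscale x 4 x i1> %cmp
  }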
//===----------------------------------------------------------------------===//
// Vector Splats
//===----------------------------------------------------------------------===//
@@ -333,3 +389,16 @@ foreach vti = AllIntegerVectors in {
  }
}
} // Predicates = [HasStdExtV, IsRV32]
let Predicates = [HasStdExtV, HasStdExtF] in {
foreach fvti = AllFloatVectors in {
  def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)),
            (!cast<Instruction>("PseudoVFMV_V_F_"#fvti.LMul.MX)
                ToFPR32<fvti.Scalar, fvti.ScalarRegClass, "rs1">.ret,
                VLMax, fvti.SEW)>;

  def : Pat<(fvti.Vector (splat_vector (fvti.Scalar fpimm0))),
            (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
                0, VLMax, fvti.SEW)>;
}
} // Predicates = [HasStdExtV, HasStdExtF]

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,109 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+experimental-zfh,+experimental-v -target-abi ilp32d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=RV32V
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+experimental-zfh,+experimental-v -target-abi lp64d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=RV64V
define <vscale x 8 x half> @vsplat_nxv8f16(half %f) {
; RV32V-LABEL: vsplat_nxv8f16:
; RV32V: # %bb.0:
; RV32V-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; RV32V-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32V-NEXT: vfmv.v.f v16, fa0
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_nxv8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; RV64V-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64V-NEXT: vfmv.v.f v16, fa0
; RV64V-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %f, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x half> %splat
}
define <vscale x 8 x half> @vsplat_zero_nxv8f16() {
; RV32V-LABEL: vsplat_zero_nxv8f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32V-NEXT: vmv.v.i v16, 0
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_zero_nxv8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64V-NEXT: vmv.v.i v16, 0
; RV64V-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half zeroinitializer, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x half> %splat
}
define <vscale x 8 x float> @vsplat_nxv8f32(float %f) {
; RV32V-LABEL: vsplat_nxv8f32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32V-NEXT: vfmv.v.f v16, fa0
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_nxv8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64V-NEXT: vfmv.v.f v16, fa0
; RV64V-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %f, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x float> %splat
}
define <vscale x 8 x float> @vsplat_zero_nxv8f32() {
; RV32V-LABEL: vsplat_zero_nxv8f32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32V-NEXT: vmv.v.i v16, 0
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_zero_nxv8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64V-NEXT: vmv.v.i v16, 0
; RV64V-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float zeroinitializer, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x float> %splat
}
define <vscale x 8 x double> @vsplat_nxv8f64(double %f) {
; RV32V-LABEL: vsplat_nxv8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; RV32V-NEXT: vfmv.v.f v16, fa0
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_nxv8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; RV64V-NEXT: vfmv.v.f v16, fa0
; RV64V-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %f, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x double> %splat
}
define <vscale x 8 x double> @vsplat_zero_nxv8f64() {
; RV32V-LABEL: vsplat_zero_nxv8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; RV32V-NEXT: vmv.v.i v16, 0
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_zero_nxv8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; RV64V-NEXT: vmv.v.i v16, 0
; RV64V-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double zeroinitializer, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x double> %splat
}