diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index fc931b964f7f..75145db28ba0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -502,7 +502,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_SITOFP, ISD::VP_UITOFP, - ISD::VP_SETCC}; + ISD::VP_SETCC, ISD::VP_FP_ROUND}; if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector @@ -3280,48 +3280,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return convertFromScalableVector(VT, Extend, DAG, Subtarget); return Extend; } - case ISD::FP_ROUND: { - // RVV can only do fp_round to types half the size as the source. We - // custom-lower f64->f16 rounds via RVV's round-to-odd float - // conversion instruction. - SDLoc DL(Op); - MVT VT = Op.getSimpleValueType(); - SDValue Src = Op.getOperand(0); - MVT SrcVT = Src.getSimpleValueType(); - - // Prepare any fixed-length vector operands. - MVT ContainerVT = VT; - if (VT.isFixedLengthVector()) { - MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); - ContainerVT = - SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); - Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); - } - - if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || - SrcVT.getVectorElementType() != MVT::f64) { - // For scalable vectors, we only need to close the gap between - // vXf64<->vXf16. - if (!VT.isFixedLengthVector()) - return Op; - // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version. - Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); - return convertFromScalableVector(VT, Src, DAG, Subtarget); - } - - SDValue Mask, VL; - std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); - - MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); - SDValue IntermediateRound = - DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL); - SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT, - DL, DAG, Subtarget); - - if (VT.isFixedLengthVector()) - return convertFromScalableVector(VT, Round, DAG, Subtarget); - return Round; - } + case ISD::FP_ROUND: + if (!Op.getValueType().isVector()) + return Op; + return lowerVectorFPRoundLike(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: @@ -3664,6 +3626,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, : RISCVISD::VZEXT_VL); case ISD::VP_TRUNC: return lowerVectorTruncLike(Op, DAG); + case ISD::VP_FP_ROUND: + return lowerVectorFPRoundLike(Op, DAG); case ISD::VP_FPTOSI: return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL); case ISD::VP_FPTOUI: @@ -4430,6 +4394,67 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, return Result; } +SDValue RISCVTargetLowering::lowerVectorFPRoundLike(SDValue Op, + SelectionDAG &DAG) const { + bool IsVPFPTrunc = Op.getOpcode() == ISD::VP_FP_ROUND; + // RVV can only do truncate fp to types half the size as the source. We + // custom-lower f64->f16 rounds via RVV's round-to-odd float + // conversion instruction. 
+ SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + + assert(VT.isVector() && "Unexpected type for vector truncate lowering"); + + SDValue Src = Op.getOperand(0); + MVT SrcVT = Src.getSimpleValueType(); + + bool IsDirectConv = VT.getVectorElementType() != MVT::f16 || + SrcVT.getVectorElementType() != MVT::f64; + + // For FP_ROUND of scalable vectors, leave it to the pattern. + if (!VT.isFixedLengthVector() && !IsVPFPTrunc && IsDirectConv) + return Op; + + // Prepare any fixed-length vector operands. + MVT ContainerVT = VT; + SDValue Mask, VL; + if (IsVPFPTrunc) { + Mask = Op.getOperand(1); + VL = Op.getOperand(2); + } + if (VT.isFixedLengthVector()) { + MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); + ContainerVT = + SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); + Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); + if (IsVPFPTrunc) { + MVT MaskVT = + MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + } + + if (!IsVPFPTrunc) + std::tie(Mask, VL) = + getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); + + if (IsDirectConv) { + Src = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, ContainerVT, Src, Mask, VL); + if (VT.isFixedLengthVector()) + Src = convertFromScalableVector(VT, Src, DAG, Subtarget); + return Src; + } + + MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); + SDValue IntermediateRound = + DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL); + SDValue Round = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, ContainerVT, + IntermediateRound, Mask, VL); + if (VT.isFixedLengthVector()) + return convertFromScalableVector(VT, Round, DAG, Subtarget); + return Round; +} + // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the // first position of a vector, and that vector is slid up to the insert index. 
// By limiting the active vector length to index+1 and merging with the diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 56d0779905fc..29524197c392 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -614,6 +614,7 @@ private: int64_t ExtTrueVal) const; SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorFPRoundLike(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 563ace263e68..7d92795d3ff4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1591,6 +1591,13 @@ foreach fvti = AllFloatVectors in { VLOpFrag)), (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX) fwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1), + (fwti.Mask V0), + VLOpFrag)), + (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, + (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll new file mode 100644 index 000000000000..7657382ccf85 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s + +declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32) + +define <2 x half> @vfptrunc_v2f16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2f16_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl) + ret <2 x half> %v +} + +define <2 x half> @vfptrunc_v2f16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2f16_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + ret <2 x half> %v +} + +declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double>, <2 x i1>, i32) + +define <2 x half> @vfptrunc_v2f16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2f16_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v8, v9, 
v0.t +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl) + ret <2 x half> %v +} + +define <2 x half> @vfptrunc_v2f16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2f16_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + ret <2 x half> %v +} + +declare <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double>, <2 x i1>, i32) + +define <2 x float> @vfptrunc_v2f32_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2f32_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> %m, i32 %vl) + ret <2 x float> %v +} + +define <2 x float> @vfptrunc_v2f32_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2f32_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + ret <2 x float> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll new file mode 100644 index 000000000000..47032c3cbb38 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s + +declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl) + ret <vscale x 2 x half> %v +} + +define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl) + ret <vscale x 2 x half> %v +} + +declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, 
i32) + +define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl) + ret <vscale x 2 x half> %v +} + +define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl) + ret <vscale x 2 x half> %v +} + +declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v10, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl) + ret <vscale x 2 x float> %v +} + +define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfncvt.f.f.w v10, v8 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl) + ret <vscale x 2 x float> %v +}
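
Note (illustrative, not part of the patch): the f64->f16 path narrows in two steps because RVV's vfncvt.* instructions can only halve the element width per conversion. The first step uses vfncvt.rod.f.f.w (round-to-odd) so the intermediate f32 value cannot introduce a double-rounding error when the second, correctly rounded f32->f16 narrowing is applied. The shared lowerVectorFPRoundLike helper also handles plain (non-VP) vector fptrunc; a minimal IR sketch that should exercise that path with -mattr=+d,+zfh,+experimental-zvfh,+v (function name is hypothetical; it is expected to lower to vfncvt.rod.f.f.w followed by vfncvt.f.f.w, as in the masked tests above):

  ; Hypothetical example, not part of this patch.
  define <vscale x 2 x half> @fptrunc_nxv2f16_nxv2f64(<vscale x 2 x double> %a) {
    %v = fptrunc <vscale x 2 x double> %a to <vscale x 2 x half>
    ret <vscale x 2 x half> %v
  }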