[RISCV] Add rvv codegen support for vp.fptrunc.

This patch adds RVV codegen support for vp.fptrunc. The lowerings of fp_round and vp.fptrunc share most of their code, so both are now handled by a common lowering function, similar to what was done for vp.trunc.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D123841
jacquesguan 2022-04-15 06:54:27 +00:00
parent 1881d6fc80
commit 25445b94db
5 changed files with 230 additions and 43 deletions
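
For reference, llvm.vp.fptrunc is the predicated counterpart of fptrunc: it narrows only the lanes selected by a mask, up to an explicit vector length (EVL). The sketch below is illustration only, not part of this patch: the function name "demo", the <2 x float> -> <2 x half> types, and the build line are my own assumptions for an LLVM tree roughly contemporary with this commit.

// Build (assumption): clang++ demo.cpp $(llvm-config --cxxflags --ldflags --libs core support)
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("vp_fptrunc_demo", Ctx);

  auto *SrcTy = FixedVectorType::get(Type::getFloatTy(Ctx), 2); // <2 x float>
  auto *DstTy = FixedVectorType::get(Type::getHalfTy(Ctx), 2);  // <2 x half>
  auto *MaskTy = FixedVectorType::get(Type::getInt1Ty(Ctx), 2); // <2 x i1>

  // define <2 x half> @demo(<2 x float> %a, <2 x i1> %m, i32 %evl)
  auto *FTy = FunctionType::get(DstTy, {SrcTy, MaskTy, Type::getInt32Ty(Ctx)},
                                /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "demo", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // Overload types are <result, source>; operands are <value, mask, evl>.
  Value *V = B.CreateIntrinsic(Intrinsic::vp_fptrunc, {DstTy, SrcTy},
                               {F->getArg(0), F->getArg(1), F->getArg(2)});
  B.CreateRet(V);

  verifyModule(M, &errs());
  M.print(outs(), nullptr); // call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(...)
  return 0;
}

With this patch, the RISC-V backend can select such calls: the mask becomes v0.t on vfncvt.f.f.w, the EVL becomes the vsetvli AVL, and f64 -> f16 additionally goes through the round-to-odd step shown in the lowering below.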

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -502,7 +502,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         ISD::VP_REDUCE_FMIN,     ISD::VP_REDUCE_FMAX,
         ISD::VP_MERGE,           ISD::VP_SELECT,
         ISD::VP_SITOFP,          ISD::VP_UITOFP,
-        ISD::VP_SETCC};
+        ISD::VP_SETCC,           ISD::VP_FP_ROUND};
 
     if (!Subtarget.is64Bit()) {
       // We must custom-lower certain vXi64 operations on RV32 due to the vector
@@ -3280,48 +3280,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
     return Extend;
   }
-  case ISD::FP_ROUND: {
-    // RVV can only do fp_round to types half the size as the source. We
-    // custom-lower f64->f16 rounds via RVV's round-to-odd float
-    // conversion instruction.
-    SDLoc DL(Op);
-    MVT VT = Op.getSimpleValueType();
-    SDValue Src = Op.getOperand(0);
-    MVT SrcVT = Src.getSimpleValueType();
-
-    // Prepare any fixed-length vector operands.
-    MVT ContainerVT = VT;
-    if (VT.isFixedLengthVector()) {
-      MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
-      ContainerVT =
-          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
-      Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
-    }
-
-    if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
-        SrcVT.getVectorElementType() != MVT::f64) {
-      // For scalable vectors, we only need to close the gap between
-      // vXf64<->vXf16.
-      if (!VT.isFixedLengthVector())
-        return Op;
-      // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
-      Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
-      return convertFromScalableVector(VT, Src, DAG, Subtarget);
-    }
-
-    SDValue Mask, VL;
-    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
-    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
-    SDValue IntermediateRound =
-        DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
-    SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
-                                          DL, DAG, Subtarget);
-
-    if (VT.isFixedLengthVector())
-      return convertFromScalableVector(VT, Round, DAG, Subtarget);
-    return Round;
-  }
+  case ISD::FP_ROUND:
+    if (!Op.getValueType().isVector())
+      return Op;
+    return lowerVectorFPRoundLike(Op, DAG);
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::SINT_TO_FP:
@@ -3664,6 +3626,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                            : RISCVISD::VZEXT_VL);
   case ISD::VP_TRUNC:
     return lowerVectorTruncLike(Op, DAG);
+  case ISD::VP_FP_ROUND:
+    return lowerVectorFPRoundLike(Op, DAG);
   case ISD::VP_FPTOSI:
     return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
   case ISD::VP_FPTOUI:
@@ -4430,6 +4394,67 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
   return Result;
 }
 
+SDValue RISCVTargetLowering::lowerVectorFPRoundLike(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  bool IsVPFPTrunc = Op.getOpcode() == ISD::VP_FP_ROUND;
+  // RVV can only do truncating fp conversions to types half the size of the
+  // source. We custom-lower f64->f16 rounds via RVV's round-to-odd float
+  // conversion instruction.
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+
+  assert(VT.isVector() && "Unexpected type for vector truncate lowering");
+
+  SDValue Src = Op.getOperand(0);
+  MVT SrcVT = Src.getSimpleValueType();
+
+  bool IsDirectConv = VT.getVectorElementType() != MVT::f16 ||
+                      SrcVT.getVectorElementType() != MVT::f64;
+
+  // For FP_ROUND of scalable vectors, leave it to the pattern.
+  if (!VT.isFixedLengthVector() && !IsVPFPTrunc && IsDirectConv)
+    return Op;
+
+  // Prepare any fixed-length vector operands.
+  MVT ContainerVT = VT;
+  SDValue Mask, VL;
+  if (IsVPFPTrunc) {
+    Mask = Op.getOperand(1);
+    VL = Op.getOperand(2);
+  }
+  if (VT.isFixedLengthVector()) {
+    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
+    ContainerVT =
+        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
+    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+    if (IsVPFPTrunc) {
+      MVT MaskVT =
+          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+    }
+  }
+
+  if (!IsVPFPTrunc)
+    std::tie(Mask, VL) =
+        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
+
+  if (IsDirectConv) {
+    Src = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, ContainerVT, Src, Mask, VL);
+    if (VT.isFixedLengthVector())
+      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
+    return Src;
+  }
+
+  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
+  SDValue IntermediateRound =
+      DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
+  SDValue Round = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, ContainerVT,
+                              IntermediateRound, Mask, VL);
+
+  if (VT.isFixedLengthVector())
+    return convertFromScalableVector(VT, Round, DAG, Subtarget);
+  return Round;
+}
 
 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
 // first position of a vector, and that vector is slid up to the insert index.
 // By limiting the active vector length to index+1 and merging with the

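A note on the f64 -> f16 path in lowerVectorFPRoundLike above: narrowing twice under round-to-nearest can double-round, so the first narrowing uses RVV's round-to-odd conversion (RISCVISD::VFNCVT_ROD_VL, i.e. vfncvt.rod.f.f.w) and only the final f32 -> f16 step rounds to nearest. Below is a host-side C++ model of round-to-odd together with a value that demonstrates the double-rounding hazard; this is a sketch for illustration (the helper name is mine, and NaN/overflow edge cases are ignored), not code from the patch.

#include <cfenv>
#include <cstdint>
#include <cstdio>
#include <cstring>

// f64 -> f32 with round-to-odd: truncate toward zero, then force the
// mantissa LSB to 1 if the conversion was inexact ("jamming").
// Compile without FP constant folding (e.g. -O0) so fesetround is honored.
static float RoundToOddF32(double X) {
  std::fesetround(FE_TOWARDZERO);
  float T = static_cast<float>(X);
  std::fesetround(FE_TONEAREST);
  if (static_cast<double>(T) != X) { // inexact
    uint32_t Bits;
    std::memcpy(&Bits, &T, sizeof(Bits));
    Bits |= 1u; // make the result odd so a later rounding cannot hide it
    std::memcpy(&T, &Bits, sizeof(T));
  }
  return T;
}

int main() {
  // 1 + 2^-11 + 2^-40 lies just above the midpoint of the adjacent f16
  // values 1.0 and 1 + 2^-10, so the correctly rounded f16 result is
  // 1 + 2^-10. Round-to-nearest into f32 lands exactly on the midpoint
  // 1 + 2^-11, and the second rounding then ties-to-even down to 1.0: a
  // double-rounding error. Round-to-odd instead yields 1 + 2^-11 + 2^-23,
  // still above the midpoint, so the f16 step rounds up correctly.
  double X = 1.0 + 0x1p-11 + 0x1p-40;
  std::printf("round-to-nearest f32: %.11f\n", static_cast<float>(X));
  std::printf("round-to-odd     f32: %.11f\n", RoundToOddF32(X));
  return 0;
}

Run at -O0, this prints an f32 intermediate of 1.00048828125 for round-to-nearest (the exact f16 tie point) versus 1.00048840046 for round-to-odd, which preserves the information the second rounding needs.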
llvm/lib/Target/RISCV/RISCVISelLowering.h

@@ -614,6 +614,7 @@ private:
                              int64_t ExtTrueVal) const;
   SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorFPRoundLike(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

@@ -1591,6 +1591,13 @@ foreach fvti = AllFloatVectors in {
                                              (fwti.Mask true_mask),
                                              VLOpFrag)),
             (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX)
                 fwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>;
+
+  def : Pat<(fvti.Vector (riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1),
+                                             (fwti.Mask V0),
+                                             VLOpFrag)),
+            (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK")
+                (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+                (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
 }
 }

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-vp.ll

@@ -0,0 +1,77 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s

declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32)

define <2 x half> @vfptrunc_v2f16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_v2f16_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
  ret <2 x half> %v
}

define <2 x half> @vfptrunc_v2f16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_v2f16_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
  ret <2 x half> %v
}

declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double>, <2 x i1>, i32)

define <2 x half> @vfptrunc_v2f16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_v2f16_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
  ret <2 x half> %v
}

define <2 x half> @vfptrunc_v2f16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_v2f16_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
  ret <2 x half> %v
}

declare <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double>, <2 x i1>, i32)

define <2 x float> @vfptrunc_v2f32_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_v2f32_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
  ret <2 x float> %v
}

define <2 x float> @vfptrunc_v2f32_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_v2f32_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
  ret <2 x float> %v
}

llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll

@@ -0,0 +1,77 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s

declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v10, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
  ret <vscale x 2 x float> %v
}