forked from OSchip/llvm-project
[RISCV] Add rvv codegen support for vp.fptrunc.
This patch adds RVV codegen support for vp.fptrunc. The lowering of fp_round and vp.fptrunc share most code, so use a common lowering function to handle those two, similar to vp.trunc.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D123841
This commit is contained in:
parent
1881d6fc80
commit
25445b94db
|
@ -502,7 +502,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
|||
ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,
|
||||
ISD::VP_MERGE, ISD::VP_SELECT,
|
||||
ISD::VP_SITOFP, ISD::VP_UITOFP,
|
||||
ISD::VP_SETCC};
|
||||
ISD::VP_SETCC, ISD::VP_FP_ROUND};
|
||||
|
||||
if (!Subtarget.is64Bit()) {
|
||||
// We must custom-lower certain vXi64 operations on RV32 due to the vector
|
||||
|
@ -3280,48 +3280,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
|
|||
return convertFromScalableVector(VT, Extend, DAG, Subtarget);
|
||||
return Extend;
|
||||
}
|
||||
case ISD::FP_ROUND: {
|
||||
// RVV can only do fp_round to types half the size as the source. We
|
||||
// custom-lower f64->f16 rounds via RVV's round-to-odd float
|
||||
// conversion instruction.
|
||||
SDLoc DL(Op);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDValue Src = Op.getOperand(0);
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
|
||||
// Prepare any fixed-length vector operands.
|
||||
MVT ContainerVT = VT;
|
||||
if (VT.isFixedLengthVector()) {
|
||||
MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
|
||||
ContainerVT =
|
||||
SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
|
||||
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
|
||||
}
|
||||
|
||||
if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
|
||||
SrcVT.getVectorElementType() != MVT::f64) {
|
||||
// For scalable vectors, we only need to close the gap between
|
||||
// vXf64<->vXf16.
|
||||
if (!VT.isFixedLengthVector())
|
||||
return Op;
|
||||
// For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
|
||||
Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
|
||||
return convertFromScalableVector(VT, Src, DAG, Subtarget);
|
||||
}
|
||||
|
||||
SDValue Mask, VL;
|
||||
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
|
||||
|
||||
MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
|
||||
SDValue IntermediateRound =
|
||||
DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
|
||||
SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
|
||||
DL, DAG, Subtarget);
|
||||
|
||||
if (VT.isFixedLengthVector())
|
||||
return convertFromScalableVector(VT, Round, DAG, Subtarget);
|
||||
return Round;
|
||||
}
|
||||
case ISD::FP_ROUND:
|
||||
if (!Op.getValueType().isVector())
|
||||
return Op;
|
||||
return lowerVectorFPRoundLike(Op, DAG);
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
case ISD::SINT_TO_FP:
|
||||
|
@ -3664,6 +3626,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
|
|||
: RISCVISD::VZEXT_VL);
|
||||
case ISD::VP_TRUNC:
|
||||
return lowerVectorTruncLike(Op, DAG);
|
||||
case ISD::VP_FP_ROUND:
|
||||
return lowerVectorFPRoundLike(Op, DAG);
|
||||
case ISD::VP_FPTOSI:
|
||||
return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
|
||||
case ISD::VP_FPTOUI:
|
||||
|
@ -4430,6 +4394,67 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
|
|||
return Result;
|
||||
}
|
||||
|
||||
SDValue RISCVTargetLowering::lowerVectorFPRoundLike(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
bool IsVPFPTrunc = Op.getOpcode() == ISD::VP_FP_ROUND;
|
||||
// RVV can only do truncate fp to types half the size as the source. We
|
||||
// custom-lower f64->f16 rounds via RVV's round-to-odd float
|
||||
// conversion instruction.
|
||||
SDLoc DL(Op);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
||||
assert(VT.isVector() && "Unexpected type for vector truncate lowering");
|
||||
|
||||
SDValue Src = Op.getOperand(0);
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
|
||||
bool IsDirectConv = VT.getVectorElementType() != MVT::f16 ||
|
||||
SrcVT.getVectorElementType() != MVT::f64;
|
||||
|
||||
// For FP_ROUND of scalable vectors, leave it to the pattern.
|
||||
if (!VT.isFixedLengthVector() && !IsVPFPTrunc && IsDirectConv)
|
||||
return Op;
|
||||
|
||||
// Prepare any fixed-length vector operands.
|
||||
MVT ContainerVT = VT;
|
||||
SDValue Mask, VL;
|
||||
if (IsVPFPTrunc) {
|
||||
Mask = Op.getOperand(1);
|
||||
VL = Op.getOperand(2);
|
||||
}
|
||||
if (VT.isFixedLengthVector()) {
|
||||
MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
|
||||
ContainerVT =
|
||||
SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
|
||||
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
|
||||
if (IsVPFPTrunc) {
|
||||
MVT MaskVT =
|
||||
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
|
||||
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
|
||||
}
|
||||
}
|
||||
|
||||
if (!IsVPFPTrunc)
|
||||
std::tie(Mask, VL) =
|
||||
getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
|
||||
|
||||
if (IsDirectConv) {
|
||||
Src = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, ContainerVT, Src, Mask, VL);
|
||||
if (VT.isFixedLengthVector())
|
||||
Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
|
||||
return Src;
|
||||
}
|
||||
|
||||
MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
|
||||
SDValue IntermediateRound =
|
||||
DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
|
||||
SDValue Round = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, ContainerVT,
|
||||
IntermediateRound, Mask, VL);
|
||||
if (VT.isFixedLengthVector())
|
||||
return convertFromScalableVector(VT, Round, DAG, Subtarget);
|
||||
return Round;
|
||||
}
|
||||
|
||||
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
|
||||
// first position of a vector, and that vector is slid up to the insert index.
|
||||
// By limiting the active vector length to index+1 and merging with the
|
||||
|
|
|
@ -614,6 +614,7 @@ private:
|
|||
int64_t ExtTrueVal) const;
|
||||
SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerVectorFPRoundLike(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -1591,6 +1591,13 @@ foreach fvti = AllFloatVectors in {
|
|||
VLOpFrag)),
|
||||
(!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX)
|
||||
fwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>;
|
||||
|
||||
def : Pat<(fvti.Vector (riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1),
|
||||
(fwti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK")
|
||||
(fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
|
||||
(fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32)
|
||||
|
||||
define <2 x half> @vfptrunc_v2f16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_v2f16_v2f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
|
||||
ret <2 x half> %v
|
||||
}
|
||||
|
||||
define <2 x half> @vfptrunc_v2f16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_v2f16_v2f32_unmasked:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v9, v8
|
||||
; CHECK-NEXT: vmv1r.v v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
|
||||
ret <2 x half> %v
|
||||
}
|
||||
|
||||
declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double>, <2 x i1>, i32)
|
||||
|
||||
define <2 x half> @vfptrunc_v2f16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_v2f16_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v8, v9, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
|
||||
ret <2 x half> %v
|
||||
}
|
||||
|
||||
define <2 x half> @vfptrunc_v2f16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_v2f16_v2f64_unmasked:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
|
||||
ret <2 x half> %v
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double>, <2 x i1>, i32)
|
||||
|
||||
define <2 x float> @vfptrunc_v2f32_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_v2f32_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> %m, i32 %vl)
|
||||
ret <2 x float> %v
|
||||
}
|
||||
|
||||
define <2 x float> @vfptrunc_v2f32_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_v2f32_v2f64_unmasked:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v9, v8
|
||||
; CHECK-NEXT: vmv1r.v v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
|
||||
ret <2 x float> %v
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
|
||||
|
||||
define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v9, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
|
||||
ret <vscale x 2 x half> %v
|
||||
}
|
||||
|
||||
define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32_unmasked:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v9, v8
|
||||
; CHECK-NEXT: vmv1r.v v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
|
||||
ret <vscale x 2 x half> %v
|
||||
}
|
||||
|
||||
declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
|
||||
|
||||
define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
|
||||
ret <vscale x 2 x half> %v
|
||||
}
|
||||
|
||||
define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64_unmasked:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v8, v10
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
|
||||
ret <vscale x 2 x half> %v
|
||||
}
|
||||
|
||||
declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
|
||||
|
||||
define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v10, v8, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v8, v10
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
|
||||
ret <vscale x 2 x float> %v
|
||||
}
|
||||
|
||||
define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
|
||||
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64_unmasked:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfncvt.f.f.w v10, v8
|
||||
; CHECK-NEXT: vmv.v.v v8, v10
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
|
||||
ret <vscale x 2 x float> %v
|
||||
}
|
Loading…
Reference in New Issue