forked from OSchip/llvm-project
[RISCV] Add passthru operand to RISCVISD::SETCC_VL.
Use it to fix a bug in the fceil/ffloor lowerings. We were setting the passthru to IMPLICIT_DEF before and using a mask agnostic policy. This means that where the incoming bits in the mask were 0, the corresponding bits in the outgoing mask could be anything. We want those bits in the outgoing mask to be 0, which means we need to pass the input mask as the passthru. This generates worse code because we are unable to allocate the v0 register to the output due to an earlyclobber constraint. We probably need a special TIED pseudoinstruction, and probably custom isel, since you can't use V0 twice in the input pattern. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D132058
This commit is contained in:
parent
c9a41fe60a
commit
961838cc13
|
@ -1877,8 +1877,9 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
// If abs(Src) was larger than MaxVal or nan, keep it.
|
||||
MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
|
||||
SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
|
||||
DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
|
||||
SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
|
||||
{Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
|
||||
DAG.getUNDEF(SetccVT), TrueMask, VL});
|
||||
|
||||
// Truncate to integer and convert back to FP.
|
||||
MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
|
||||
|
@ -1896,9 +1897,9 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
|
|||
DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
|
||||
SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
|
||||
DAG.getUNDEF(ContainerVT), SplatVal, VL);
|
||||
SDValue NeedAdjust =
|
||||
DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Truncated, Src,
|
||||
DAG.getCondCode(ISD::SETOLT), Mask, VL);
|
||||
SDValue NeedAdjust = DAG.getNode(
|
||||
RISCVISD::SETCC_VL, DL, SetccVT,
|
||||
{Truncated, Src, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
|
||||
Truncated = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Truncated,
|
||||
Splat, Truncated, NeedAdjust, VL);
|
||||
} else if (Op.getOpcode() == ISD::FFLOOR) {
|
||||
|
@ -1910,9 +1911,9 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
|
|||
DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
|
||||
SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
|
||||
DAG.getUNDEF(ContainerVT), SplatVal, VL);
|
||||
SDValue NeedAdjust =
|
||||
DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Src, Truncated,
|
||||
DAG.getCondCode(ISD::SETOLT), Mask, VL);
|
||||
SDValue NeedAdjust = DAG.getNode(
|
||||
RISCVISD::SETCC_VL, DL, SetccVT,
|
||||
{Src, Truncated, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
|
||||
Truncated = DAG.getNode(RISCVISD::FSUB_VL, DL, ContainerVT, Truncated,
|
||||
Splat, Truncated, NeedAdjust, VL);
|
||||
}
|
||||
|
@ -1973,8 +1974,9 @@ static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
// If abs(Src) was larger than MaxVal or nan, keep it.
|
||||
MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
|
||||
SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
|
||||
DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
|
||||
SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
|
||||
{Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
|
||||
DAG.getUNDEF(SetccVT), TrueMask, VL});
|
||||
|
||||
bool Ignored;
|
||||
APFloat Point5Pred = APFloat(0.5f);
|
||||
|
@ -3757,7 +3759,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
|
|||
case ISD::VP_SETCC:
|
||||
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
|
||||
return lowerVPSetCCMaskOp(Op, DAG);
|
||||
return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
|
||||
return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
|
||||
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
|
||||
return lowerVPStridedLoad(Op, DAG);
|
||||
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
|
||||
|
@ -4412,8 +4414,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
|
|||
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
|
||||
SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
|
||||
DAG.getUNDEF(ContainerVT), Mask, VL);
|
||||
Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
|
||||
DAG.getCondCode(ISD::SETNE), Mask, VL);
|
||||
Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
|
||||
{Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
|
||||
DAG.getUNDEF(MaskContainerVT), Mask, VL});
|
||||
if (MaskVT.isFixedLengthVector())
|
||||
Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
|
||||
return Trunc;
|
||||
|
@ -5036,8 +5039,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
|
||||
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
|
||||
SDValue SelectCond =
|
||||
DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
|
||||
DAG.getCondCode(ISD::SETEQ), Mask, VL);
|
||||
DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
|
||||
{VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
|
||||
DAG.getUNDEF(MaskVT), Mask, VL});
|
||||
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
|
||||
Vec, VL);
|
||||
}
|
||||
|
@ -6127,8 +6131,9 @@ RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
|
|||
MVT MaskVT = getMaskTypeFor(ContainerVT);
|
||||
SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
|
||||
|
||||
SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
|
||||
Op.getOperand(2), Mask, VL);
|
||||
SDValue Cmp =
|
||||
DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
|
||||
{Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
|
||||
|
||||
return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
|
||||
}
|
||||
|
@ -6529,8 +6534,9 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
|
|||
SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
|
||||
SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
|
||||
DAG.getUNDEF(InterimIVT), SplatZero);
|
||||
Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero,
|
||||
DAG.getCondCode(ISD::SETNE), Mask, VL);
|
||||
Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
|
||||
{Result, SplatZero, DAG.getCondCode(ISD::SETNE),
|
||||
DAG.getUNDEF(DstVT), Mask, VL});
|
||||
} else {
|
||||
MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
|
||||
DstVT.getVectorElementCount());
|
||||
|
|
|
@ -147,13 +147,14 @@ def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
|
|||
def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
|
||||
|
||||
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
|
||||
SDTypeProfile<1, 5, [SDTCVecEltisVT<0, i1>,
|
||||
SDTypeProfile<1, 6, [SDTCVecEltisVT<0, i1>,
|
||||
SDTCisVec<1>,
|
||||
SDTCisSameNumEltsAs<0, 1>,
|
||||
SDTCisSameAs<1, 2>,
|
||||
SDTCisVT<3, OtherVT>,
|
||||
SDTCisSameAs<0, 4>,
|
||||
SDTCisVT<5, XLenVT>]>>;
|
||||
SDTCisSameAs<0, 5>,
|
||||
SDTCisVT<6, XLenVT>]>>;
|
||||
|
||||
def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
|
||||
SDTypeProfile<1, 5, [SDTCisVec<0>,
|
||||
|
@ -490,10 +491,11 @@ multiclass VPatIntegerSetCCVL_VV<VTypeInfo vti, string instruction_name,
|
|||
CondCode cc> {
|
||||
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
|
||||
vti.RegClass:$rs2, cc,
|
||||
VR:$merge,
|
||||
(vti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
|
||||
(vti.Mask (IMPLICIT_DEF)),
|
||||
VR:$merge,
|
||||
vti.RegClass:$rs1,
|
||||
vti.RegClass:$rs2,
|
||||
(vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
|
||||
|
@ -505,10 +507,11 @@ multiclass VPatIntegerSetCCVL_VV_Swappable<VTypeInfo vti, string instruction_nam
|
|||
: VPatIntegerSetCCVL_VV<vti, instruction_name, cc> {
|
||||
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs2),
|
||||
vti.RegClass:$rs1, invcc,
|
||||
VR:$merge,
|
||||
(vti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
|
||||
(vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
|
||||
VR:$merge, vti.RegClass:$rs1,
|
||||
vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
|
||||
}
|
||||
|
||||
|
@ -517,15 +520,17 @@ multiclass VPatIntegerSetCCVL_VX_Swappable<VTypeInfo vti, string instruction_nam
|
|||
defvar instruction_masked = !cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK");
|
||||
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
|
||||
(SplatPat (XLenVT GPR:$rs2)), cc,
|
||||
VR:$merge,
|
||||
(vti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
|
||||
(instruction_masked VR:$merge, vti.RegClass:$rs1,
|
||||
GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
|
||||
def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)),
|
||||
(vti.Vector vti.RegClass:$rs1), invcc,
|
||||
VR:$merge,
|
||||
(vti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
|
||||
(instruction_masked VR:$merge, vti.RegClass:$rs1,
|
||||
GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
|
||||
}
|
||||
|
||||
|
@ -534,18 +539,20 @@ multiclass VPatIntegerSetCCVL_VI_Swappable<VTypeInfo vti, string instruction_nam
|
|||
defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
|
||||
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
|
||||
(SplatPat_simm5 simm5:$rs2), cc,
|
||||
VR:$merge,
|
||||
(vti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
|
||||
(instruction_masked VR:$merge, vti.RegClass:$rs1,
|
||||
XLenVT:$rs2, (vti.Mask V0), GPR:$vl,
|
||||
vti.Log2SEW)>;
|
||||
|
||||
// FIXME: Can do some canonicalization to remove these patterns.
|
||||
def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2),
|
||||
(vti.Vector vti.RegClass:$rs1), invcc,
|
||||
VR:$merge,
|
||||
(vti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
|
||||
(instruction_masked VR:$merge, vti.RegClass:$rs1,
|
||||
simm5:$rs2, (vti.Mask V0), GPR:$vl,
|
||||
vti.Log2SEW)>;
|
||||
}
|
||||
|
@ -557,18 +564,20 @@ multiclass VPatIntegerSetCCVL_VIPlus1_Swappable<VTypeInfo vti,
|
|||
defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
|
||||
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
|
||||
(splatpat_kind simm5:$rs2), cc,
|
||||
VR:$merge,
|
||||
(vti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
|
||||
(instruction_masked VR:$merge, vti.RegClass:$rs1,
|
||||
(DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
|
||||
vti.Log2SEW)>;
|
||||
|
||||
// FIXME: Can do some canonicalization to remove these patterns.
|
||||
def : Pat<(vti.Mask (riscv_setcc_vl (splatpat_kind simm5:$rs2),
|
||||
(vti.Vector vti.RegClass:$rs1), invcc,
|
||||
VR:$merge,
|
||||
(vti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
|
||||
(instruction_masked VR:$merge, vti.RegClass:$rs1,
|
||||
(DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
|
||||
vti.Log2SEW)>;
|
||||
}
|
||||
|
@ -580,28 +589,31 @@ multiclass VPatFPSetCCVL_VV_VF_FV<CondCode cc,
|
|||
def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
|
||||
fvti.RegClass:$rs2,
|
||||
cc,
|
||||
VR:$merge,
|
||||
(fvti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
|
||||
(fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
|
||||
VR:$merge, fvti.RegClass:$rs1,
|
||||
fvti.RegClass:$rs2, (fvti.Mask V0),
|
||||
GPR:$vl, fvti.Log2SEW)>;
|
||||
def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
|
||||
(SplatFPOp fvti.ScalarRegClass:$rs2),
|
||||
cc,
|
||||
VR:$merge,
|
||||
(fvti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
|
||||
(fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
|
||||
VR:$merge, fvti.RegClass:$rs1,
|
||||
fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
|
||||
GPR:$vl, fvti.Log2SEW)>;
|
||||
def : Pat<(fvti.Mask (riscv_setcc_vl (SplatFPOp fvti.ScalarRegClass:$rs2),
|
||||
(fvti.Vector fvti.RegClass:$rs1),
|
||||
cc,
|
||||
VR:$merge,
|
||||
(fvti.Mask V0),
|
||||
VLOpFrag)),
|
||||
(!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
|
||||
(fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
|
||||
VR:$merge, fvti.RegClass:$rs1,
|
||||
fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
|
||||
GPR:$vl, fvti.Log2SEW)>;
|
||||
}
|
||||
|
|
|
@ -14,15 +14,15 @@ define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
|
||||
ret <vscale x 1 x half> %a
|
||||
|
@ -39,15 +39,15 @@ define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI1_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
|
||||
ret <vscale x 2 x half> %a
|
||||
|
@ -64,15 +64,15 @@ define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI2_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
|
||||
ret <vscale x 4 x half> %a
|
||||
|
@ -89,12 +89,11 @@ define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
|
@ -115,12 +114,11 @@ define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
|
@ -141,12 +139,11 @@ define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
|
@ -167,15 +164,15 @@ define <vscale x 1 x float> @ceil_nxv1f32(<vscale x 1 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI6_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
|
||||
ret <vscale x 1 x float> %a
|
||||
|
@ -192,15 +189,15 @@ define <vscale x 2 x float> @ceil_nxv2f32(<vscale x 2 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
|
||||
ret <vscale x 2 x float> %a
|
||||
|
@ -217,12 +214,11 @@ define <vscale x 4 x float> @ceil_nxv4f32(<vscale x 4 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
|
@ -243,12 +239,11 @@ define <vscale x 8 x float> @ceil_nxv8f32(<vscale x 8 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
|
@ -269,12 +264,11 @@ define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
|
@ -295,15 +289,15 @@ define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
|
||||
ret <vscale x 1 x double> %a
|
||||
|
@ -320,12 +314,11 @@ define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI12_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
|
@ -346,12 +339,11 @@ define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI13_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
|
@ -372,12 +364,11 @@ define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
|
|
|
@ -14,15 +14,15 @@ define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x)
|
||||
ret <vscale x 1 x half> %a
|
||||
|
@ -39,15 +39,15 @@ define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI1_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x)
|
||||
ret <vscale x 2 x half> %a
|
||||
|
@ -64,15 +64,15 @@ define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI2_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x)
|
||||
ret <vscale x 4 x half> %a
|
||||
|
@ -89,12 +89,11 @@ define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
|
@ -115,12 +114,11 @@ define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
|
@ -141,12 +139,11 @@ define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
|
@ -167,15 +164,15 @@ define <vscale x 1 x float> @floor_nxv1f32(<vscale x 1 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI6_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> %x)
|
||||
ret <vscale x 1 x float> %a
|
||||
|
@ -192,15 +189,15 @@ define <vscale x 2 x float> @floor_nxv2f32(<vscale x 2 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> %x)
|
||||
ret <vscale x 2 x float> %a
|
||||
|
@ -217,12 +214,11 @@ define <vscale x 4 x float> @floor_nxv4f32(<vscale x 4 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
|
@ -243,12 +239,11 @@ define <vscale x 8 x float> @floor_nxv8f32(<vscale x 8 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
|
@ -269,12 +264,11 @@ define <vscale x 16 x float> @floor_nxv16f32(<vscale x 16 x float> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
|
@ -295,15 +289,15 @@ define <vscale x 1 x double> @floor_nxv1f64(<vscale x 1 x double> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> %x)
|
||||
ret <vscale x 1 x double> %a
|
||||
|
@ -320,12 +314,11 @@ define <vscale x 2 x double> @floor_nxv2f64(<vscale x 2 x double> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI12_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
|
@ -346,12 +339,11 @@ define <vscale x 4 x double> @floor_nxv4f64(<vscale x 4 x double> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI13_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
|
@ -372,12 +364,11 @@ define <vscale x 8 x double> @floor_nxv8f64(<vscale x 8 x double> %x) {
|
|||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
|
|
|
@ -2030,23 +2030,23 @@ define void @ceil_v8f16(<8 x half>* %x) {
|
|||
; CHECK-LABEL: ceil_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vle16.v v9, (a0)
|
||||
; CHECK-NEXT: vle16.v v10, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI94_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI94_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v9
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI94_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI94_1)(a1)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v9, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
|
||||
; CHECK-NEXT: vse16.v v9, (a0)
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse16.v v10, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
|
||||
|
@ -2059,23 +2059,23 @@ define void @ceil_v4f32(<4 x float>* %x) {
|
|||
; CHECK-LABEL: ceil_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vle32.v v9, (a0)
|
||||
; CHECK-NEXT: vle32.v v10, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI95_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI95_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v9
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI95_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI95_1)(a1)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v9, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
|
||||
; CHECK-NEXT: vse32.v v9, (a0)
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse32.v v10, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
|
||||
|
@ -2088,23 +2088,23 @@ define void @ceil_v2f64(<2 x double>* %x) {
|
|||
; CHECK-LABEL: ceil_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vle64.v v9, (a0)
|
||||
; CHECK-NEXT: vle64.v v10, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI96_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI96_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v9
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI96_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI96_1)(a1)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v10, v9, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vfadd.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
|
||||
; CHECK-NEXT: vse64.v v9, (a0)
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse64.v v10, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
|
||||
|
@ -2117,23 +2117,23 @@ define void @floor_v8f16(<8 x half>* %x) {
|
|||
; CHECK-LABEL: floor_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vle16.v v9, (a0)
|
||||
; CHECK-NEXT: vle16.v v10, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI97_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI97_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v9
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI97_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI97_1)(a1)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v9, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
|
||||
; CHECK-NEXT: vse16.v v9, (a0)
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse16.v v10, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
|
||||
|
@ -2146,23 +2146,23 @@ define void @floor_v4f32(<4 x float>* %x) {
|
|||
; CHECK-LABEL: floor_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vle32.v v9, (a0)
|
||||
; CHECK-NEXT: vle32.v v10, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI98_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI98_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v9
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI98_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI98_1)(a1)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v9, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
|
||||
; CHECK-NEXT: vse32.v v9, (a0)
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse32.v v10, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
|
||||
|
@ -2175,23 +2175,23 @@ define void @floor_v2f64(<2 x double>* %x) {
|
|||
; CHECK-LABEL: floor_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vle64.v v9, (a0)
|
||||
; CHECK-NEXT: vle64.v v10, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI99_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI99_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v9
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v9, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI99_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI99_1)(a1)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
|
||||
; CHECK-NEXT: vmflt.vv v0, v9, v10, v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vfsub.vf v10, v10, ft0, v0.t
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v9, v10, v9, v0.t
|
||||
; CHECK-NEXT: vse64.v v9, (a0)
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse64.v v10, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
|
||||
|
|
Loading…
Reference in New Issue