forked from OSchip/llvm-project
[SVE][CodeGen] Lower floating point -> integer conversions
This patch adds new ISD nodes, FCVTZS_MERGE_PASSTHRU & FCVTZU_MERGE_PASSTHRU, which are used to lower scalable vector FP_TO_SINT/FP_TO_UINT operations and the following intrinsics:
- llvm.aarch64.sve.fcvtzu
- llvm.aarch64.sve.fcvtzs

Reviewed By: efriedma, paulwalker-arm

Differential Revision: https://reviews.llvm.org/D87232
This commit is contained in:
parent
279943edf8
commit
f7185b271f
|
@ -145,6 +145,8 @@ static bool isMergePassthruOpcode(unsigned Opc) {
|
|||
case AArch64ISD::FROUND_MERGE_PASSTHRU:
|
||||
case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
|
||||
case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
|
||||
case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
|
||||
case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
|
||||
case AArch64ISD::FSQRT_MERGE_PASSTHRU:
|
||||
return true;
|
||||
}
|
||||
|
@ -945,6 +947,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
|
||||
if (isTypeLegal(VT)) {
|
||||
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
|
||||
setOperationAction(ISD::MUL, VT, Custom);
|
||||
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
|
@ -1504,6 +1508,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
|
||||
MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
|
||||
MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
|
||||
MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
|
||||
MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
|
||||
MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
|
||||
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
|
||||
MAKE_CASE(AArch64ISD::ADC)
|
||||
|
@ -2870,6 +2876,14 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
|
|||
// in the cost tables.
|
||||
EVT InVT = Op.getOperand(0).getValueType();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
if (VT.isScalableVector()) {
|
||||
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
|
||||
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
|
||||
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
|
||||
return LowerToPredicatedOp(Op, DAG, Opcode);
|
||||
}
|
||||
|
||||
unsigned NumElts = InVT.getVectorNumElements();
|
||||
|
||||
// f16 conversions are promoted to f32 when full fp16 is not supported.
|
||||
|
@ -3388,6 +3402,14 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
case Intrinsic::aarch64_sve_frintz:
|
||||
return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
|
||||
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
|
||||
case Intrinsic::aarch64_sve_fcvtzu:
|
||||
return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
|
||||
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
|
||||
Op.getOperand(1));
|
||||
case Intrinsic::aarch64_sve_fcvtzs:
|
||||
return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
|
||||
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
|
||||
Op.getOperand(1));
|
||||
case Intrinsic::aarch64_sve_fsqrt:
|
||||
return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
|
||||
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
|
||||
|
|
|
@ -104,6 +104,8 @@ enum NodeType : unsigned {
|
|||
FROUNDEVEN_MERGE_PASSTHRU,
|
||||
FSQRT_MERGE_PASSTHRU,
|
||||
FTRUNC_MERGE_PASSTHRU,
|
||||
FCVTZU_MERGE_PASSTHRU,
|
||||
FCVTZS_MERGE_PASSTHRU,
|
||||
SIGN_EXTEND_INREG_MERGE_PASSTHRU,
|
||||
ZERO_EXTEND_INREG_MERGE_PASSTHRU,
|
||||
|
||||
|
|
|
@ -211,6 +211,14 @@ def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch
|
|||
def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>;
|
||||
def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>;
|
||||
|
||||
def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
|
||||
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
|
||||
SDTCVecEltisVT<1,i1>
|
||||
]>;
|
||||
|
||||
def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>;
|
||||
def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>;
|
||||
|
||||
def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
|
||||
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
|
||||
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
|
||||
|
@ -1388,40 +1396,40 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
|||
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
|
||||
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
|
||||
|
||||
defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>;
|
||||
defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>;
|
||||
defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
|
||||
defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
|
||||
defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
|
||||
defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
|
||||
defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
|
||||
defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
|
||||
defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
|
||||
defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
|
||||
defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>;
|
||||
defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>;
|
||||
defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>;
|
||||
defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>;
|
||||
defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
|
||||
defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
|
||||
defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
|
||||
defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
|
||||
defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
|
||||
defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
|
||||
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, null_frag, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>;
|
||||
defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, null_frag, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>;
|
||||
defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, null_frag, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
|
||||
defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, null_frag, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
|
||||
defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, null_frag, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
|
||||
defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, null_frag, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
|
||||
defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
|
||||
defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
|
||||
defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
|
||||
defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
|
||||
defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, null_frag, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, null_frag, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>;
|
||||
defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, null_frag, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, null_frag, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>;
|
||||
defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, null_frag, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>;
|
||||
defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, null_frag, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, null_frag, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>;
|
||||
defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, null_frag, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, null_frag, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, null_frag, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, null_frag, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, null_frag, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
|
||||
defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
|
||||
defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
|
||||
defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
|
||||
defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
|
||||
defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
|
||||
defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
|
||||
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
|
||||
|
||||
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", null_frag, AArch64frintn_mt>;
|
||||
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", null_frag, AArch64frintp_mt>;
|
||||
|
|
|
@ -2279,11 +2279,20 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
|
|||
multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
|
||||
RegisterOperand i_zprtype,
|
||||
RegisterOperand o_zprtype,
|
||||
SDPatternOperator op, ValueType vt1,
|
||||
SDPatternOperator int_op,
|
||||
SDPatternOperator ir_op, ValueType vt1,
|
||||
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
|
||||
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
|
||||
|
||||
def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;
|
||||
// convert vt3 to a packed type for the intrinsic patterns
|
||||
defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16,
|
||||
!eq(!cast<string>(vt3), "nxv4f16"): nxv8f16,
|
||||
!eq(!cast<string>(vt3), "nxv2f32"): nxv4f32,
|
||||
1 : vt3);
|
||||
|
||||
def : SVE_3_Op_Pat<vt1, int_op, vt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
|
||||
|
||||
def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
|
||||
}
|
||||
|
||||
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op_merge,
|
||||
|
|
|
@ -0,0 +1,296 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
;
|
||||
; FP_TO_SINT
|
||||
;
|
||||
|
||||
define <vscale x 2 x i16> @fcvtzs_h_nxv2f16(<vscale x 2 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzs_h_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i16>
|
||||
ret <vscale x 2 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i16> @fcvtzs_h_nxv2f32(<vscale x 2 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzs_h_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i16>
|
||||
ret <vscale x 2 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i16> @fcvtzs_h_nxv2f64(<vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzs_h_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i16>
|
||||
ret <vscale x 2 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i16> @fcvtzs_h_nxv4f16(<vscale x 4 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzs_h_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 4 x half> %a to <vscale x 4 x i16>
|
||||
ret <vscale x 4 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i16> @fcvtzs_h_nxv4f32(<vscale x 4 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzs_h_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i16>
|
||||
ret <vscale x 4 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @fcvtzs_h_nxv8f16(<vscale x 8 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzs_h_nxv8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 8 x half> %a to <vscale x 8 x i16>
|
||||
ret <vscale x 8 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i32> @fcvtzs_s_nxv2f16(<vscale x 2 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzs_s_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i32>
|
||||
ret <vscale x 2 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i32> @fcvtzs_s_nxv2f32(<vscale x 2 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzs_s_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i32>
|
||||
ret <vscale x 2 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i32> @fcvtzs_s_nxv2f64(<vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzs_s_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i32>
|
||||
ret <vscale x 2 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @fcvtzs_s_nxv4f16(<vscale x 4 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzs_s_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 4 x half> %a to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @fcvtzs_s_nxv4f32(<vscale x 4 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzs_s_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fcvtzs_d_nxv2f16(<vscale x 2 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzs_d_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fcvtzs_d_nxv2f32(<vscale x 2 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzs_d_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fcvtzs_d_nxv2f64(<vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzs_d_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
;
|
||||
; FP_TO_UINT
|
||||
;
|
||||
|
||||
; NOTE: Using fcvtzs is safe as fptoui overflow is considered poison and a
|
||||
; 64bit signed value encompasses the entire range of a 16bit unsigned value
|
||||
define <vscale x 2 x i16> @fcvtzu_h_nxv2f16(<vscale x 2 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzu_h_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i16>
|
||||
ret <vscale x 2 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i16> @fcvtzu_h_nxv2f32(<vscale x 2 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzu_h_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x float> %a to <vscale x 2 x i16>
|
||||
ret <vscale x 2 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i16> @fcvtzu_h_nxv2f64(<vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzu_h_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x double> %a to <vscale x 2 x i16>
|
||||
ret <vscale x 2 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i16> @fcvtzu_h_nxv4f16(<vscale x 4 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzu_h_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 4 x half> %a to <vscale x 4 x i16>
|
||||
ret <vscale x 4 x i16> %res
|
||||
}
|
||||
|
||||
; Unsigned conversion of <vscale x 4 x float> to i16 lanes. The CHECK lines
; expect the signed fcvtzs on .s lanes: out-of-range fptoui results are poison
; and a signed 32-bit value covers the whole unsigned 16-bit range.
define <vscale x 4 x i16> @fcvtzu_h_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzu_h_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: ret
  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %res
}
|
||||
|
||||
define <vscale x 8 x i16> @fcvtzu_h_nxv8f16(<vscale x 8 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzu_h_nxv8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 8 x half> %a to <vscale x 8 x i16>
|
||||
ret <vscale x 8 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i32> @fcvtzu_s_nxv2f16(<vscale x 2 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzu_s_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i32>
|
||||
ret <vscale x 2 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i32> @fcvtzu_s_nxv2f32(<vscale x 2 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzu_s_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x float> %a to <vscale x 2 x i32>
|
||||
ret <vscale x 2 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i32> @fcvtzu_s_nxv2f64(<vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzu_s_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x double> %a to <vscale x 2 x i32>
|
||||
ret <vscale x 2 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @fcvtzu_s_nxv4f16(<vscale x 4 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzu_s_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 4 x half> %a to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @fcvtzu_s_nxv4f32(<vscale x 4 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzu_s_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fcvtzu_d_nxv2f16(<vscale x 2 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzu_d_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fcvtzu_d_nxv2f32(<vscale x 2 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzu_d_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x float> %a to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fcvtzu_d_nxv2f64(<vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzu_d_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 2 x double> %a to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
; FP_TO_SINT
|
||||
|
||||
; Split operand
|
||||
define <vscale x 4 x i32> @fcvtzs_s_nxv4f64(<vscale x 4 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzs_s_nxv4f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @fcvtzs_h_nxv8f64(<vscale x 8 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzs_h_nxv8f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
|
||||
; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
|
||||
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i16>
|
||||
ret <vscale x 8 x i16> %res
|
||||
}
|
||||
|
||||
; Split result
|
||||
define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzs_d_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: uunpklo z1.d, z0.s
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: uunpkhi z2.d, z0.s
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
|
||||
; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
|
||||
ret <vscale x 4 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
|
||||
; CHECK-LABEL: fcvtzs_s_nxv16f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: uunpklo z2.s, z0.h
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: uunpkhi z3.s, z0.h
|
||||
; CHECK-NEXT: uunpklo z4.s, z1.h
|
||||
; CHECK-NEXT: uunpkhi z5.s, z1.h
|
||||
; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h
|
||||
; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h
|
||||
; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h
|
||||
; CHECK-NEXT: fcvtzs z3.s, p0/m, z5.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>
|
||||
ret <vscale x 16 x i32> %res
|
||||
}
|
||||
|
||||
; FP_TO_UINT
|
||||
|
||||
; Split operand
|
||||
define <vscale x 4 x i32> @fcvtzu_s_nxv4f64(<vscale x 4 x double> %a) {
|
||||
; CHECK-LABEL: fcvtzu_s_nxv4f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
|
||||
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
; Split result
|
||||
define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
|
||||
; CHECK-LABEL: fcvtzu_d_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: uunpklo z1.d, z0.s
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: uunpkhi z2.d, z0.s
|
||||
; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s
|
||||
; CHECK-NEXT: fcvtzu z1.d, p0/m, z2.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
|
||||
ret <vscale x 4 x i64> %res
|
||||
}
|
Loading…
Reference in New Issue