[AVX-512] Split X86ISD::VFPROUND and X86ISD::VFPEXT into separate opcodes for each type constraint.

This revealed that scalar intrinsics could create nodes with a rounding mode of FROUND_CUR_DIRECTION, but the patterns didn't check for it. It just worked because isel doesn't check operand count and we had a pattern without the rounding mode argument at all.

llvm-svn: 282231
This commit is contained in:
Craig Topper 2016-09-23 06:24:43 +00:00
parent 3174b6e467
commit a02e394872
5 changed files with 24 additions and 29 deletions

View File

@ -22514,7 +22514,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS"; case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
case X86ISD::VINSERT: return "X86ISD::VINSERT"; case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD"; case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD"; case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK"; case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";

View File

@ -297,10 +297,10 @@ namespace llvm {
VTRUNCUS, VTRUNCS, VTRUNCUS, VTRUNCS,
// Vector FP extend. // Vector FP extend.
VFPEXT, VFPEXT, VFPEXT_RND, VFPEXTS_RND,
// Vector FP round. // Vector FP round.
VFPROUND, VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
// Vector signed/unsigned integer to double. // Vector signed/unsigned integer to double.
CVTDQ2PD, CVTUDQ2PD, CVTDQ2PD, CVTUDQ2PD,

View File

@ -5725,14 +5725,16 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2", "$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1), (_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2)))>, (_Src.VT _Src.RC:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr, (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2", "$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1), (_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT (scalar_to_vector (_Src.VT (scalar_to_vector
(_Src.ScalarLdFrag addr:$src2)))))>, (_Src.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>; EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
} }
@ -5759,29 +5761,29 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
EVEX_B, EVEX_RC; EVEX_B, EVEX_RC;
} }
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _src, SDNode OpNodeRnd, X86VectorVTInfo _src,
X86VectorVTInfo _dst> { X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in { let Predicates = [HasAVX512] in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>, defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>, OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>,
EVEX_V512, XD; EVEX_V512, XD;
} }
} }
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _src, SDNode OpNodeRnd, X86VectorVTInfo _src,
X86VectorVTInfo _dst> { X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in { let Predicates = [HasAVX512] in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>, defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>, avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
EVEX_CD8<32, CD8VT1>, XS, EVEX_V512; EVEX_CD8<32, CD8VT1>, XS, EVEX_V512;
} }
} }
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround, defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
X86froundRnd, f64x_info, f32x_info>; X86froundRnd, f64x_info, f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext, defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
X86fpextRnd,f32x_info, f64x_info >; X86fpextRnd,f32x_info, f64x_info >;
def : Pat<(f64 (fpextend FR32X:$src)), def : Pat<(f64 (fpextend FR32X:$src)),

View File

@ -143,25 +143,14 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTCVecEltisVT<1, f64>, SDTCVecEltisVT<1, f64>,
SDTCisSameSizeAs<0, 1>]>>; SDTCisSameSizeAs<0, 1>]>>;
def X86fround: SDNode<"X86ISD::VFPROUND", def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCisSameAs<0, 1>,
SDTCVecEltisVT<2, f64>,
SDTCisSameSizeAs<0, 2>]>>;
def X86froundRnd: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>, SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 1>,
SDTCVecEltisVT<2, f64>, SDTCVecEltisVT<2, f64>,
SDTCisSameSizeAs<0, 2>, SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>; SDTCisVT<3, i32>]>>;
def X86fpext : SDNode<"X86ISD::VFPEXT", def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
SDTCisSameAs<0, 1>,
SDTCVecEltisVT<2, f32>,
SDTCisSameSizeAs<0, 2>]>>;
def X86fpextRnd : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f64>, SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f64>,
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 1>,
SDTCVecEltisVT<2, f32>, SDTCVecEltisVT<2, f32>,
@ -567,12 +556,12 @@ def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>, SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>, SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>]> >; SDTCisVT<2, i32>]> >;
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT", def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>, SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>, SDTCVecEltisVT<1, f32>,
SDTCisOpSmallerThanOp<1, 0>, SDTCisOpSmallerThanOp<1, 0>,
SDTCisVT<2, i32>]>>; SDTCisVT<2, i32>]>>;
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND", def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>, SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>, SDTCVecEltisVT<1, f64>,
SDTCisOpSmallerThanOp<0, 1>, SDTCisOpSmallerThanOp<0, 1>,

View File

@ -502,7 +502,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS, X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS,
ISD::FP_ROUND, 0), ISD::FP_ROUND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS, X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS,
ISD::FP_ROUND, X86ISD::VFPROUND), ISD::FP_ROUND, X86ISD::VFPROUND_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0), X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK,
@ -532,7 +532,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_256, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_256, INTR_TYPE_1OP_MASK,
ISD::FP_EXTEND, 0), ISD::FP_EXTEND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK,
ISD::FP_EXTEND, X86ISD::VFPEXT), ISD::FP_EXTEND, X86ISD::VFPEXT_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0), X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK,
@ -564,9 +564,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM, X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPROUND, 0), X86ISD::VFPROUNDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM, X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPEXT, 0), X86ISD::VFPEXTS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0), ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK, X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,