From 31156bbdb9b3d27cf762eeec6a1b06db6d6c652c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 13 Jan 2019 02:59:59 +0000 Subject: [PATCH] [X86] Add more ISD nodes to handle masked versions of VCVT(T)PD2DQZ128/VCVT(T)PD2UDQZ128 which only produce 2 result elements and zero the upper elements. We can't represent this properly with vselect like we normally do. We also have to update the instruction definition to use a VK2WM mask instead of VK4WM to represent this. Fixes another case from PR34877 llvm-svn: 351018 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 5 + llvm/lib/Target/X86/X86ISelLowering.h | 4 + llvm/lib/Target/X86/X86InstrAVX512.td | 124 ++++++++++++++++++- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 13 ++ llvm/lib/Target/X86/X86IntrinsicsInfo.h | 18 +-- 5 files changed, 152 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e1e30ec5aaee..ed1672cd4e93 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22059,6 +22059,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getMergeValues(Results, dl); } case CVTPD2PS_MASK: + case CVTPD2I_MASK: case TRUNCATE_TO_REG: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); @@ -27376,6 +27377,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI"; case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI"; + case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI"; + case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI"; case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND"; case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND"; case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI"; @@ -27395,6 +27398,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTPH2PS_RND: return
"X86ISD::CVTPH2PS_RND"; case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI"; case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI"; + case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI"; + case X86ISD::MCVTP2UI: return "X86ISD::MCVTP2UI"; case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND"; case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND"; case X86ISD::CVTS2SI: return "X86ISD::CVTS2SI"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 4b4c2d94e6fb..4866cb2b82a6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -513,6 +513,10 @@ namespace llvm { // Vector signed/unsigned integer to float/double. CVTSI2P, CVTUI2P, + // Masked versions of above. Used for v2f64->v4i32. + // SRC, PASSTHRU, MASK + MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI, + // Save xmm argument registers to the stack, according to %al. An operator // is needed so that this can be expanded with control flow. VASTART_SAVE_XMM_REGS, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index e05669f9ef9d..7423cb85acd2 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8214,7 +8214,8 @@ multiclass avx512_cvttpd2dq opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp, EVEX_V128; + null_frag, sched.XMM, "{1to2}", "{x}", f128mem, + VK2WM>, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; @@ -8243,8 +8244,9 @@ multiclass avx512_cvtpd2dq opc, string OpcodeStr, SDNode OpNode, // memory forms of these instructions in Asm Parcer. They have the same // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason.
- defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; @@ -8527,6 +8529,122 @@ let Predicates = [HasVLX] in { (VCVTTPD2UDQZ256rr VR256X:$src)>; def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))), (VCVTTPD2UDQZ256rm addr:$src)>; + + // Special patterns to allow use of X86mcvtp2Int for masking. Instruction + // patterns have been disabled with null_frag. + def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))), + (VCVTPD2DQZ128rr VR128X:$src)>; + def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0), + VK2WM:$mask), + (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; + def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; + + def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))), + (VCVTPD2DQZ128rm addr:$src)>; + def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0), + VK2WM:$mask), + (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; + + def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + (VCVTPD2DQZ128rmb addr:$src)>; + def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))), + (v4i32 VR128X:$src0), VK2WM:$mask), + (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))), + v4i32x_info.ImmAllZerosV, VK2WM:$mask), + (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; + + // Special patterns to allow use of X86mcvttp2si for masking. Instruction + // patterns have been disabled with null_frag. 
+ def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))), + (VCVTTPD2DQZ128rr VR128X:$src)>; + def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0), + VK2WM:$mask), + (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; + def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; + + def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))), + (VCVTTPD2DQZ128rm addr:$src)>; + def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0), + VK2WM:$mask), + (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; + + def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + (VCVTTPD2DQZ128rmb addr:$src)>; + def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))), + (v4i32 VR128X:$src0), VK2WM:$mask), + (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))), + v4i32x_info.ImmAllZerosV, VK2WM:$mask), + (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; + + // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction + // patterns have been disabled with null_frag. 
+ def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))), + (VCVTPD2UDQZ128rr VR128X:$src)>; + def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0), + VK2WM:$mask), + (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; + def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; + + def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))), + (VCVTPD2UDQZ128rm addr:$src)>; + def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0), + VK2WM:$mask), + (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; + + def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + (VCVTPD2UDQZ128rmb addr:$src)>; + def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), + (v4i32 VR128X:$src0), VK2WM:$mask), + (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), + v4i32x_info.ImmAllZerosV, VK2WM:$mask), + (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; + + // Special patterns to allow use of X86mcvttp2ui for masking. Instruction + // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))), + (VCVTTPD2UDQZ128rr VR128X:$src)>; + def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), + VK2WM:$mask), + (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; + def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; + + def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))), + (VCVTTPD2UDQZ128rm addr:$src)>; + def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), + VK2WM:$mask), + (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; + + def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + (VCVTTPD2UDQZ128rmb addr:$src)>; + def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), + (v4i32 VR128X:$src0), VK2WM:$mask), + (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), + v4i32x_info.ImmAllZerosV, VK2WM:$mask), + (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; } let Predicates = [HasDQI] in { diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 1a79ebec6207..47c54ed70406 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -590,6 +590,19 @@ def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>; def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>; +def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisFP<1>, + SDTCisSameSizeAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<1, 3>]>; + +def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>; +def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>; +def X86mcvttp2si : 
SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>; +def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>; + + def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, i16>]> >; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 3959e35581f2..c9cfd3065935 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -32,7 +32,7 @@ enum IntrinsicType : uint16_t { IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK, COMPRESS_EXPAND_IN_REG, - TRUNCATE_TO_REG, CVTPS2PH_MASK, + TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2I_MASK, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, GATHER_AVX2, @@ -458,8 +458,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CONFLICT, 0), X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er - X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK, - X86ISD::CVTP2SI, 0), + X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2I_MASK, + X86ISD::CVTP2SI, X86ISD::MCVTP2SI), X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK, @@ -472,8 +472,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, INTR_TYPE_1OP_MASK, - X86ISD::CVTP2UI, 0), + X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2I_MASK, + X86ISD::CVTP2UI, X86ISD::MCVTP2UI), X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK, X86ISD::CVTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_512, INTR_TYPE_1OP_MASK, @@ -522,8 +522,8 @@ static const IntrinsicData 
IntrinsicsWithoutChain[] = { X86ISD::VFPROUNDS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VFPEXTS_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2SI, 0), + X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2I_MASK, + X86ISD::CVTTP2SI, X86ISD::MCVTTP2SI), X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK, @@ -532,8 +532,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK, - X86ISD::CVTTP2UI, 0), + X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2I_MASK, + X86ISD::CVTTP2UI, X86ISD::MCVTTP2UI), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK, X86ISD::CVTTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,