From 2d306b2d57f22cc94cca3f83ee8b0d574f0a9579 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 2 Jul 2019 05:53:37 +0000 Subject: [PATCH] [X86] Add PreprocessISelDAG support for turning ISD::FP_TO_SINT/UINT into X86ISD::CVTTP2SI/CVTTP2UI and to reduce the number of isel patterns. llvm-svn: 364887 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 21 ++++ llvm/lib/Target/X86/X86InstrAVX512.td | 122 ++---------------------- llvm/lib/Target/X86/X86InstrSSE.td | 18 ---- 3 files changed, 30 insertions(+), 131 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 2a4477be43c1..ccda8babf514 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -792,6 +792,27 @@ void X86DAGToDAGISel::PreprocessISelDAG() { } switch (N->getOpcode()) { + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + // Replace vector fp_to_s/uint with their X86 specific equivalent so we + // don't need 2 sets of patterns. + if (!N->getSimpleValueType(0).isVector()) + break; + + unsigned NewOpc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected opcode!"); + case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break; + case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break; + } + SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), + N->getOperand(0)); + --I; + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); + ++I; + CurDAG->DeleteNode(N); + continue; + } case ISD::SHL: case ISD::SRA: case ISD::SRL: { diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 1ec34e3db02d..2cdcb1e1f0e5 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8173,59 +8173,7 @@ defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD, EVEX_CD8<64, CD8VF>; -let Predicates = [HasAVX512] in { - def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))), - (VCVTTPS2DQZrr VR512:$src)>; - def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))), - (VCVTTPS2DQZrm addr:$src)>; - - def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))), - (VCVTTPS2UDQZrr VR512:$src)>; - def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))), - (VCVTTPS2UDQZrm addr:$src)>; - - def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))), - (VCVTTPD2DQZrr VR512:$src)>; - def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))), - (VCVTTPD2DQZrm addr:$src)>; - - def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))), - (VCVTTPD2UDQZrr VR512:$src)>; - def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))), - (VCVTTPD2UDQZrm addr:$src)>; -} - let Predicates = [HasVLX] in { - def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))), - (VCVTTPS2DQZ128rr VR128X:$src)>; - def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))), - (VCVTTPS2DQZ128rm addr:$src)>; - - def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))), - (VCVTTPS2UDQZ128rr VR128X:$src)>; - def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))), - (VCVTTPS2UDQZ128rm addr:$src)>; - - def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))), - (VCVTTPS2DQZ256rr VR256X:$src)>; - def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))), - (VCVTTPS2DQZ256rm addr:$src)>; - - def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))), - (VCVTTPS2UDQZ256rr VR256X:$src)>; - def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))), - (VCVTTPS2UDQZ256rm addr:$src)>; - - def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))), - (VCVTTPD2DQZ256rr VR256X:$src)>; - def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), - (VCVTTPD2DQZ256rm addr:$src)>; - - def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))), - (VCVTTPD2UDQZ256rr VR256X:$src)>; - def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))), - (VCVTTPD2UDQZ256rm addr:$src)>; - // Special patterns to allow use of X86mcvtp2Int for masking. Instruction // patterns have been disabled with null_frag. def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))), @@ -8343,28 +8291,6 @@ let Predicates = [HasVLX] in { (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; } -let Predicates = [HasDQI] in { - def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))), - (VCVTTPS2QQZrr VR256X:$src)>; - def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))), - (VCVTTPS2QQZrm addr:$src)>; - - def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))), - (VCVTTPS2UQQZrr VR256X:$src)>; - def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))), - (VCVTTPS2UQQZrm addr:$src)>; - - def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))), - (VCVTTPD2QQZrr VR512:$src)>; - def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))), - (VCVTTPD2QQZrm addr:$src)>; - - def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))), - (VCVTTPD2UQQZrr VR512:$src)>; - def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))), - (VCVTTPD2UQQZrm addr:$src)>; -} - let Predicates = [HasDQI, HasVLX] in { def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src))))), (VCVTPS2QQZ128rm addr:$src)>; @@ -8409,50 +8335,20 @@ let Predicates = [HasDQI, HasVLX] in { (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))), v2i64x_info.ImmAllZerosV)), (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; - - def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))), - (VCVTTPS2QQZ256rr VR128X:$src)>; - def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))), - (VCVTTPS2QQZ256rm addr:$src)>; - - def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))), - (VCVTTPS2UQQZ256rr VR128X:$src)>; - def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))), - (VCVTTPS2UQQZ256rm addr:$src)>; - - def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))), - (VCVTTPD2QQZ128rr VR128X:$src)>; - def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))), - (VCVTTPD2QQZ128rm addr:$src)>; - - def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))), - (VCVTTPD2UQQZ128rr VR128X:$src)>; - def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))), - (VCVTTPD2UQQZ128rm addr:$src)>; - - def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))), - (VCVTTPD2QQZ256rr VR256X:$src)>; - def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))), - (VCVTTPD2QQZ256rm addr:$src)>; - - def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))), - (VCVTTPD2UQQZ256rr VR256X:$src)>; - def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))), - (VCVTTPD2UQQZ256rm addr:$src)>; } let Predicates = [HasAVX512, NoVLX] in { -def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), +def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; -def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), +def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; -def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))), +def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; @@ -8563,32 +8459,32 @@ let Predicates = [HasDQI, HasVLX] in { } let Predicates = [HasDQI, NoVLX] in { -def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))), +def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; -def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))), +def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_ymm)>; -def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))), +def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; -def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))), +def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; -def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))), +def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_ymm)>; -def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))), +def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 133161864900..c79cf7ade887 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1480,17 +1480,6 @@ def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src) Sched<[WriteCvtPS2IYLd]>, VEX_WIG; } -let Predicates = [HasAVX, NoVLX] in { - def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (VCVTTPS2DQrr VR128:$src)>; - def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))), - (VCVTTPS2DQrm addr:$src)>; - def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))), - (VCVTTPS2DQYrr VR256:$src)>; - def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))), - (VCVTTPS2DQYrm addr:$src)>; -} - def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -1502,13 +1491,6 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>, Sched<[WriteCvtPS2ILd]>; -let Predicates = [UseSSE2] in { - def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (CVTTPS2DQrr VR128:$src)>; - def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))), - (CVTTPS2DQrm addr:$src)>; -} - // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly.