From 9a4a3af5ddf886383d69f745b80b4aeaa5ee6e95 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Nov 2016 04:12:42 +0000 Subject: [PATCH] [AVX-512] Lower SSE/AVX cvtdq2ps intrinsics directly to ISD::SINT_TO_FP so they can use EVEX instructions when available. llvm-svn: 286056 --- llvm/lib/Target/X86/X86InstrSSE.td | 18 -------------- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 2 ++ llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 26 ++++++++++++++------ llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 13 +++++++--- 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 2a2539e473c6..dbcf0d736c77 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2031,13 +2031,6 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), - (VCVTDQ2PSrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))), - (VCVTDQ2PSrm addr:$src)>; -} - let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), (VCVTDQ2PSrr VR128:$src)>; @@ -2066,11 +2059,6 @@ let Predicates = [UseSSE2] in { def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), (CVTDQ2PSrm addr:$src)>; - def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), - (CVTDQ2PSrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), - (CVTDQ2PSrm addr:$src)>; - def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (CVTTPS2DQrr VR128:$src)>; def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))), @@ -2263,12 +2251,6 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // AVX 256-bit register conversion intrinsics // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below // whenever possible to avoid declaring two versions of each one. -let Predicates = [HasAVX] in { - def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), - (VCVTDQ2PSYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))), - (VCVTDQ2PSYrm addr:$src)>; -} let Predicates = [HasAVX, NoVLX] in { // Match fpround and fpextend for 128/256-bit conversions diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index a2fffe89df68..75654717c715 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -251,6 +251,7 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) { * the alphabetical order. */ static const IntrinsicData IntrinsicsWithoutChain[] = { + X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0), @@ -1749,6 +1750,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE), X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT), X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE), + X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0), X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0), diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index 466abf146fa3..a6835ad65e2a 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -230,10 +230,15 @@ declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readn define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { -; CHECK-LABEL: test_x86_sse2_cvtdq2ps: -; CHECK: ## BB#0: -; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0] -; CHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse2_cvtdq2ps: +; AVX: ## BB#0: +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0] +; AVX-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_sse2_cvtdq2ps: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -2856,10 +2861,15 @@ declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { -; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0x5b,0xc0] -; CHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_avx_cvtdq2_ps_256: +; AVX: ## BB#0: +; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0x5b,0xc0] +; AVX-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5b,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] ret <8 x float> %res } diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index e95b449ae440..79453011f9fe 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -231,10 +231,15 @@ define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse2_cvtdq2ps: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX2-LABEL: test_x86_sse2_cvtdq2ps: +; AVX2: ## BB#0: +; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_cvtdq2ps: +; SKX: ## BB#0: +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res }