diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 451303054f56..d4676b57455d 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3186,7 +3186,7 @@ let Predicates = prds in { /// sse2_fp_unop_p - SSE2 unops in vector forms. multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def V#NAME#PDr : PDI opc, string OpcodeStr, SDNode OpNode, // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX]>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX, NoVLX]>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>; diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index 44eb14160ee1..b0cf4e3b29f6 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -635,10 +635,15 @@ declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { -; CHECK-LABEL: test_x86_avx_sqrt_pd_256: -; CHECK: # BB#0: -; CHECK-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; AVX-LABEL: test_x86_avx_sqrt_pd_256: +; AVX: # BB#0: +; AVX-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0] +; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] ret <4 x double> %res } @@ -646,10 +651,15 @@ declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { -; CHECK-LABEL: test_x86_avx_sqrt_ps_256: -; CHECK: # BB#0: -; CHECK-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; AVX-LABEL: test_x86_avx_sqrt_ps_256: +; AVX: # BB#0: +; AVX-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0] +; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] ret <8 x float> %res } diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll index f178e18a2596..5ba9f9a2645e 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll @@ -475,10 +475,15 @@ define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { ; SSE-NEXT: sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse_sqrt_ps: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX2-LABEL: test_x86_sse_sqrt_ps: +; AVX2: ## BB#0: +; AVX2-NEXT: vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse_sqrt_ps: +; SKX: ## BB#0: +; SKX-NEXT: vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index d4047faad9bb..dcccdbfc2e67 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1592,10 +1592,15 @@ define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { ; SSE-NEXT: sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse2_sqrt_pd: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX2-LABEL: test_x86_sse2_sqrt_pd: +; AVX2: ## BB#0: +; AVX2-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_sqrt_pd: +; SKX: ## BB#0: +; SKX-NEXT: vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] ret <2 x double> %res }