[X86] Add avx512f and avx512dq+vl command lines to the vector strictfp int<->fp tests.

Craig Topper 2019-12-25 23:34:32 -08:00
parent 287307a0c6
commit de60c2633b
4 changed files with 1290 additions and 14 deletions
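For context, a minimal sketch of the coverage the new command lines add (the exact RUN lines and check prefixes appear in the diffs below): the same strict-FP conversion intrinsics are now also compiled for an AVX512F-only feature set and for AVX512DQ+VL, each verified under its own FileCheck prefix. The RUN lines, intrinsic, and metadata below are copied from the test files; the function name and the spelling of the #0 attribute group are illustrative assumptions, not part of the excerpt.

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64

declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)

; One representative conversion that each new configuration must legalize
; under strict FP semantics (no spurious exceptions, no constant folding).
define <2 x i64> @fptosi_sketch(<2 x double> %a) #0 {
  %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %a,
                                                                          metadata !"fpexcept.strict")
  ret <2 x i64> %ret
}

attributes #0 = { strictfp }

With AVX512DQ+VL this v2f64->v2i64 case legalizes to a single vcvttpd2qq, while plain AVX512F still splits into scalar vcvttsd2si conversions; that difference is exactly what the new AVX512F and AVX512VLDQ check blocks below capture.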


@ -3,10 +3,14 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
@ -118,6 +122,40 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $16, %esp
; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovhps %xmm0, (%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
@ -160,6 +198,11 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i64> %ret
@ -346,6 +389,62 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $16, %esp
; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX512F-32-NEXT: xorl %eax, %eax
; AVX512F-32-NEXT: vcomisd %xmm2, %xmm1
; AVX512F-32-NEXT: setb %cl
; AVX512F-32-NEXT: kmovw %ecx, %k1
; AVX512F-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; AVX512F-32-NEXT: vmovapd %xmm2, %xmm4
; AVX512F-32-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1}
; AVX512F-32-NEXT: vsubsd %xmm4, %xmm1, %xmm1
; AVX512F-32-NEXT: vmovsd %xmm1, (%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: shll $31, %eax
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: xorl %ecx, %ecx
; AVX512F-32-NEXT: vcomisd %xmm2, %xmm0
; AVX512F-32-NEXT: setb %dl
; AVX512F-32-NEXT: kmovw %edx, %k1
; AVX512F-32-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1}
; AVX512F-32-NEXT: vsubsd %xmm2, %xmm0, %xmm0
; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
@ -410,6 +509,11 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i64> %ret
@ -497,6 +601,40 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $16, %esp
; AVX512F-32-NEXT: vmovd %xmm0, (%esp)
; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
@ -554,6 +692,11 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512DQ-64-NEXT: vmovq %rax, %xmm0
; AVX512DQ-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512DQ-64-NEXT: retq
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i64> %ret
@ -740,6 +883,62 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $16, %esp
; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: xorl %eax, %eax
; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1
; AVX512F-32-NEXT: setb %cl
; AVX512F-32-NEXT: kmovw %ecx, %k1
; AVX512F-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX512F-32-NEXT: vmovaps %xmm2, %xmm4
; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
; AVX512F-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: shll $31, %eax
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: xorl %ecx, %ecx
; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0
; AVX512F-32-NEXT: setb %dl
; AVX512F-32-NEXT: kmovw %edx, %k1
; AVX512F-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
; AVX512F-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX512F-32-NEXT: vmovss %xmm0, (%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
@ -819,6 +1018,11 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512DQ-64-NEXT: vmovq %rax, %xmm0
; AVX512DQ-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512DQ-64-NEXT: retq
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i64> %ret
@ -840,6 +1044,11 @@ define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
@ -849,6 +1058,11 @@ define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i32> %ret
@ -932,6 +1146,15 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
; AVX-64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-NEXT: vcvttsd2usi %xmm1, %eax
; AVX512F-NEXT: vcvttsd2usi %xmm0, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
@ -945,6 +1168,11 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i32> %ret
@ -982,6 +1210,15 @@ define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 {
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm1, %eax
; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@ -999,6 +1236,15 @@ define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 {
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
; AVX512VLDQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i32> %ret
@ -1082,6 +1328,15 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
; AVX-64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %eax
; AVX512F-NEXT: vcvttss2usi %xmm0, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@ -1099,6 +1354,15 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT: vcvttss2usi %xmm1, %eax
; AVX512VLDQ-NEXT: vcvttss2usi %xmm0, %ecx
; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
; AVX512VLDQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i32> %ret
@ -1123,6 +1387,12 @@ define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 {
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
@ -1134,6 +1404,12 @@ define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 {
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i16> %ret
@ -1158,6 +1434,12 @@ define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
@ -1169,6 +1451,12 @@ define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i16> %ret
@ -1202,6 +1490,15 @@ define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
; AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm1, %eax
; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@ -1219,6 +1516,15 @@ define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
; AVX512VLDQ-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i16> %ret
@ -1252,6 +1558,15 @@ define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
; AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm1, %eax
; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@ -1269,6 +1584,15 @@ define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
; AVX512VLDQ-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i16> %ret
@ -1297,6 +1621,12 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
@ -1308,6 +1638,12 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i8> %ret
@ -1336,6 +1672,12 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
@ -1347,6 +1689,12 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i8> %ret
@ -1384,6 +1732,15 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm1, %eax
; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@ -1401,6 +1758,15 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
; AVX512VLDQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i8> %ret
@ -1438,6 +1804,15 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm1, %eax
; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@ -1455,6 +1830,15 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
; AVX512VLDQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i8> %ret
@ -1541,6 +1925,16 @@ define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
@ -1559,6 +1953,14 @@ define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i1> %ret
@ -1745,6 +2147,17 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps %xmm0, %xmm0
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
@ -1764,6 +2177,14 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i1> %ret
@ -1851,6 +2272,21 @@ define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttss2si %xmm0, %eax
; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: kmovw %eax, %k0
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm0, %eax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: kshiftlw $1, %k1, %k1
; AVX512F-NEXT: korw %k1, %k0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttss2si %xmm0, %eax
@ -1880,6 +2316,20 @@ define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT: kmovw %eax, %k0
; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax
; AVX512VLDQ-NEXT: kmovw %eax, %k1
; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1
; AVX512VLDQ-NEXT: korw %k0, %k1, %k0
; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i1> %ret
@ -2066,6 +2516,21 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttss2si %xmm0, %eax
; AVX512F-NEXT: andl $1, %eax
; AVX512F-NEXT: kmovw %eax, %k0
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm0, %eax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: kshiftlw $1, %k1, %k1
; AVX512F-NEXT: korw %k1, %k0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttss2si %xmm0, %eax
@ -2095,6 +2560,20 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT: kmovw %eax, %k0
; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax
; AVX512VLDQ-NEXT: kmovw %eax, %k1
; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1
; AVX512VLDQ-NEXT: korw %k0, %k1, %k0
; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i1> %ret
@ -2116,6 +2595,11 @@ define <4 x i32> @strict_vector_fptosi_v4f32_to_v4i32(<4 x float> %a) #0 {
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
@ -2125,6 +2609,11 @@ define <4 x i32> @strict_vector_fptosi_v4f32_to_v4i32(<4 x float> %a) #0 {
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i32> %ret
@ -2264,6 +2753,15 @@ define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; AVX-64-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
@ -2277,6 +2775,11 @@ define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i32> %ret
@ -2305,6 +2808,12 @@ define <4 x i8> @strict_vector_fptosi_v4f32_to_v4i8(<4 x float> %a) #0 {
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
@ -2316,6 +2825,12 @@ define <4 x i8> @strict_vector_fptosi_v4f32_to_v4i8(<4 x float> %a) #0 {
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i8> %ret
@ -2344,6 +2859,12 @@ define <4 x i8> @strict_vector_fptoui_v4f32_to_v4i8(<4 x float> %a) #0 {
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
@ -2355,6 +2876,12 @@ define <4 x i8> @strict_vector_fptoui_v4f32_to_v4i8(<4 x float> %a) #0 {
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i8> %ret
@ -2376,6 +2903,15 @@ define <4 x i1> @strict_vector_fptosi_v4f32_to_v4i1(<4 x float> %a) #0 {
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
@ -2392,6 +2928,13 @@ define <4 x i1> @strict_vector_fptosi_v4f32_to_v4i1(<4 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT: vpmovm2d %k0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i1> %ret
@ -2413,6 +2956,16 @@ define <4 x i1> @strict_vector_fptoui_v4f32_to_v4i1(<4 x float> %a) #0 {
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
@ -2431,6 +2984,14 @@ define <4 x i1> @strict_vector_fptoui_v4f32_to_v4i1(<4 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT: vpmovm2d %k0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i1> %ret


@ -1,10 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -disable-strictnode-mutation < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX-32
; RUN: llc -disable-strictnode-mutation < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-64
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
@ -85,6 +89,60 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $32, %esp
; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovhps %xmm0, (%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
@ -146,6 +204,11 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2qq %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i64> %ret
@ -298,6 +361,110 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: pushl %ebx
; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $32, %esp
; AVX512F-32-NEXT: .cfi_offset %esi, -16
; AVX512F-32-NEXT: .cfi_offset %ebx, -12
; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512F-32-NEXT: setb %cl
; AVX512F-32-NEXT: kmovw %ecx, %k1
; AVX512F-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; AVX512F-32-NEXT: vmovapd %xmm1, %xmm4
; AVX512F-32-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1}
; AVX512F-32-NEXT: vsubsd %xmm4, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: movl $0, %eax
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: shll $31, %eax
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl %eax, %esi
; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
; AVX512F-32-NEXT: xorl %ecx, %ecx
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512F-32-NEXT: setb %dl
; AVX512F-32-NEXT: kmovw %edx, %k1
; AVX512F-32-NEXT: vmovapd %xmm1, %xmm5
; AVX512F-32-NEXT: vmovsd %xmm3, %xmm5, %xmm5 {%k1}
; AVX512F-32-NEXT: vsubsd %xmm5, %xmm4, %xmm4
; AVX512F-32-NEXT: vmovsd %xmm4, (%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: xorl %edx, %edx
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512F-32-NEXT: setb %bl
; AVX512F-32-NEXT: kmovw %ebx, %k1
; AVX512F-32-NEXT: vmovapd %xmm1, %xmm4
; AVX512F-32-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1}
; AVX512F-32-NEXT: vsubsd %xmm4, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: setae %dl
; AVX512F-32-NEXT: shll $31, %edx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: xorl %ebx, %ebx
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512F-32-NEXT: setb %al
; AVX512F-32-NEXT: kmovw %eax, %k1
; AVX512F-32-NEXT: vmovsd %xmm3, %xmm1, %xmm1 {%k1}
; AVX512F-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX512F-32-NEXT: setae %bl
; AVX512F-32-NEXT: shll $31, %ebx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-32-NEXT: leal -8(%ebp), %esp
; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
@ -409,6 +576,11 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2uqq %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i64> %ret
@ -468,6 +640,59 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $32, %esp
; AVX512F-32-NEXT: vmovd %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vextractps $3, %xmm0, (%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512F-64-NEXT: vcvttss2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
@ -521,12 +746,18 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2qq %xmm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i64> %ret
@ -679,6 +910,110 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: pushl %ebx
; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $32, %esp
; AVX512F-32-NEXT: .cfi_offset %esi, -16
; AVX512F-32-NEXT: .cfi_offset %ebx, -12
; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512F-32-NEXT: setb %cl
; AVX512F-32-NEXT: kmovw %ecx, %k1
; AVX512F-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX512F-32-NEXT: vmovaps %xmm1, %xmm4
; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
; AVX512F-32-NEXT: vsubss %xmm4, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: movl $0, %eax
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: shll $31, %eax
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl %eax, %esi
; AVX512F-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX512F-32-NEXT: xorl %ecx, %ecx
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512F-32-NEXT: setb %dl
; AVX512F-32-NEXT: kmovw %edx, %k1
; AVX512F-32-NEXT: vmovaps %xmm1, %xmm4
; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
; AVX512F-32-NEXT: vsubss %xmm4, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovss %xmm2, (%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-32-NEXT: xorl %edx, %edx
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512F-32-NEXT: setb %bl
; AVX512F-32-NEXT: kmovw %ebx, %k1
; AVX512F-32-NEXT: vmovaps %xmm1, %xmm4
; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
; AVX512F-32-NEXT: vsubss %xmm4, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: setae %dl
; AVX512F-32-NEXT: shll $31, %edx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: xorl %ebx, %ebx
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512F-32-NEXT: setb %al
; AVX512F-32-NEXT: kmovw %eax, %k1
; AVX512F-32-NEXT: vmovss %xmm3, %xmm1, %xmm1 {%k1}
; AVX512F-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX512F-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX512F-32-NEXT: setae %bl
; AVX512F-32-NEXT: shll $31, %ebx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-32-NEXT: leal -8(%ebp), %esp
; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512F-64-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
@ -783,12 +1118,18 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2uqq %xmm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i64> %ret
@ -854,6 +1195,15 @@ define <4 x i32> @strict_vector_fptoui_v4f64_to_v4i32(<4 x double> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
@ -868,6 +1218,12 @@ define <4 x i32> @strict_vector_fptoui_v4f64_to_v4i32(<4 x double> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2udq %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i32> %ret
@ -934,6 +1290,15 @@ define <4 x i1> @strict_vector_fptosi_v4f64_to_v4i1(<4 x double> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0
@ -951,6 +1316,14 @@ define <4 x i1> @strict_vector_fptosi_v4f64_to_v4i1(<4 x double> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0
; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i1> %ret
@ -969,6 +1342,16 @@ define <4 x i1> @strict_vector_fptoui_v4f64_to_v4i1(<4 x double> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0
@ -988,6 +1371,15 @@ define <4 x i1> @strict_vector_fptoui_v4f64_to_v4i1(<4 x double> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQVL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0
; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i1> %ret
@ -1080,6 +1472,14 @@ define <8 x i32> @strict_vector_fptoui_v8f32_to_v8i32(<8 x float> %a) #0 {
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
@ -1092,6 +1492,11 @@ define <8 x i32> @strict_vector_fptoui_v8f32_to_v8i32(<8 x float> %a) #0 {
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i32> %ret
@ -1114,6 +1519,14 @@ define <8 x i16> @strict_vector_fptosi_v8f32_to_v8i16(<8 x float> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
@ -1128,6 +1541,13 @@ define <8 x i16> @strict_vector_fptosi_v8f32_to_v8i16(<8 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i16> %ret
@ -1150,6 +1570,14 @@ define <8 x i16> @strict_vector_fptoui_v8f32_to_v8i16(<8 x float> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
@ -1164,6 +1592,13 @@ define <8 x i16> @strict_vector_fptoui_v8f32_to_v8i16(<8 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i16> %ret
@ -1188,6 +1623,13 @@ define <8 x i8> @strict_vector_fptosi_v8f32_to_v8i8(<8 x float> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
@ -1201,6 +1643,13 @@ define <8 x i8> @strict_vector_fptosi_v8f32_to_v8i8(<8 x float> %a) #0 {
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovdb %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i8> %ret
@ -1225,6 +1674,13 @@ define <8 x i8> @strict_vector_fptoui_v8f32_to_v8i8(<8 x float> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
@ -1238,6 +1694,13 @@ define <8 x i8> @strict_vector_fptoui_v8f32_to_v8i8(<8 x float> %a) #0 {
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovdb %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i8> %ret
@ -1260,6 +1723,16 @@ define <8 x i1> @strict_vector_fptosi_v8f32_to_v8i1(<8 x float> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
@ -1279,6 +1752,15 @@ define <8 x i1> @strict_vector_fptosi_v8f32_to_v8i1(<8 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0
; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i1> %ret
@ -1301,6 +1783,17 @@ define <8 x i1> @strict_vector_fptoui_v8f32_to_v8i1(<8 x float> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
@ -1322,6 +1815,16 @@ define <8 x i1> @strict_vector_fptoui_v8f32_to_v8i1(<8 x float> %a) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0
; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i1> %ret


@ -3,10 +3,14 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE,SSE-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX1,AVX-64,AVX1-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-64,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-32,AVX512F-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-64,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-64,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-64
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
@ -72,6 +76,13 @@ define <4 x float> @uitofp_v4i1_v4f32(<4 x i1> %x) #0 {
; AVX1-64-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
; AVX512F-LABEL: uitofp_v4i1_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
@ -90,6 +101,18 @@ define <4 x float> @uitofp_v4i1_v4f32(<4 x i1> %x) #0 {
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f32:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512DQVL-64-NEXT: retq
%result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -231,11 +254,21 @@ define <4 x float> @uitofp_v4i32_v4f32(<4 x i32> %x) #0 {
; AVX1-64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
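; (The AVX512F lowering below widens the v4i32 source to zmm and converts all
; sixteen lanes with the 512-bit vcvtudq2ps, so the undefined upper elements
; may raise spurious FP exceptions under fpexcept.strict.)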
; AVX512F-LABEL: uitofp_v4i32_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v4i32_v4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i32_v4f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
@ -243,6 +276,11 @@ define <4 x float> @uitofp_v4i32_v4f32(<4 x i32> %x) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i32_v4f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -300,6 +338,14 @@ define <2 x double> @uitofp_v2i1_v2f64(<2 x i1> %x) #0 {
; AVX1-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
; AVX512F-LABEL: uitofp_v2i1_v2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v2i1_v2f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@ -321,6 +367,20 @@ define <2 x double> @uitofp_v2i1_v2f64(<2 x i1> %x) #0 {
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v2i1_v2f64:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-32-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v2i1_v2f64:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-64-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQVL-64-NEXT: retq
%result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -466,11 +526,21 @@ define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; AVX1-64-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: uitofp_v2i32_v2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v2i32_v2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v2i32_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
@ -478,6 +548,11 @@ define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i32_v2f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -552,6 +627,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: sitofp_v2i64_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
@ -559,6 +635,11 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v2i64_v2f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtqq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -610,6 +691,28 @@ define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX1-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-64-NEXT: retq
;
; AVX512F-32-LABEL: uitofp_v2i64_v2f64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-32-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512F-32-NEXT: vpor {{\.LCPI.*}}, %xmm1, %xmm1
; AVX512F-32-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512F-32-NEXT: vpor {{\.LCPI.*}}, %xmm0, %xmm0
; AVX512F-32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
; AVX512F-32-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: uitofp_v2i64_v2f64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-64-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512F-64-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-64-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512F-64-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-64-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: uitofp_v2i64_v2f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm1
@ -630,6 +733,7 @@ define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX512VL-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX512VL-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v2i64_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
@ -637,6 +741,11 @@ define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i64_v2f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtuqq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0


@ -1,10 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-64,AVX1-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-64,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX-32,AVX512F-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX-64,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-64,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-64
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
@ -67,6 +71,20 @@ define <8 x float> @uitofp_v8i1_v8f32(<8 x i1> %x) #0 {
; AVX512DQ-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-64-NEXT: retq
;
; AVX512DQVL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-64-NEXT: retq
%result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -145,17 +163,31 @@ define <8 x float> @uitofp_v8i32_v8f32(<8 x i32> %x) #0 {
; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; FIXME: This is an unsafe behavior for strict FP
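; (The AVX512F lowering below widens the v8i32 source to zmm and uses the
; 512-bit vcvtudq2ps, so the undefined upper elements may raise spurious FP
; exceptions under fpexcept.strict.)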
; AVX512F-LABEL: uitofp_v8i32_v8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i32_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v8i32_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i32_v8f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtudq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -183,6 +215,13 @@ define <4 x double> @uitofp_v4i1_v4f64(<4 x i1> %x) #0 {
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v4i1_v4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
@ -201,6 +240,18 @@ define <4 x double> @uitofp_v4i1_v4f64(<4 x i1> %x) #0 {
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f64:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512DQVL-64-NEXT: retq
%result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -279,17 +330,31 @@ define <4 x double> @uitofp_v4i32_v4f64(<4 x i32> %x) #0 {
; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: uitofp_v4i32_v4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v4i32_v4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i32_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i32_v4f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtudq2pd %xmm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -347,12 +412,18 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: sitofp_v4i64_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v4i64_v4f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtqq2pd %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
@ -386,6 +457,32 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX1-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX1-64-NEXT: retq
;
; AVX512F-32-LABEL: uitofp_v4i64_v4f64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpsrlq $32, %ymm0, %ymm1
; AVX512F-32-NEXT: vpor {{\.LCPI.*}}, %ymm1, %ymm1
; AVX512F-32-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-32-NEXT: vsubpd %ymm2, %ymm1, %ymm1
; AVX512F-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX512F-32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
; AVX512F-32-NEXT: vpor {{\.LCPI.*}}, %ymm0, %ymm0
; AVX512F-32-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: uitofp_v4i64_v4f64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-64-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; AVX512F-64-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512F-64-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
; AVX512F-64-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512F-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-64-NEXT: vsubpd %ymm2, %ymm0, %ymm0
; AVX512F-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: uitofp_v4i64_v4f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm1
@ -406,12 +503,18 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX512VL-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX512VL-64-NEXT: retq
;
; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i64_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i64_v4f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtuqq2pd %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0