2018-06-13 22:32:12 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2018-11-05 23:28:10 +08:00
|
|
|
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
|
2019-11-12 07:51:00 +08:00
|
|
|
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
|
|
|
|
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
|
|
|
|
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512dq < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
|
2018-06-13 22:32:12 +08:00
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP division of <1 x float> constants (1.0 / 10.0) with dynamic
; rounding and strict exception semantics. The expected assembly still
; contains a real divss/vdivss, i.e. the division is not folded away.
define <1 x float> @constrained_vector_fdiv_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
           <1 x float> <float 1.000000e+00>,
           <1 x float> <float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %div
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP division of <2 x double> constants (<1.0, 2.0> / <10.0, 10.0>)
; with dynamic rounding and strict exception semantics. Both the SSE and the
; AVX runs are expected to emit a single packed divide (divpd/vdivpd).
define <2 x double> @constrained_vector_fdiv_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; CHECK-NEXT:    divpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; AVX-NEXT:    vdivpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %div
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP division of <3 x float> constants (<1.0, 2.0, 3.0> / 10.0 splat)
; with dynamic rounding and strict exception semantics. The expected assembly
; performs three scalar divides (divss/vdivss) and then reassembles the
; result vector with shuffles/inserts rather than using one packed divide.
define <3 x float> @constrained_vector_fdiv_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss %xmm1, %xmm2
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss %xmm1, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss %xmm1, %xmm3
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm2, %xmm2
; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm3, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT:    retq
entry:
  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %div
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP division of <3 x double> constants (<1.0, 2.0, 3.0> / 10.0 splat)
; with dynamic rounding and strict exception semantics. Expected assembly:
;  - SSE run: one packed divpd for two lanes plus one scalar divsd; the scalar
;    result is stored to the stack and reloaded with fldl, followed by wait.
;  - AVX runs: one vdivsd and one vdivpd combined with vinsertf128.
define <3 x double> @constrained_vector_fdiv_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; CHECK-NEXT:    divpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    divsd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vdivsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0]
; AVX-NEXT:    vdivpd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %div
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP division of <4 x double> constants (<1.0, 2.0, 3.0, 4.0> / 10.0
; splat) with dynamic rounding and strict exception semantics. Expected
; assembly differs per run:
;  - SSE run: two 128-bit divpd sharing the divisor register.
;  - AVX1 run: one 256-bit vdivpd with a memory divisor.
;  - AVX512 runs: divisor materialised via vbroadcastsd, then one vdivpd.
define <4 x double> @constrained_vector_fdiv_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1]
; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0]
; CHECK-NEXT:    divpd %xmm2, %xmm1
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; CHECK-NEXT:    divpd %xmm2, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fdiv_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; AVX1-NEXT:    vdivpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fdiv_v4f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1.0E+1,1.0E+1,1.0E+1,1.0E+1]
; AVX512-NEXT:    vmovapd {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; AVX512-NEXT:    vdivpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
entry:
  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
           <4 x double> <double 1.000000e+00, double 2.000000e+00,
                         double 3.000000e+00, double 4.000000e+00>,
           <4 x double> <double 1.000000e+01, double 1.000000e+01,
                         double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %div
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP remainder of <1 x float> constants (1.0 rem 10.0) with dynamic
; rounding and strict exception semantics. The expected assembly loads both
; operands and calls the fmodf library routine.
define <1 x float> @constrained_vector_frem_v1f32() #0 {
; CHECK-LABEL: constrained_vector_frem_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
           <1 x float> <float 1.000000e+00>,
           <1 x float> <float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %rem
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP remainder of <2 x double> constants (<1.0, 2.0> rem 10.0 splat)
; with dynamic rounding and strict exception semantics. The expected assembly
; makes two fmod library calls, spilling the first result across the second
; call and merging both with unpcklpd/vunpcklpd.
define <2 x double> @constrained_vector_frem_v2f64() #0 {
; CHECK-LABEL: constrained_vector_frem_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %rem
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP remainder of <3 x float> constants (<1.0, 2.0, 3.0> rem 10.0
; splat) with dynamic rounding and strict exception semantics. The expected
; assembly makes three fmodf library calls, spilling intermediate results to
; the stack and rebuilding the vector afterwards.
define <3 x float> @constrained_vector_frem_v3f32() #0 {
; CHECK-LABEL: constrained_vector_frem_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %rem
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP remainder of <3 x double> constants (<1.0, 2.0, 3.0> rem 10.0
; splat) with dynamic rounding and strict exception semantics. The expected
; assembly makes three fmod library calls:
;  - SSE run: results are spilled as 8-byte slots; the last one is stored and
;    reloaded with fldl (followed by wait), the other two are reloaded into
;    xmm0/xmm1.
;  - AVX runs: the first two results are merged and spilled as a 32-byte ymm
;    value, vzeroupper is issued before the third call, and the final vector
;    is assembled with vinsertf128.
define <3 x double> @constrained_vector_frem_v3f64() #0 {
; CHECK-LABEL: constrained_vector_frem_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %rem
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP remainder of <4 x double> constants (<1.0, 2.0, 3.0, 4.0> rem
; 10.0 splat) with dynamic rounding and strict exception semantics. The
; expected assembly makes four fmod library calls and merges the results
; pairwise; the AVX runs finish with a vinsertf128 from the spill slot.
; The body carries an explicit "entry" label like the other tests in this
; file, so the block-header assertions include the block-name comment.
define <4 x double> @constrained_vector_frem_v4f64() #0 {
; CHECK-LABEL: constrained_vector_frem_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
           <4 x double> <double 1.000000e+00, double 2.000000e+00,
                         double 3.000000e+00, double 4.000000e+00>,
           <4 x double> <double 1.000000e+01, double 1.000000e+01,
                         double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %rem
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP multiplication of <1 x float> constants: +infinity
; (0x7FF0000000000000) times 2.0, with dynamic rounding and strict exception
; semantics. The expected assembly keeps a real mulss/vmulss instruction.
define <1 x float> @constrained_vector_fmul_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fmul_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fmul_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
           <1 x float> <float 0x7FF0000000000000>,
           <1 x float> <float 2.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %mul
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP multiplication of <2 x double> constants: the largest finite
; double (0x7FEFFFFFFFFFFFFF, printed as 1.7976931348623157E+308) in both
; lanes times <2.0, 3.0>, with dynamic rounding and strict exception
; semantics. A single packed mulpd/vmulpd is expected.
define <2 x double> @constrained_vector_fmul_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fmul_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fmul_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT:    vmulpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %mul
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP multiplication of <3 x float> constants: a splat of
; 0x7FF0000000000000 (+infinity) times <1.0, 10.0, 100.0>, with dynamic
; rounding and strict exception semantics. The expected assembly performs
; three scalar multiplies (mulss/vmulss) and rebuilds the vector afterwards.
define <3 x float> @constrained_vector_fmul_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fmul_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    mulss %xmm1, %xmm2
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    mulss %xmm1, %xmm0
; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm1
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fmul_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT:    retq
entry:
  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
                        float 0x7FF0000000000000>,
           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %mul
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP multiplication of <3 x double> constants: a splat of the largest
; finite double (0x7FEFFFFFFFFFFFFF) times <1.0, 10.0, 100.0>, with dynamic
; rounding and strict exception semantics. Expected assembly:
;  - SSE run: one packed mulpd plus one scalar mulsd whose result is stored
;    to the stack and reloaded with fldl, followed by wait.
;  - AVX runs: one vmulsd and one vmulpd combined with vinsertf128.
define <3 x double> @constrained_vector_fmul_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fmul_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    mulsd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fmul_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT:    vmulpd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF>,
           <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %mul
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP multiplication of <4 x double> constants: a splat of the largest
; finite double (0x7FEFFFFFFFFFFFFF) times <2.0, 3.0, 4.0, 5.0>, with dynamic
; rounding and strict exception semantics. Expected assembly differs per run:
;  - SSE run: two 128-bit mulpd reusing the splat register.
;  - AVX1 run: one 256-bit vmulpd with a memory operand.
;  - AVX512 runs: splat materialised via vbroadcastsd, then one vmulpd.
define <4 x double> @constrained_vector_fmul_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fmul_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0]
; CHECK-NEXT:    mulpd %xmm0, %xmm1
; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fmul_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX1-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fmul_v4f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX512-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <4 x double> <double 2.000000e+00, double 3.000000e+00,
                         double 4.000000e+00, double 5.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %mul
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP addition of <1 x float> constants: +infinity
; (0x7FF0000000000000) plus 1.0, with dynamic rounding and strict exception
; semantics. The expected assembly keeps a real addss/vaddss instruction.
define <1 x float> @constrained_vector_fadd_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fadd_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fadd_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
           <1 x float> <float 0x7FF0000000000000>,
           <1 x float> <float 1.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %add
}
|
2018-07-23 22:40:17 +08:00
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP addition of <2 x double> constants: the largest finite double
; (0x7FEFFFFFFFFFFFFF) in both lanes plus <1.0, 0.1>, with dynamic rounding
; and strict exception semantics. A single packed addpd/vaddpd is expected.
define <2 x double> @constrained_vector_fadd_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fadd_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fadd_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %add
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP addition of <3 x float> constants: a splat of 0xFFFFFFFFE0000000
; plus <2.0, 1.0, 0.0>, with dynamic rounding and strict exception semantics.
; The expected assembly performs three scalar adds (addss/vaddss); the 0.0
; addend is materialised with xorps/vxorps and still added rather than
; dropped, and the vector is rebuilt with shuffles/inserts.
define <3 x float> @constrained_vector_fadd_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fadd_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    addss %xmm2, %xmm1
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    addss %xmm2, %xmm0
; CHECK-NEXT:    addss {{.*}}(%rip), %xmm2
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fadd_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm2
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                        float 0xFFFFFFFFE0000000>,
           <3 x float> <float 2.0, float 1.0, float 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %add
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained fadd of two constant <3 x double> vectors with dynamic rounding
; and strict exception semantics. The default run expects one packed add plus
; one scalar add whose result is stored to the stack and reloaded with fldl;
; the AVX runs expect the two parts to be joined with vinsertf128.
define <3 x double> @constrained_vector_fadd_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fadd_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1
; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fadd_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF>,
           <3 x double> <double 2.0, double 1.0, double 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %add
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained fadd of two constant <4 x double> vectors with dynamic rounding
; and strict exception semantics. The default run expects two 128-bit packed
; adds; the AVX1 and AVX512 runs each expect a single 256-bit vaddpd and differ
; only in how the first operand is materialized (vmovapd vs. vbroadcastsd).
define <4 x double> @constrained_vector_fadd_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fadd_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1]
; CHECK-NEXT: addpd %xmm0, %xmm1
; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fadd_v4f64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX1-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fadd_v4f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX512-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
entry:
  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %add
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained fsub on <1 x float> constants (first operand is the bit pattern
; 0x7FF0000000000000, second is 1.0) with dynamic rounding and strict exception
; semantics. The expected output is a single scalar subtract.
define <1 x float> @constrained_vector_fsub_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fsub_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: subss {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fsub_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
           <1 x float> <float 0x7FF0000000000000>,
           <1 x float> <float 1.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %sub
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained fsub of two constant <2 x double> vectors with dynamic rounding
; and strict exception semantics. The expected output is one packed subtract
; against a constant-pool operand.
define <2 x double> @constrained_vector_fsub_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fsub_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %sub
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained fsub of two constant <3 x float> vectors with dynamic rounding
; and strict exception semantics. The expected output performs three scalar
; subtracts and then shuffles the results back into a single xmm register.
define <3 x float> @constrained_vector_fsub_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fsub_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movaps %xmm1, %xmm2
; CHECK-NEXT: subss %xmm0, %xmm2
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: subss {{.*}}(%rip), %xmm0
; CHECK-NEXT: subss {{.*}}(%rip), %xmm1
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fsub_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vsubss {{.*}}(%rip), %xmm1, %xmm2
; AVX-NEXT: vsubss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                        float 0xFFFFFFFFE0000000>,
           <3 x float> <float 2.0, float 1.0, float 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %sub
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained fsub of two constant <3 x double> vectors with dynamic rounding
; and strict exception semantics. The default run expects one scalar subtract
; (stored to the stack and reloaded with fldl) plus one packed subtract; the
; AVX runs expect the two parts to be joined with vinsertf128.
define <3 x double> @constrained_vector_fsub_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorpd %xmm0, %xmm0
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: subsd %xmm0, %xmm1
; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fsub_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF>,
           <3 x double> <double 2.0, double 1.0, double 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %sub
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained fsub of two constant <4 x double> vectors with dynamic rounding
; and strict exception semantics. The default run expects two 128-bit packed
; subtracts; the AVX1 and AVX512 runs each expect a single 256-bit vsubpd and
; differ only in how the first operand is materialized.
define <4 x double> @constrained_vector_fsub_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1
; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fsub_v4f64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX1-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fsub_v4f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX512-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
entry:
  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %sub
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained sqrt of the constant <1 x float> <42.0> with dynamic rounding and
; strict exception semantics. The expected output loads the constant and
; applies a single scalar square-root instruction.
define <1 x float> @constrained_vector_sqrt_v1f32() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: sqrtss %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sqrt_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
            <1 x float> <float 42.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <1 x float> %sqrt
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained sqrt of a constant <2 x double> vector with dynamic rounding and
; strict exception semantics. The expected output is one packed square root
; reading its operand directly from memory.
define <2 x double> @constrained_vector_sqrt_v2f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sqrt_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vsqrtpd {{.*}}(%rip), %xmm0
; AVX-NEXT: retq
entry:
  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
            <2 x double> <double 42.0, double 42.1>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <2 x double> %sqrt
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained sqrt of a constant <3 x float> vector with dynamic rounding and
; strict exception semantics. The expected output performs three scalar square
; roots and then shuffles the results back into a single xmm register.
define <3 x float> @constrained_vector_sqrt_v3f32() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: sqrtss %xmm0, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: sqrtss %xmm0, %xmm0
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: sqrtss %xmm2, %xmm2
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vsqrtss %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
            <3 x float> <float 42.0, float 43.0, float 44.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <3 x float> %sqrt
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained sqrt of a constant <3 x double> vector with dynamic rounding and
; strict exception semantics. The default run expects one scalar square root
; (stored to the stack and reloaded with fldl) plus one packed square root; the
; AVX runs expect the two parts to be joined with vinsertf128.
define <3 x double> @constrained_vector_sqrt_v3f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: sqrtsd %xmm0, %xmm1
; CHECK-NEXT: sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vsqrtpd {{.*}}(%rip), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
            <3 x double> <double 42.0, double 42.1, double 42.2>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <3 x double> %sqrt
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained sqrt of a constant <4 x double> vector with dynamic rounding and
; strict exception semantics. The default run expects two 128-bit packed square
; roots; the AVX runs expect a single 256-bit vsqrtpd.
define <4 x double> @constrained_vector_sqrt_v4f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sqrtpd {{.*}}(%rip), %xmm1
; CHECK-NEXT: sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sqrt_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vsqrtpd {{.*}}(%rip), %ymm0
; AVX-NEXT: retq
entry:
  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
            <4 x double> <double 42.0, double 42.1,
                          double 42.2, double 42.3>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <4 x double> %sqrt
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained pow on <1 x float> constants (42.0 raised to 3.0) with dynamic
; rounding and strict exception semantics. The expected output is a single
; call to powf with both operands loaded into xmm0 and xmm1.
define <1 x float> @constrained_vector_pow_v1f32() #0 {
; CHECK-LABEL: constrained_vector_pow_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq powf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_pow_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq powf
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
           <1 x float> <float 42.0>,
           <1 x float> <float 3.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %pow
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained pow of two constant <2 x double> vectors with dynamic rounding
; and strict exception semantics. The expected output makes one call to pow per
; element, spilling the first result across the second call and merging the two
; with an unpack from the spill slot.
define <2 x double> @constrained_vector_pow_v2f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_pow_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
           <2 x double> <double 42.1, double 42.2>,
           <2 x double> <double 3.0, double 3.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %pow
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained pow of two constant <3 x float> vectors with dynamic rounding and
; strict exception semantics. The expected output makes three calls to powf,
; spilling intermediate results to the stack and reassembling the vector with
; unpack (default run) or vinsertps (AVX runs) instructions.
define <3 x float> @constrained_vector_pow_v3f32() #0 {
; CHECK-LABEL: constrained_vector_pow_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq powf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq powf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq powf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_pow_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq powf
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq powf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq powf
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
           <3 x float> <float 42.0, float 43.0, float 44.0>,
           <3 x float> <float 3.0, float 3.0, float 3.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %pow
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained pow of two constant <3 x double> vectors with dynamic rounding
; and strict exception semantics. The expected output makes three calls to pow.
; The default run reloads two results into xmm0/xmm1 and pushes the third with
; fldl; the AVX runs spill a partial ymm value, issue vzeroupper before the
; last call, and finish with vinsertf128.
define <3 x double> @constrained_vector_pow_v3f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_pow_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq pow
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
           <3 x double> <double 42.0, double 42.1, double 42.2>,
           <3 x double> <double 3.0, double 3.0, double 3.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %pow
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Constrained pow of two constant <4 x double> vectors with dynamic rounding
; and strict exception semantics. The expected output makes four calls to pow,
; pairing results with unpack instructions via stack spill slots; the AVX runs
; additionally combine the two 128-bit halves with vinsertf128.
define <4 x double> @constrained_vector_pow_v4f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq pow
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_pow_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
           <4 x double> <double 42.1, double 42.2,
                         double 42.3, double 42.4>,
           <4 x double> <double 3.0, double 3.0,
                         double 3.0, double 3.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %pow
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP powi (round.dynamic, fpexcept.strict) on <1 x float> <42.0> with
; exponent i32 3. Both the default SSE run and the AVX runs expect a single
; call to __powisf2 with the exponent materialized in %edi.
define <1 x float> @constrained_vector_powi_v1f32() #0 {
; CHECK-LABEL: constrained_vector_powi_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powisf2
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_powi_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powisf2
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
      <1 x float> <float 42.0>,
      i32 3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <1 x float> %powi
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP powi on <2 x double> <42.1, 42.2> with exponent i32 3.
; The expected code makes two scalar calls to __powidf2; the first result is
; spilled to the stack and merged with the second by an unpcklpd (vunpcklpd
; in the AVX runs) that folds the reload.
define <2 x double> @constrained_vector_powi_v2f64() #0 {
; CHECK-LABEL: constrained_vector_powi_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_powi_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
      <2 x double> <double 42.1, double 42.2>,
      i32 3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <2 x double> %powi
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP powi on <3 x float> <42.0, 43.0, 44.0> with exponent i32 3.
; The expected code makes three scalar calls to __powisf2. The SSE run
; reassembles the results with unpcklps followed by a folded-reload unpcklpd;
; the AVX runs use two vinsertps instead.
define <3 x float> @constrained_vector_powi_v3f32() #0 {
; CHECK-LABEL: constrained_vector_powi_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powisf2
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powisf2
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powisf2
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_powi_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powisf2
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powisf2
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powisf2
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
      <3 x float> <float 42.0, float 43.0, float 44.0>,
      i32 3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <3 x float> %powi
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP powi on <3 x double> <42.0, 42.1, 42.2> with exponent i32 3.
; The expected code makes three scalar calls to __powidf2.
; SSE run: two results are spilled as 8-byte scalars and reloaded into
; xmm0/xmm1, while the last result is stored to the stack and loaded with
; fldl, followed by a wait.
; AVX runs: the first two results are merged with vunpcklpd and spilled as a
; 32-byte ymm value, vzeroupper is emitted before the third call, and the
; third result is combined with vinsertf128 $1.
define <3 x double> @constrained_vector_powi_v3f64() #0 {
; CHECK-LABEL: constrained_vector_powi_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_powi_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
      <3 x double> <double 42.0, double 42.1, double 42.2>,
      i32 3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <3 x double> %powi
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP powi on <4 x double> <42.1, 42.2, 42.3, 42.4> with exponent i32 3.
; The expected code makes four scalar calls to __powidf2 and pairs the
; results with folded-reload unpcklpd. The SSE run leaves the two halves in
; xmm0 and xmm1; the AVX runs join them into ymm0 with vinsertf128 $1.
define <4 x double> @constrained_vector_powi_v4f64() #0 {
; CHECK-LABEL: constrained_vector_powi_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movl $3, %edi
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_powi_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
      <4 x double> <double 42.1, double 42.2,
                    double 42.3, double 42.4>,
      i32 3,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <4 x double> %powi
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP sin (round.dynamic, fpexcept.strict) on <1 x float> <42.0>.
; Both the default SSE run and the AVX runs expect a single call to sinf.
define <1 x float> @constrained_vector_sin_v1f32() #0 {
; CHECK-LABEL: constrained_vector_sin_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq sinf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sin_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq sinf
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
      <1 x float> <float 42.0>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <1 x float> %sin
}
|
2018-07-23 22:40:17 +08:00
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP sin on <2 x double> <42.0, 42.1>.
; The expected code makes two scalar calls to sin; the first result is
; spilled to the stack and merged with the second by an unpcklpd (vunpcklpd
; in the AVX runs) that folds the reload.
define <2 x double> @constrained_vector_sin_v2f64() #0 {
; CHECK-LABEL: constrained_vector_sin_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sin_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
      <2 x double> <double 42.0, double 42.1>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <2 x double> %sin
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP sin on <3 x float> <42.0, 43.0, 44.0>.
; The expected code makes three scalar calls to sinf. The SSE run reassembles
; the results with unpcklps followed by a folded-reload unpcklpd; the AVX
; runs use two vinsertps instead.
define <3 x float> @constrained_vector_sin_v3f32() #0 {
; CHECK-LABEL: constrained_vector_sin_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq sinf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq sinf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq sinf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sin_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq sinf
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq sinf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq sinf
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
      <3 x float> <float 42.0, float 43.0, float 44.0>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <3 x float> %sin
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP sin on <3 x double> <42.0, 42.1, 42.2>.
; The expected code makes three scalar calls to sin.
; SSE run: two results are spilled as 8-byte scalars and reloaded into
; xmm0/xmm1, while the last result is stored to the stack and loaded with
; fldl, followed by a wait.
; AVX runs: the first two results are merged with vunpcklpd and spilled as a
; 32-byte ymm value, vzeroupper is emitted before the third call, and the
; third result is combined with vinsertf128 $1.
define <3 x double> @constrained_vector_sin_v3f64() #0 {
; CHECK-LABEL: constrained_vector_sin_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sin_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq sin
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
      <3 x double> <double 42.0, double 42.1, double 42.2>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <3 x double> %sin
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP sin on <4 x double> <42.0, 42.1, 42.2, 42.3>.
; The expected code makes four scalar calls to sin and pairs the results with
; folded-reload unpcklpd. The SSE run leaves the two halves in xmm0 and xmm1;
; the AVX runs join them into ymm0 with vinsertf128 $1.
define <4 x double> @constrained_vector_sin_v4f64() #0 {
; CHECK-LABEL: constrained_vector_sin_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq sin
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sin_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
      <4 x double> <double 42.0, double 42.1,
                    double 42.2, double 42.3>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <4 x double> %sin
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP cos (round.dynamic, fpexcept.strict) on <1 x float> <42.0>.
; Both the default SSE run and the AVX runs expect a single call to cosf.
define <1 x float> @constrained_vector_cos_v1f32() #0 {
; CHECK-LABEL: constrained_vector_cos_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq cosf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_cos_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq cosf
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
      <1 x float> <float 42.0>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <1 x float> %cos
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP cos on <2 x double> <42.0, 42.1>.
; The expected code makes two scalar calls to cos; the first result is
; spilled to the stack and merged with the second by an unpcklpd (vunpcklpd
; in the AVX runs) that folds the reload.
define <2 x double> @constrained_vector_cos_v2f64() #0 {
; CHECK-LABEL: constrained_vector_cos_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_cos_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
      <2 x double> <double 42.0, double 42.1>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <2 x double> %cos
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP cos on <3 x float> <42.0, 43.0, 44.0>.
; The expected code makes three scalar calls to cosf. The SSE run reassembles
; the results with unpcklps followed by a folded-reload unpcklpd; the AVX
; runs use two vinsertps instead.
define <3 x float> @constrained_vector_cos_v3f32() #0 {
; CHECK-LABEL: constrained_vector_cos_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq cosf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq cosf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq cosf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_cos_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq cosf
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq cosf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq cosf
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
      <3 x float> <float 42.0, float 43.0, float 44.0>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <3 x float> %cos
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP cos on <3 x double> <42.0, 42.1, 42.2>.
; The expected code makes three scalar calls to cos.
; SSE run: two results are spilled as 8-byte scalars and reloaded into
; xmm0/xmm1, while the last result is stored to the stack and loaded with
; fldl, followed by a wait.
; AVX runs: the first two results are merged with vunpcklpd and spilled as a
; 32-byte ymm value, vzeroupper is emitted before the third call, and the
; third result is combined with vinsertf128 $1.
define <3 x double> @constrained_vector_cos_v3f64() #0 {
; CHECK-LABEL: constrained_vector_cos_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_cos_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq cos
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
      <3 x double> <double 42.0, double 42.1, double 42.2>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <3 x double> %cos
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP cos on <4 x double> <42.0, 42.1, 42.2, 42.3>.
; The expected code makes four scalar calls to cos and pairs the results with
; folded-reload unpcklpd. The SSE run leaves the two halves in xmm0 and xmm1;
; the AVX runs join them into ymm0 with vinsertf128 $1.
define <4 x double> @constrained_vector_cos_v4f64() #0 {
; CHECK-LABEL: constrained_vector_cos_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq cos
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_cos_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
      <4 x double> <double 42.0, double 42.1,
                    double 42.2, double 42.3>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <4 x double> %cos
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp on a constant <1 x float> (dynamic rounding, strict exceptions).
; The autogenerated checks expect one libcall to expf on both the default
; (CHECK) and AVX run lines.
define <1 x float> @constrained_vector_exp_v1f32() #0 {
; CHECK-LABEL: constrained_vector_exp_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq expf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq expf
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %exp
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp on a constant <2 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect two scalar libcalls to exp whose results are
; recombined with (v)unpcklpd.
define <2 x double> @constrained_vector_exp_v2f64() #0 {
; CHECK-LABEL: constrained_vector_exp_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %exp
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp on a constant <3 x float> (dynamic rounding, strict exceptions).
; The autogenerated checks expect three scalar libcalls to expf; the results
; are merged with unpcklps/unpcklpd on the default run line and with vinsertps
; on the AVX run lines.
define <3 x float> @constrained_vector_exp_v3f32() #0 {
; CHECK-LABEL: constrained_vector_exp_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq expf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq expf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq expf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq expf
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq expf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq expf
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
                             <3 x float> <float 42.0, float 43.0, float 44.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x float> %exp
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp on a constant <3 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect three scalar libcalls to exp. On the default
; run line the last result is stored to the stack and loaded with fldl
; (followed by wait) while the other two are reloaded into xmm0/xmm1; the AVX
; run lines assemble a ymm result with vinsertf128.
define <3 x double> @constrained_vector_exp_v3f64() #0 {
; CHECK-LABEL: constrained_vector_exp_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq exp
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
                             <3 x double> <double 42.0, double 42.1, double 42.2>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x double> %exp
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp on a constant <4 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect four scalar libcalls to exp. The default run
; line leaves the result in xmm0/xmm1; the AVX run lines combine the two halves
; with vinsertf128.
define <4 x double> @constrained_vector_exp_v4f64() #0 {
; CHECK-LABEL: constrained_vector_exp_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %exp
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp2 on a constant <1 x float> (dynamic rounding, strict exceptions).
; The autogenerated checks expect one libcall to exp2f on both the default
; (CHECK) and AVX run lines.
define <1 x float> @constrained_vector_exp2_v1f32() #0 {
; CHECK-LABEL: constrained_vector_exp2_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq exp2f
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp2_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq exp2f
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %exp2
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp2 on a constant <2 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect two scalar libcalls to exp2 whose results are
; recombined with (v)unpcklpd.
define <2 x double> @constrained_vector_exp2_v2f64() #0 {
; CHECK-LABEL: constrained_vector_exp2_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp2_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
                              <2 x double> <double 42.1, double 42.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <2 x double> %exp2
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp2 on a constant <3 x float> (dynamic rounding, strict exceptions).
; The autogenerated checks expect three scalar libcalls to exp2f; the results
; are merged with unpcklps/unpcklpd on the default run line and with vinsertps
; on the AVX run lines.
define <3 x float> @constrained_vector_exp2_v3f32() #0 {
; CHECK-LABEL: constrained_vector_exp2_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq exp2f
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq exp2f
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq exp2f
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp2_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq exp2f
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq exp2f
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq exp2f
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %exp2
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp2 on a constant <3 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect three scalar libcalls to exp2. On the default
; run line the last result is stored to the stack and loaded with fldl
; (followed by wait) while the other two are reloaded into xmm0/xmm1; the AVX
; run lines assemble a ymm result with vinsertf128.
define <3 x double> @constrained_vector_exp2_v3f64() #0 {
; CHECK-LABEL: constrained_vector_exp2_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp2_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq exp2
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
                              <3 x double> <double 42.0, double 42.1, double 42.2>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x double> %exp2
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP exp2 on a constant <4 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect four scalar libcalls to exp2. The default run
; line leaves the result in xmm0/xmm1; the AVX run lines combine the two halves
; with vinsertf128.
define <4 x double> @constrained_vector_exp2_v4f64() #0 {
; CHECK-LABEL: constrained_vector_exp2_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_exp2_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
                              <4 x double> <double 42.1, double 42.2,
                                            double 42.3, double 42.4>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <4 x double> %exp2
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log on a constant <1 x float> (dynamic rounding, strict exceptions).
; The autogenerated checks expect one libcall to logf on both the default
; (CHECK) and AVX run lines.
define <1 x float> @constrained_vector_log_v1f32() #0 {
; CHECK-LABEL: constrained_vector_log_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq logf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_log_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq logf
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %log
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log on a constant <2 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect two scalar libcalls to log whose results are
; recombined with (v)unpcklpd.
define <2 x double> @constrained_vector_log_v2f64() #0 {
; CHECK-LABEL: constrained_vector_log_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_log_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %log
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log on a constant <3 x float> (dynamic rounding, strict exceptions).
; The autogenerated checks expect three scalar libcalls to logf; the results
; are merged with unpcklps/unpcklpd on the default run line and with vinsertps
; on the AVX run lines.
define <3 x float> @constrained_vector_log_v3f32() #0 {
; CHECK-LABEL: constrained_vector_log_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq logf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq logf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq logf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_log_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq logf
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq logf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq logf
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
                             <3 x float> <float 42.0, float 43.0, float 44.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x float> %log
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log on a constant <3 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect three scalar libcalls to log. On the default
; run line the last result is stored to the stack and loaded with fldl
; (followed by wait) while the other two are reloaded into xmm0/xmm1; the AVX
; run lines assemble a ymm result with vinsertf128.
define <3 x double> @constrained_vector_log_v3f64() #0 {
; CHECK-LABEL: constrained_vector_log_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_log_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq log
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
                             <3 x double> <double 42.0, double 42.1, double 42.2>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x double> %log
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log on a constant <4 x double> (dynamic rounding, strict exceptions).
; The autogenerated checks expect four scalar libcalls to log. The default run
; line leaves the result in xmm0/xmm1; the AVX run lines combine the two halves
; with vinsertf128.
define <4 x double> @constrained_vector_log_v4f64() #0 {
; CHECK-LABEL: constrained_vector_log_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq log
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_log_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %log
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log10 of a constant <1 x float> (dynamic rounding, strict exceptions).
; Both the default-feature run and the +avx/+avx512 runs expect a single
; libcall to log10f.
define <1 x float> @constrained_vector_log10_v1f32() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log10_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: pushq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq log10f
|
|
|
|
; CHECK-NEXT: popq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log10_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: pushq %rax
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq log10f
|
|
|
|
; AVX-NEXT: popq %rax
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-08-15 06:13:11 +08:00
|
|
|
entry:
|
|
|
|
%log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
|
|
|
|
<1 x float> <float 42.0>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-15 06:13:11 +08:00
|
|
|
ret <1 x float> %log10
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log10 of a constant <2 x double>. The expected code makes one log10
; libcall per lane, spills the first result across the second call, and merges
; the two lanes with (v)unpcklpd folded from the spill slot.
define <2 x double> @constrained_vector_log10_v2f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log10_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log10_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $24, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: addq $24, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-06-13 22:32:12 +08:00
|
|
|
entry:
|
|
|
|
%log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
|
|
|
|
<2 x double> <double 42.0, double 42.1>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-06-13 22:32:12 +08:00
|
|
|
ret <2 x double> %log10
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log10 of a constant <3 x float>. The expected code makes three
; log10f libcalls with the intermediate results spilled to the stack. The
; default-feature run reassembles the vector with unpcklps + unpcklpd; the
; +avx/+avx512 runs use two vinsertps instead.
define <3 x float> @constrained_vector_log10_v3f32() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log10_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq log10f
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq log10f
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq log10f
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log10_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq log10f
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq log10f
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq log10f
|
|
|
|
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
|
|
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
|
|
|
|
; AVX-NEXT: addq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-08-01 22:17:19 +08:00
|
|
|
entry:
|
|
|
|
%log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
|
|
|
|
<3 x float> <float 42.0, float 43.0, float 44.0>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-01 22:17:19 +08:00
|
|
|
ret <3 x float> %log10
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log10 of a constant <3 x double>. The expected code makes three
; log10 libcalls. In the default-feature run two results are reloaded into
; xmm0/xmm1 and the last one is stored and pushed with fldl (followed by wait).
; In the +avx/+avx512 runs the lanes are merged into ymm0 with vunpcklpd +
; vinsertf128, with a vzeroupper emitted before the third call.
define <3 x double> @constrained_vector_log10_v3f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log10_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-NEXT: # xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log10_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $56, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 64
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX-NEXT: addq $56, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-08-01 22:17:19 +08:00
|
|
|
entry:
|
|
|
|
%log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
|
|
|
|
<3 x double> <double 42.0, double 42.1, double 42.2>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-01 22:17:19 +08:00
|
|
|
ret <3 x double> %log10
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log10 of a constant <4 x double>. The expected code makes four
; log10 libcalls and pairs the results with (v)unpcklpd. The default-feature
; run leaves the two halves in xmm0 and xmm1; the +avx/+avx512 runs join them
; into ymm0 with vinsertf128 folded from the spill slot.
define <4 x double> @constrained_vector_log10_v4f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log10_v4f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log10
|
|
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log10_v4f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log10
|
|
|
|
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: addq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-07-23 22:40:17 +08:00
|
|
|
entry:
|
|
|
|
%log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
|
|
|
|
<4 x double> <double 42.0, double 42.1,
|
|
|
|
double 42.2, double 42.3>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-07-23 22:40:17 +08:00
|
|
|
ret <4 x double> %log10
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log2 of a constant <1 x float> (dynamic rounding, strict exceptions).
; Both the default-feature run and the +avx/+avx512 runs expect a single
; libcall to log2f.
define <1 x float> @constrained_vector_log2_v1f32() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log2_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: pushq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq log2f
|
|
|
|
; CHECK-NEXT: popq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log2_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: pushq %rax
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq log2f
|
|
|
|
; AVX-NEXT: popq %rax
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-08-15 06:13:11 +08:00
|
|
|
entry:
|
|
|
|
%log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
|
|
|
|
<1 x float> <float 42.0>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-15 06:13:11 +08:00
|
|
|
ret <1 x float> %log2
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log2 of a constant <2 x double>. The expected code makes one log2
; libcall per lane, spills the first result across the second call, and merges
; the two lanes with (v)unpcklpd folded from the spill slot.
define <2 x double> @constrained_vector_log2_v2f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log2_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log2_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $24, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: addq $24, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-06-13 22:32:12 +08:00
|
|
|
entry:
|
|
|
|
%log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
|
|
|
|
<2 x double> <double 42.0, double 42.1>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-06-13 22:32:12 +08:00
|
|
|
ret <2 x double> %log2
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log2 of a constant <3 x float>. The expected code makes three
; log2f libcalls with the intermediate results spilled to the stack. The
; default-feature run reassembles the vector with unpcklps + unpcklpd; the
; +avx/+avx512 runs use two vinsertps instead.
define <3 x float> @constrained_vector_log2_v3f32() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log2_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq log2f
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq log2f
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq log2f
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log2_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq log2f
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq log2f
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq log2f
|
|
|
|
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
|
|
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
|
|
|
|
; AVX-NEXT: addq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-08-01 22:17:19 +08:00
|
|
|
entry:
|
|
|
|
%log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
|
|
|
|
<3 x float> <float 42.0, float 43.0, float 44.0>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-01 22:17:19 +08:00
|
|
|
ret <3 x float> %log2
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log2 of a constant <3 x double>. The expected code makes three
; log2 libcalls. In the default-feature run two results are reloaded into
; xmm0/xmm1 and the last one is stored and pushed with fldl (followed by wait).
; In the +avx/+avx512 runs the lanes are merged into ymm0 with vunpcklpd +
; vinsertf128, with a vzeroupper emitted before the third call.
define <3 x double> @constrained_vector_log2_v3f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log2_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-NEXT: # xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log2_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $56, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 64
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX-NEXT: addq $56, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-08-01 22:17:19 +08:00
|
|
|
entry:
|
|
|
|
%log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
|
|
|
|
<3 x double> <double 42.0, double 42.1, double 42.2>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-01 22:17:19 +08:00
|
|
|
ret <3 x double> %log2
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP log2 of a constant <4 x double>. The expected code makes four
; log2 libcalls and pairs the results with (v)unpcklpd. The default-feature
; run leaves the two halves in xmm0 and xmm1; the +avx/+avx512 runs join them
; into ymm0 with vinsertf128 folded from the spill slot.
define <4 x double> @constrained_vector_log2_v4f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_log2_v4f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq log2
|
|
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_log2_v4f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq log2
|
|
|
|
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: addq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-07-23 22:40:17 +08:00
|
|
|
entry:
|
|
|
|
%log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
|
|
|
|
<4 x double> <double 42.0, double 42.1,
|
|
|
|
double 42.2, double 42.3>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-07-23 22:40:17 +08:00
|
|
|
ret <4 x double> %log2
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP rint of a constant <1 x float>. The default-feature run expects a
; libcall to rintf. The +avx/+avx512 runs expect an inline vroundss with
; immediate 4 (round using the current MXCSR rounding mode) and no call.
define <1 x float> @constrained_vector_rint_v1f32() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_rint_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: pushq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq rintf
|
|
|
|
; CHECK-NEXT: popq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_rint_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-08-15 06:13:11 +08:00
|
|
|
entry:
|
|
|
|
%rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
|
|
|
|
<1 x float> <float 42.0>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-15 06:13:11 +08:00
|
|
|
ret <1 x float> %rint
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP rint of a constant <2 x double>. The default-feature run expects
; two rint libcalls merged with unpcklpd. The +avx/+avx512 runs expect a single
; vroundpd with immediate 4 (current MXCSR rounding mode) on a rip-relative
; memory operand.
define <2 x double> @constrained_vector_rint_v2f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_rint_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_rint_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vroundpd $4, {{.*}}(%rip), %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-06-13 22:32:12 +08:00
|
|
|
entry:
|
|
|
|
%rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
|
|
|
|
<2 x double> <double 42.1, double 42.0>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-06-13 22:32:12 +08:00
|
|
|
ret <2 x double> %rint
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP rint of a constant <3 x float>. The default-feature run expects
; three rintf libcalls reassembled with unpcklps + unpcklpd. The +avx/+avx512
; runs expect three scalar vroundss with immediate 4 (current MXCSR rounding
; mode) combined with two vinsertps, and no calls.
define <3 x float> @constrained_vector_rint_v3f32() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_rint_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq rintf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq rintf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq rintf
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_rint_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:28:10 +08:00
|
|
|
entry:
|
2018-08-01 22:17:19 +08:00
|
|
|
%rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
|
|
|
|
<3 x float> <float 42.0, float 43.0, float 44.0>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-01 22:17:19 +08:00
|
|
|
ret <3 x float> %rint
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP rint of a constant <3 x double>. The default-feature run expects
; three rint libcalls, with two results reloaded into xmm0/xmm1 and the last
; one stored and pushed with fldl (followed by wait). The +avx/+avx512 runs
; expect a scalar vroundsd plus a packed vroundpd (both immediate 4, current
; MXCSR rounding mode) joined into ymm0 with vinsertf128.
define <3 x double> @constrained_vector_rint_v3f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_rint_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-NEXT: # xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_rint_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vroundpd $4, {{.*}}(%rip), %xmm1
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
2018-08-01 22:17:19 +08:00
|
|
|
entry:
|
|
|
|
%rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
|
|
|
|
<3 x double> <double 42.0, double 42.1, double 42.2>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-08-01 22:17:19 +08:00
|
|
|
ret <3 x double> %rint
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP rint of a constant <4 x double>. The default-feature run expects
; four rint libcalls paired with unpcklpd into xmm0 and xmm1. The +avx/+avx512
; runs expect a single 256-bit vroundpd with immediate 4 (current MXCSR
; rounding mode) on a rip-relative memory operand.
define <4 x double> @constrained_vector_rint_v4f64() #0 {
|
2018-11-05 23:28:10 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_rint_v4f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq rint
|
|
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_rint_v4f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vroundpd $4, {{.*}}(%rip), %ymm0
|
|
|
|
; AVX-NEXT: retq
|
2018-07-23 22:40:17 +08:00
|
|
|
entry:
|
|
|
|
%rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
|
|
|
|
<4 x double> <double 42.1, double 42.2,
|
|
|
|
double 42.3, double 42.4>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-07-23 22:40:17 +08:00
|
|
|
ret <4 x double> %rint
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP nearbyint of a constant <1 x float>. The default SSE run emits a
; libm `nearbyintf` call (pushq/popq only keep the stack aligned around it);
; the AVX runs select vroundss with immediate $12 (current MXCSR rounding
; mode, inexact exception suppressed).
define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq nearbyintf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
      <1 x float> <float 42.0>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <1 x float> %nearby
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP nearbyint of a constant <2 x double>. The default SSE run makes two
; libm `nearbyint` calls and merges the results with unpcklpd; the AVX runs
; fold the constant-pool load into one 128-bit vroundpd $12.
define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vroundpd $12, {{.*}}(%rip), %xmm0
; AVX-NEXT: retq
entry:
  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
      <2 x double> <double 42.1, double 42.0>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <2 x double> %nearby
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP nearbyint of a constant <3 x float>. The default SSE run makes
; three libm `nearbyintf` calls, spilling intermediate lanes to the stack and
; recombining them with unpcklps/unpcklpd; the AVX runs round each lane with
; vroundss $12 and assemble the vector with two vinsertps.
define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq nearbyintf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq nearbyintf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq nearbyintf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm1, %xmm1, %xmm1
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vroundss $12, %xmm2, %xmm2, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
      <3 x float> <float 42.0, float 43.0, float 44.0>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <3 x float> %nearby
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP nearbyint of a constant <3 x double>. The default SSE run makes
; three libm `nearbyint` calls; the third result is stored to the stack and
; reloaded with fldl (followed by `wait`), while the first two lanes are
; reloaded into xmm0/xmm1. The AVX runs use vroundsd $12 for one lane,
; vroundpd $12 for the other two, and join them with vinsertf128.
; NOTE(review): the name says `nearby` where the sibling tests say
; `nearbyint`; it is kept as-is because the label assertions below match it.
define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; CHECK-LABEL: constrained_vector_nearby_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearby_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT: vroundpd $12, {{.*}}(%rip), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
      <3 x double> <double 42.0, double 42.1, double 42.2>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <3 x double> %nearby
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP nearbyint of a constant <4 x double>. The default SSE run
; scalarizes into four libm `nearbyint` calls and rebuilds the two 128-bit
; halves with unpcklpd; the AVX runs select a single 256-bit vroundpd $12.
define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_nearbyint_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vroundpd $12, {{.*}}(%rip), %ymm0
; AVX-NEXT: retq
entry:
  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
      <4 x double> <double 42.1, double 42.2,
                    double 42.3, double 42.4>,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <4 x double> %nearby
}
|
2018-06-13 22:32:12 +08:00
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception maxnum of two constant <1 x float> operands. Both the
; default SSE run and the AVX runs lower this to a libm `fmaxf` call; the only
; difference is the VEX-encoded loads.
define <1 x float> @constrained_vector_maxnum_v1f32() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fmaxf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_maxnum_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq fmaxf
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
      <1 x float> <float 42.0>, <1 x float> <float 41.0>,
      metadata !"fpexcept.strict") #0
  ret <1 x float> %max
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception maxnum of two constant <2 x double> operands. Every run
; scalarizes into two libm `fmax` calls, spills the first result, and merges
; the lanes with (v)unpcklpd folded from the spill slot.
define <2 x double> @constrained_vector_maxnum_v2f64() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_maxnum_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmax
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmax
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
      <2 x double> <double 43.0, double 42.0>,
      <2 x double> <double 41.0, double 40.0>,
      metadata !"fpexcept.strict") #0
  ret <2 x double> %max
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception maxnum of two constant <3 x float> operands. Every run
; scalarizes into three libm `fmaxf` calls with the intermediate lanes spilled
; to the stack. The default SSE run rebuilds the vector with
; unpcklps/unpcklpd; the AVX runs use two vinsertps (the second folded from
; the spill slot).
define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fmaxf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fmaxf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fmaxf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_maxnum_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq fmaxf
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq fmaxf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq fmaxf
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
      <3 x float> <float 43.0, float 44.0, float 45.0>,
      <3 x float> <float 41.0, float 42.0, float 43.0>,
      metadata !"fpexcept.strict") #0
  ret <3 x float> %max
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception maxnum of two constant <3 x double> operands. Every run
; scalarizes into three libm `fmax` calls. In the default SSE run the third
; result is stored to the stack and reloaded with fldl (followed by `wait`),
; and the first two lanes are reloaded into xmm0/xmm1. In the AVX runs the
; first two lanes are packed with vunpcklpd, spilled as a 32-byte ymm value
; across the last call (preceded by vzeroupper), and joined with vinsertf128.
; NOTE(review): the name says `max` where the sibling tests say `maxnum`; it
; is kept as-is because the label assertions below match it.
define <3 x double> @constrained_vector_max_v3f64() #0 {
; CHECK-LABEL: constrained_vector_max_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_max_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmax
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmax
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq fmax
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
      <3 x double> <double 43.0, double 44.0, double 45.0>,
      <3 x double> <double 40.0, double 41.0, double 42.0>,
      metadata !"fpexcept.strict") #0
  ret <3 x double> %max
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception maxnum of two constant <4 x double> operands. Every run
; scalarizes into four libm `fmax` calls and pairs the results with
; (v)unpcklpd. The default SSE run returns the two halves in xmm0/xmm1; the
; AVX runs combine them into ymm0 with a vinsertf128 folded from the spill
; slot.
define <4 x double> @constrained_vector_maxnum_v4f64() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_maxnum_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmax
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmax
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmax
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmax
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
      <4 x double> <double 44.0, double 45.0,
                    double 46.0, double 47.0>,
      <4 x double> <double 40.0, double 41.0,
                    double 42.0, double 43.0>,
      metadata !"fpexcept.strict") #0
  ret <4 x double> %max
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception minnum of two constant <1 x float> operands. Both the
; default SSE run and the AVX runs lower this to a libm `fminf` call; the only
; difference is the VEX-encoded loads.
define <1 x float> @constrained_vector_minnum_v1f32() #0 {
; CHECK-LABEL: constrained_vector_minnum_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fminf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_minnum_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq fminf
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
      <1 x float> <float 42.0>, <1 x float> <float 41.0>,
      metadata !"fpexcept.strict") #0
  ret <1 x float> %min
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception minnum of two constant <2 x double> operands. Every run
; scalarizes into two libm `fmin` calls, spills the first result, and merges
; the lanes with (v)unpcklpd folded from the spill slot.
define <2 x double> @constrained_vector_minnum_v2f64() #0 {
; CHECK-LABEL: constrained_vector_minnum_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_minnum_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmin
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmin
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
      <2 x double> <double 43.0, double 42.0>,
      <2 x double> <double 41.0, double 40.0>,
      metadata !"fpexcept.strict") #0
  ret <2 x double> %min
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception minnum of two constant <3 x float> operands. Every run
; scalarizes into three libm `fminf` calls with the intermediate lanes spilled
; to the stack. The default SSE run rebuilds the vector with
; unpcklps/unpcklpd; the AVX runs use two vinsertps (the second folded from
; the spill slot).
define <3 x float> @constrained_vector_minnum_v3f32() #0 {
; CHECK-LABEL: constrained_vector_minnum_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fminf
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fminf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fminf
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_minnum_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq fminf
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq fminf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq fminf
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
      <3 x float> <float 43.0, float 44.0, float 45.0>,
      <3 x float> <float 41.0, float 42.0, float 43.0>,
      metadata !"fpexcept.strict") #0
  ret <3 x float> %min
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception minnum of two constant <3 x double> operands. Every run
; scalarizes into three libm `fmin` calls. In the default SSE run the third
; result is stored to the stack and reloaded with fldl (followed by `wait`),
; and the first two lanes are reloaded into xmm0/xmm1. In the AVX runs the
; first two lanes are packed with vunpcklpd, spilled as a 32-byte ymm value
; across the last call (preceded by vzeroupper), and joined with vinsertf128.
; NOTE(review): the name says `min` where the sibling tests say `minnum`; it
; is kept as-is because the label assertions below match it.
define <3 x double> @constrained_vector_min_v3f64() #0 {
; CHECK-LABEL: constrained_vector_min_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_min_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 64
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmin
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmin
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq fmin
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $56, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
      <3 x double> <double 43.0, double 44.0, double 45.0>,
      <3 x double> <double 40.0, double 41.0, double 42.0>,
      metadata !"fpexcept.strict") #0
  ret <3 x double> %min
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-exception minnum of two constant <4 x double> operands. Every run
; scalarizes into four libm `fmin` calls and pairs the results with
; (v)unpcklpd. The default SSE run returns the two halves in xmm0/xmm1; the
; AVX runs combine them into ymm0 with a vinsertf128 folded from the spill
; slot.
define <4 x double> @constrained_vector_minnum_v4f64() #0 {
; CHECK-LABEL: constrained_vector_minnum_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_minnum_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmin
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmin
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmin
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmin
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
      <4 x double> <double 44.0, double 45.0,
                    double 46.0, double 47.0>,
      <4 x double> <double 40.0, double 41.0,
                    double 42.0, double 43.0>,
      metadata !"fpexcept.strict") #0
  ret <4 x double> %min
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <1 x float> <42.0> to
; <1 x i32> under fpexcept.strict. The default-run checks and the shared AVX
; checks both expect one scalar truncating convert (cvttss2si or vcvttss2si)
; of a RIP-relative operand into %eax.
define <1 x i32> @constrained_vector_fptosi_v1i32_v1f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(
|
|
|
|
<1 x float><float 42.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <1 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <2 x float> <42.0, 43.0>
; to <2 x i32> under fpexcept.strict. Both check groups expect a single packed
; truncating convert (cvttps2dq or vcvttps2dq) of a RIP-relative operand into
; %xmm0.
define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-27 13:25:22 +08:00
|
|
|
; CHECK-NEXT: cvttps2dq {{.*}}(%rip), %xmm0
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
2019-12-27 13:25:22 +08:00
|
|
|
; AVX-NEXT: vcvttps2dq {{.*}}(%rip), %xmm0
|
2019-08-29 00:33:36 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(
|
|
|
|
<2 x float><float 42.0, float 43.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <2 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <3 x float>
; <42.0, 43.0, 44.0> to <3 x i32> under fpexcept.strict. Both check groups
; expect three scalar converts through %eax. The default run assembles the
; vector with movd plus punpckldq and punpcklqdq, the AVX runs with vmovd
; plus vpinsrd.
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax
|
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax
|
|
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax
|
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax
|
|
|
|
; AVX-NEXT: vmovd %eax, %xmm0
|
|
|
|
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax
|
|
|
|
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax
|
|
|
|
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(
|
|
|
|
<3 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <3 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <4 x float>
; <42.0, 43.0, 44.0, 45.0> to <4 x i32> under fpexcept.strict. Both check
; groups expect a single packed truncating convert (cvttps2dq or vcvttps2dq)
; of a RIP-relative operand into %xmm0.
define <4 x i32> @constrained_vector_fptosi_v4i32_v4f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-19 14:43:45 +08:00
|
|
|
; CHECK-NEXT: cvttps2dq {{.*}}(%rip), %xmm0
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX-NEXT: vcvttps2dq {{.*}}(%rip), %xmm0
|
2019-08-29 00:33:36 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(
|
|
|
|
<4 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0, float 45.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <4 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <1 x float> <42.0> to
; <1 x i64> under fpexcept.strict. Both check groups expect one scalar
; truncating convert (cvttss2si or vcvttss2si) of a RIP-relative operand into
; the 64-bit register %rax.
define <1 x i64> @constrained_vector_fptosi_v1i64_v1f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(
|
|
|
|
<1 x float><float 42.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <1 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <2 x float> <42.0, 43.0>
; to <2 x i64> under fpexcept.strict. Both check groups expect two scalar
; 64-bit converts through %rax, moved into xmm registers and combined with
; punpcklqdq (vpunpcklqdq in the AVX runs).
define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm1
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm0
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v2i64_v2f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(
|
|
|
|
<2 x float><float 42.0, float 43.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <2 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <3 x float>
; <42.0, 43.0, 44.0> to <3 x i64> under fpexcept.strict. The default run
; expects only three scalar converts into %rcx, %rdx and %rax with no vector
; assembly. The AVX1 (+avx only) and AVX512 checks expect three scalar
; converts packed into %ymm0 via vpunpcklqdq and a 128-bit insert
; (vinsertf128 for AVX1, vinserti128 for AVX512).
define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rcx
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rdx
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(
|
|
|
|
<3 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <3 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <4 x float>
; <42.0, 43.0, 44.0, 45.0> to <4 x i64> under fpexcept.strict. The default
; run expects four scalar converts packed pairwise into %xmm0 and %xmm1.
; AVX1 and AVX512F expect four scalar converts packed into %ymm0 with a
; 128-bit insert. Only the AVX512DQ checks expect a packed convert
; (vcvttps2qq, issued on the zmm register).
define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm1
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm0
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm2
|
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm1
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f32:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f32:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
|
|
|
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
|
|
|
|
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(
|
|
|
|
<4 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0, float 45.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <4 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <1 x double> <42.1> to
; <1 x i32> under fpexcept.strict. Both check groups expect one scalar
; truncating convert (cvttsd2si or vcvttsd2si) of a RIP-relative operand into
; %eax.
define <1 x i32> @constrained_vector_fptosi_v1i32_v1f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(
|
|
|
|
<1 x double><double 42.1>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <1 x i32> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <2 x double>
; <42.1, 42.2> to <2 x i32> under fpexcept.strict. Both check groups expect a
; single packed truncating convert of a RIP-relative operand into %xmm0
; (cvttpd2dq in the default run, vcvttpd2dqx in the AVX runs).
define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-19 14:43:45 +08:00
|
|
|
; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm0
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX-NEXT: vcvttpd2dqx {{.*}}(%rip), %xmm0
|
2019-08-29 00:33:36 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(
|
|
|
|
<2 x double><double 42.1, double 42.2>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <2 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <3 x double>
; <42.1, 42.2, 42.3> to <3 x i32> under fpexcept.strict. Both check groups
; expect three scalar cvttsd2si converts through %eax. The default run
; assembles the vector with movd plus punpckldq and punpcklqdq, the AVX runs
; with vmovd plus vpinsrd.
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
|
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
|
|
|
; AVX-NEXT: vmovd %eax, %xmm0
|
|
|
|
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
|
|
|
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
|
|
|
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(
|
|
|
|
<3 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <3 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <4 x double>
; <42.1, 42.2, 42.3, 42.4> to <4 x i32> under fpexcept.strict. The default
; run expects two packed cvttpd2dq converts merged with unpcklpd. The AVX
; runs expect a single vcvttpd2dqy into %xmm0.
define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-19 14:43:45 +08:00
|
|
|
; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm1
|
|
|
|
; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm0
|
|
|
|
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX-NEXT: vcvttpd2dqy {{.*}}(%rip), %xmm0
|
2019-08-29 00:33:36 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(
|
|
|
|
<4 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3, double 42.4>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <4 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <1 x double> <42.1> to
; <1 x i64> under fpexcept.strict. Both check groups expect one scalar
; truncating convert (cvttsd2si or vcvttsd2si) of a RIP-relative operand into
; the 64-bit register %rax.
define <1 x i64> @constrained_vector_fptosi_v1i64_v1f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(
|
|
|
|
<1 x double><double 42.1>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <1 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <2 x double>
; <42.1, 42.2> to <2 x i64> under fpexcept.strict. The default, AVX1 and
; AVX512F checks expect two scalar 64-bit converts combined with punpcklqdq
; (vpunpcklqdq in the AVX runs). Only the AVX512DQ checks expect a packed
; convert (vcvttpd2qq on the zmm register) followed by vzeroupper.
define <2 x i64> @constrained_vector_fptosi_v2i64_v2f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm1
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm0
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
|
|
|
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1]
|
|
|
|
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(
|
|
|
|
<2 x double><double 42.1, double 42.2>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <2 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <3 x double>
; <42.1, 42.2, 42.3> to <3 x i64> under fpexcept.strict. The default run
; expects only three scalar converts into %rcx, %rdx and %rax with no vector
; assembly. The AVX1 (+avx only) and AVX512 checks expect three scalar
; converts packed into %ymm0 via vpunpcklqdq and a 128-bit insert
; (vinsertf128 for AVX1, vinserti128 for AVX512).
define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rcx
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rdx
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(
|
|
|
|
<3 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <3 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptosi of the constant <4 x double>
; <42.1, 42.2, 42.3, 42.4> to <4 x i64> under fpexcept.strict. The default
; run expects four scalar converts packed pairwise into %xmm0 and %xmm1.
; AVX1 and AVX512F expect four scalar converts packed into %ymm0 with a
; 128-bit insert. Only the AVX512DQ checks expect a packed convert
; (vcvttpd2qq on the zmm register).
define <4 x i64> @constrained_vector_fptosi_v4i64_v4f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm1
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm0
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm2
|
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm1
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
|
|
|
; AVX512DQ-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
|
|
|
|
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(
|
|
|
|
<4 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3, double 42.4>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <4 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptoui of the constant <1 x float> <42.0> to
; <1 x i32> under fpexcept.strict. The default and AVX1 checks expect a
; signed 64-bit convert into %rax of which only %eax is kept (see the kill
; annotation). The AVX512 checks expect the unsigned convert vcvttss2usi
; directly into %eax.
define <1 x i32> @constrained_vector_fptoui_v1i32_v1f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(
|
|
|
|
<1 x float><float 42.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <1 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptoui of the constant <2 x float> <42.0, 43.0>
; to <2 x i32> under fpexcept.strict. The default and AVX1 checks expect two
; signed 64-bit scalar converts whose low 32 bits are placed into %xmm0. The
; AVX512 checks expect a packed vcvttps2udq on the zmm register, with the
; constant loaded as a 4-element vector whose upper two lanes are zero,
; followed by vzeroupper.
define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rcx
|
|
|
|
; AVX1-NEXT: vmovd %ecx, %xmm0
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-12-27 13:25:22 +08:00
|
|
|
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,0.0E+0,0.0E+0]
|
|
|
|
; AVX512-NEXT: vcvttps2udq %zmm0, %zmm0
|
|
|
|
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512-NEXT: vzeroupper
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(
|
|
|
|
<2 x float><float 42.0, float 43.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <2 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptoui of the constant <3 x float>
; <42.0, 43.0, 44.0> to <3 x i32> under fpexcept.strict. The default and AVX1
; checks expect three signed 64-bit scalar converts whose low 32 bits are
; assembled into %xmm0. The AVX512 checks expect three unsigned scalar
; converts (vcvttss2usi into %eax) inserted with vmovd and vpinsrd.
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rcx
|
|
|
|
; AVX1-NEXT: vmovd %ecx, %xmm0
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax
|
|
|
|
; AVX512-NEXT: vmovd %eax, %xmm0
|
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax
|
|
|
|
; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax
|
|
|
|
; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(
|
|
|
|
<3 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <3 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP conversion test - fptoui of the constant <4 x float>
; <42.0, 43.0, 44.0, 45.0> to <4 x i32> under fpexcept.strict. The default and
; AVX1 checks expect an expansion through the signed convert, namely a
; less-than compare against a 2.14748365E+9 splat (presumably 2^31 as printed
; at float precision - confirm), a masked subtract, cvttps2dq, and a final
; xor with a mask-selected integer offset. The AVX512 checks expect a packed
; vcvttps2udq on the zmm register followed by vzeroupper.
define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2020-01-05 11:18:50 +08:00
|
|
|
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
|
|
|
|
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm2
|
|
|
|
; CHECK-NEXT: cmpltps %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm3
|
|
|
|
; CHECK-NEXT: andnps {{.*}}(%rip), %xmm3
|
|
|
|
; CHECK-NEXT: andnps %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: subps %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: cvttps2dq %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: xorps %xmm3, %xmm0
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
2020-01-05 11:18:50 +08:00
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
|
|
|
|
; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
|
|
|
|
; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm4, %xmm0, %xmm0
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-12-25 16:10:10 +08:00
|
|
|
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
|
|
|
|
; AVX512-NEXT: vcvttps2udq %zmm0, %zmm0
|
|
|
|
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512-NEXT: vzeroupper
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(
|
|
|
|
<4 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0, float 45.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <4 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: comiss %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: ja .LBB115_2
|
|
|
|
; CHECK-NEXT: # %bb.1: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: .LBB115_2: # %entry
|
|
|
|
; CHECK-NEXT: subss %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: cvttss2si %xmm0, %rcx
|
|
|
|
; CHECK-NEXT: setbe %al
|
|
|
|
; CHECK-NEXT: movzbl %al, %eax
|
|
|
|
; CHECK-NEXT: shlq $63, %rax
|
|
|
|
; CHECK-NEXT: xorq %rcx, %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vcomiss %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: ja .LBB115_2
|
|
|
|
; AVX1-NEXT: # %bb.1: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: .LBB115_2: # %entry
|
|
|
|
; AVX1-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; AVX1-NEXT: setbe %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: shlq $63, %rax
|
|
|
|
; AVX1-NEXT: xorq %rcx, %rax
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(
|
|
|
|
<1 x float><float 42.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <1 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP test: converts the constant vector <42.0, 43.0> from <2 x float>
; to <2 x i64> through the constrained fptoui intrinsic ("fpexcept.strict").
; Expected code per RUN configuration:
;  - baseline run (CHECK prefix): for each element, compare the source against
;    a threshold loaded from memory, pick 0.0 or the threshold as the offset
;    (the ja skips the movaps when the threshold is above the source), subss
;    the offset, do a signed cvttss2si, then xor in bit 63 produced from the
;    same compare via setbe/movzbl/shlq $63.
;  - +avx run (AVX1 prefix): the same sequence using VEX-encoded instructions.
;  - AVX-512 runs (AVX512 prefix): one vcvttss2usi per element, no fixup.
; In every configuration the two 64-bit results are packed with (v)punpcklqdq.
; NOTE(review): the threshold is presumably 2^63, matching the expandFP_TO_UINT
; description quoted inside this block; the constant-pool value itself is not
; visible in the assertions - confirm.
; NOTE(review): the bare `|`, timestamp and commit-message lines interleaved
; with the IR look like blame-view residue rather than test content - confirm
; before feeding this text to llc/FileCheck.
define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: comiss %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: xorps %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: ja .LBB116_2
|
|
|
|
; CHECK-NEXT: # %bb.1: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm3
|
|
|
|
; CHECK-NEXT: .LBB116_2: # %entry
|
|
|
|
; CHECK-NEXT: subss %xmm3, %xmm2
|
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: cvttss2si %xmm2, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm2
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: comiss %xmm3, %xmm1
|
|
|
|
; CHECK-NEXT: ja .LBB116_4
|
|
|
|
; CHECK-NEXT: # %bb.3: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: .LBB116_4: # %entry
|
|
|
|
; CHECK-NEXT: subss %xmm0, %xmm3
|
|
|
|
; CHECK-NEXT: cvttss2si %xmm3, %rax
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm0
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX1-NEXT: vcomiss %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: ja .LBB116_2
|
|
|
|
; AVX1-NEXT: # %bb.1: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: .LBB116_2: # %entry
|
|
|
|
; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
|
|
|
; AVX1-NEXT: vcomiss %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: ja .LBB116_4
|
|
|
|
; AVX1-NEXT: # %bb.3: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: .LBB116_4: # %entry
|
|
|
|
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
|
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vcvttss2si %xmm0, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v2i64_v2f32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
; IR under test: a single constrained fptoui call on a constant operand whose
; result is returned directly.
entry:
|
|
|
|
%result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(
|
|
|
|
<2 x float><float 42.0, float 43.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <2 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP test: converts the constant vector <42.0, 43.0, 44.0> from
; <3 x float> to <3 x i64> through the constrained fptoui intrinsic
; ("fpexcept.strict").
; Expected code per RUN configuration:
;  - baseline run (CHECK prefix): three copies of the compare / select-offset /
;    subss / signed cvttss2si / xor-bit-63 sequence, one per element. The
;    results end up in %rax, %rdx and %rcx and no vector packing is emitted
;    before retq.
;  - +avx run (AVX1 prefix): the same per-element sequence with VEX-encoded
;    instructions; the first two results are packed with vpunpcklqdq and the
;    last one is placed in the upper half of %ymm0 with vinsertf128.
;  - AVX-512 runs (AVX512 prefix): one vcvttss2usi per element, assembled with
;    vpunpcklqdq and vinserti128.
; NOTE(review): the compare threshold is presumably 2^63, matching the
; expandFP_TO_UINT description quoted inside this block; the constant-pool
; value itself is not visible in the assertions - confirm.
; NOTE(review): the bare `|`, timestamp and commit-message lines interleaved
; with the IR look like blame-view residue rather than test content - confirm
; before feeding this text to llc/FileCheck.
define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: comiss %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: xorps %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: ja .LBB117_2
|
|
|
|
; CHECK-NEXT: # %bb.1: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm3
|
|
|
|
; CHECK-NEXT: .LBB117_2: # %entry
|
|
|
|
; CHECK-NEXT: subss %xmm3, %xmm2
|
|
|
|
; CHECK-NEXT: cvttss2si %xmm2, %rcx
|
|
|
|
; CHECK-NEXT: setbe %al
|
|
|
|
; CHECK-NEXT: movzbl %al, %eax
|
|
|
|
; CHECK-NEXT: shlq $63, %rax
|
|
|
|
; CHECK-NEXT: xorq %rcx, %rax
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: comiss %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: xorps %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: ja .LBB117_4
|
|
|
|
; CHECK-NEXT: # %bb.3: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm3
|
|
|
|
; CHECK-NEXT: .LBB117_4: # %entry
|
|
|
|
; CHECK-NEXT: subss %xmm3, %xmm2
|
|
|
|
; CHECK-NEXT: cvttss2si %xmm2, %rcx
|
|
|
|
; CHECK-NEXT: setbe %dl
|
|
|
|
; CHECK-NEXT: movzbl %dl, %edx
|
|
|
|
; CHECK-NEXT: shlq $63, %rdx
|
|
|
|
; CHECK-NEXT: xorq %rcx, %rdx
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: comiss %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: ja .LBB117_6
|
|
|
|
; CHECK-NEXT: # %bb.5: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: .LBB117_6: # %entry
|
|
|
|
; CHECK-NEXT: subss %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: cvttss2si %xmm2, %rsi
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rsi, %rcx
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX1-NEXT: vcomiss %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: ja .LBB117_2
|
|
|
|
; AVX1-NEXT: # %bb.1: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: .LBB117_2: # %entry
|
|
|
|
; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
|
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vcvttss2si %xmm2, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
|
|
|
; AVX1-NEXT: vcomiss %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: ja .LBB117_4
|
|
|
|
; AVX1-NEXT: # %bb.3: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm4
|
|
|
|
; AVX1-NEXT: .LBB117_4: # %entry
|
|
|
|
; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm3, %rax
|
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
|
|
|
; AVX1-NEXT: vcomiss %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: ja .LBB117_6
|
|
|
|
; AVX1-NEXT: # %bb.5: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: .LBB117_6: # %entry
|
|
|
|
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
|
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vcvttss2si %xmm0, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm0
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
; IR under test: a single constrained fptoui call on a constant operand whose
; result is returned directly.
entry:
|
|
|
|
%result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(
|
|
|
|
<3 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <3 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: comiss %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: xorps %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: ja .LBB118_2
|
|
|
|
; CHECK-NEXT: # %bb.1: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm3
|
|
|
|
; CHECK-NEXT: .LBB118_2: # %entry
|
|
|
|
; CHECK-NEXT: subss %xmm3, %xmm0
|
|
|
|
; CHECK-NEXT: cvttss2si %xmm0, %rcx
|
|
|
|
; CHECK-NEXT: setbe %al
|
|
|
|
; CHECK-NEXT: movzbl %al, %eax
|
|
|
|
; CHECK-NEXT: shlq $63, %rax
|
|
|
|
; CHECK-NEXT: xorq %rcx, %rax
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: comiss %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: xorps %xmm4, %xmm4
|
|
|
|
; CHECK-NEXT: ja .LBB118_4
|
|
|
|
; CHECK-NEXT: # %bb.3: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm4
|
|
|
|
; CHECK-NEXT: .LBB118_4: # %entry
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm3
|
|
|
|
; CHECK-NEXT: subss %xmm4, %xmm0
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: cvttss2si %xmm0, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm0
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: comiss %xmm4, %xmm2
|
|
|
|
; CHECK-NEXT: xorps %xmm5, %xmm5
|
|
|
|
; CHECK-NEXT: ja .LBB118_6
|
|
|
|
; CHECK-NEXT: # %bb.5: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm5
|
|
|
|
; CHECK-NEXT: .LBB118_6: # %entry
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
|
|
|
; CHECK-NEXT: subss %xmm5, %xmm4
|
|
|
|
; CHECK-NEXT: cvttss2si %xmm4, %rax
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm3
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: comiss %xmm4, %xmm2
|
|
|
|
; CHECK-NEXT: ja .LBB118_8
|
|
|
|
; CHECK-NEXT: # %bb.7: # %entry
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: .LBB118_8: # %entry
|
|
|
|
; CHECK-NEXT: subss %xmm1, %xmm4
|
|
|
|
; CHECK-NEXT: cvttss2si %xmm4, %rax
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm1
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX1-NEXT: vcomiss %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: ja .LBB118_2
|
|
|
|
; AVX1-NEXT: # %bb.1: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: .LBB118_2: # %entry
|
|
|
|
; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm2, %rcx
|
|
|
|
; AVX1-NEXT: setbe %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: shlq $63, %rax
|
|
|
|
; AVX1-NEXT: xorq %rcx, %rax
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vcomiss %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: ja .LBB118_4
|
|
|
|
; AVX1-NEXT: # %bb.3: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm4
|
|
|
|
; AVX1-NEXT: .LBB118_4: # %entry
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm3, %rax
|
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
|
|
|
|
; AVX1-NEXT: vcomiss %xmm4, %xmm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm5, %xmm5, %xmm5
|
|
|
|
; AVX1-NEXT: ja .LBB118_6
|
|
|
|
; AVX1-NEXT: # %bb.5: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm5
|
|
|
|
; AVX1-NEXT: .LBB118_6: # %entry
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
|
|
|
; AVX1-NEXT: vsubss %xmm5, %xmm4, %xmm3
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm3, %rax
|
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
|
|
|
|
; AVX1-NEXT: vcomiss %xmm4, %xmm0
|
|
|
|
; AVX1-NEXT: ja .LBB118_8
|
|
|
|
; AVX1-NEXT: # %bb.7: # %entry
|
|
|
|
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: .LBB118_8: # %entry
|
|
|
|
; AVX1-NEXT: vsubss %xmm1, %xmm4, %xmm0
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vcvttss2si %xmm0, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f32:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
|
|
|
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
|
|
|
|
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(
|
|
|
|
<4 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0, float 45.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <4 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <1 x i32> @constrained_vector_fptoui_v1i32_v1f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(
|
|
|
|
<1 x double><double 42.1>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <1 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rcx
|
|
|
|
; AVX1-NEXT: vmovd %ecx, %xmm0
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-12-27 04:26:39 +08:00
|
|
|
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,0.0E+0,0.0E+0]
|
|
|
|
; AVX512-NEXT: vcvttpd2udq %zmm0, %ymm0
|
|
|
|
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
|
|
|
; AVX512-NEXT: vzeroupper
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(
|
|
|
|
<2 x double><double 42.1, double 42.2>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <2 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rcx
|
|
|
|
; AVX1-NEXT: vmovd %ecx, %xmm0
|
|
|
|
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
|
|
|
; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax
|
|
|
|
; AVX512-NEXT: vmovd %eax, %xmm0
|
|
|
|
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax
|
|
|
|
; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax
|
|
|
|
; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(
|
|
|
|
<3 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <3 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm1
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm2
|
2019-11-20 07:51:19 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
2020-01-05 11:18:50 +08:00
|
|
|
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
|
|
|
|
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
|
|
|
|
; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm2
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
|
|
|
|
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2]
|
|
|
|
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
|
|
|
|
; AVX1-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
|
|
|
|
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vblendvpd %ymm2, %ymm4, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vsubpd %ymm0, %ymm1, %ymm0
|
|
|
|
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vzeroupper
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-12-25 16:10:10 +08:00
|
|
|
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
|
|
|
|
; AVX512-NEXT: vcvttpd2udq %zmm0, %ymm0
|
|
|
|
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
|
|
|
; AVX512-NEXT: vzeroupper
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(
|
|
|
|
<4 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3, double 42.4>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <4 x i32> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
|
|
|
; CHECK-NEXT: comisd %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: xorpd %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: ja .LBB123_2
|
|
|
|
; CHECK-NEXT: # %bb.1: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: .LBB123_2: # %entry
|
|
|
|
; CHECK-NEXT: subsd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm0, %rcx
|
|
|
|
; CHECK-NEXT: setbe %al
|
|
|
|
; CHECK-NEXT: movzbl %al, %eax
|
|
|
|
; CHECK-NEXT: shlq $63, %rax
|
|
|
|
; CHECK-NEXT: xorq %rcx, %rax
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vcomisd %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: ja .LBB123_2
|
|
|
|
; AVX1-NEXT: # %bb.1: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: .LBB123_2: # %entry
|
|
|
|
; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
|
|
|
|
; AVX1-NEXT: setbe %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: shlq $63, %rax
|
|
|
|
; AVX1-NEXT: xorq %rcx, %rax
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(
|
|
|
|
<1 x double><double 42.1>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <1 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: comisd %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: xorpd %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: xorpd %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: ja .LBB124_2
|
|
|
|
; CHECK-NEXT: # %bb.1: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm1, %xmm3
|
|
|
|
; CHECK-NEXT: .LBB124_2: # %entry
|
|
|
|
; CHECK-NEXT: subsd %xmm3, %xmm2
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: cvttsd2si %xmm2, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm2
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
|
|
|
|
; CHECK-NEXT: comisd %xmm3, %xmm1
|
|
|
|
; CHECK-NEXT: ja .LBB124_4
|
|
|
|
; CHECK-NEXT: # %bb.3: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: .LBB124_4: # %entry
|
|
|
|
; CHECK-NEXT: subsd %xmm0, %xmm3
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm3, %rax
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm0
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vcomisd %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: ja .LBB124_2
|
|
|
|
; AVX1-NEXT: # %bb.1: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: .LBB124_2: # %entry
|
|
|
|
; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm2, %rax
|
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vcomisd %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: ja .LBB124_4
|
|
|
|
; AVX1-NEXT: # %bb.3: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: .LBB124_4: # %entry
|
|
|
|
; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
|
|
|
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1]
|
|
|
|
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(
|
|
|
|
<2 x double><double 42.1, double 42.2>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <2 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: comisd %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: xorpd %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: xorpd %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: ja .LBB125_2
|
|
|
|
; CHECK-NEXT: # %bb.1: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm1, %xmm3
|
|
|
|
; CHECK-NEXT: .LBB125_2: # %entry
|
|
|
|
; CHECK-NEXT: subsd %xmm3, %xmm2
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm2, %rcx
|
|
|
|
; CHECK-NEXT: setbe %al
|
|
|
|
; CHECK-NEXT: movzbl %al, %eax
|
|
|
|
; CHECK-NEXT: shlq $63, %rax
|
|
|
|
; CHECK-NEXT: xorq %rcx, %rax
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
|
|
|
; CHECK-NEXT: comisd %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: xorpd %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: ja .LBB125_4
|
|
|
|
; CHECK-NEXT: # %bb.3: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm1, %xmm3
|
|
|
|
; CHECK-NEXT: .LBB125_4: # %entry
|
|
|
|
; CHECK-NEXT: subsd %xmm3, %xmm2
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm2, %rcx
|
|
|
|
; CHECK-NEXT: setbe %dl
|
|
|
|
; CHECK-NEXT: movzbl %dl, %edx
|
|
|
|
; CHECK-NEXT: shlq $63, %rdx
|
|
|
|
; CHECK-NEXT: xorq %rcx, %rdx
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
|
|
|
; CHECK-NEXT: comisd %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: ja .LBB125_6
|
|
|
|
; CHECK-NEXT: # %bb.5: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: .LBB125_6: # %entry
|
|
|
|
; CHECK-NEXT: subsd %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm2, %rsi
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rsi, %rcx
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vcomisd %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: ja .LBB125_2
|
|
|
|
; AVX1-NEXT: # %bb.1: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: .LBB125_2: # %entry
|
|
|
|
; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm2, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm2
|
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vcomisd %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: ja .LBB125_4
|
|
|
|
; AVX1-NEXT: # %bb.3: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm4
|
|
|
|
; AVX1-NEXT: .LBB125_4: # %entry
|
|
|
|
; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vcomisd %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: ja .LBB125_6
|
|
|
|
; AVX1-NEXT: # %bb.5: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: .LBB125_6: # %entry
|
|
|
|
; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm0
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
2019-11-20 07:51:19 +08:00
|
|
|
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX512-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(
|
|
|
|
<3 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <3 x i64> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: comisd %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: xorpd %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: xorpd %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: ja .LBB126_2
|
|
|
|
; CHECK-NEXT: # %bb.1: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm2, %xmm3
|
|
|
|
; CHECK-NEXT: .LBB126_2: # %entry
|
|
|
|
; CHECK-NEXT: subsd %xmm3, %xmm0
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm0, %rcx
|
|
|
|
; CHECK-NEXT: setbe %al
|
|
|
|
; CHECK-NEXT: movzbl %al, %eax
|
|
|
|
; CHECK-NEXT: shlq $63, %rax
|
|
|
|
; CHECK-NEXT: xorq %rcx, %rax
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: comisd %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: xorpd %xmm4, %xmm4
|
|
|
|
; CHECK-NEXT: ja .LBB126_4
|
|
|
|
; CHECK-NEXT: # %bb.3: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm2, %xmm4
|
|
|
|
; CHECK-NEXT: .LBB126_4: # %entry
|
|
|
|
; CHECK-NEXT: movq %rax, %xmm3
|
|
|
|
; CHECK-NEXT: subsd %xmm4, %xmm0
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm0, %rax
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm0
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero
|
|
|
|
; CHECK-NEXT: comisd %xmm4, %xmm2
|
|
|
|
; CHECK-NEXT: xorpd %xmm5, %xmm5
|
|
|
|
; CHECK-NEXT: ja .LBB126_6
|
|
|
|
; CHECK-NEXT: # %bb.5: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm2, %xmm5
|
|
|
|
; CHECK-NEXT: .LBB126_6: # %entry
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
|
|
|
; CHECK-NEXT: subsd %xmm5, %xmm4
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm4, %rax
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm3
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero
|
|
|
|
; CHECK-NEXT: comisd %xmm4, %xmm2
|
|
|
|
; CHECK-NEXT: ja .LBB126_8
|
|
|
|
; CHECK-NEXT: # %bb.7: # %entry
|
|
|
|
; CHECK-NEXT: movapd %xmm2, %xmm1
|
|
|
|
; CHECK-NEXT: .LBB126_8: # %entry
|
|
|
|
; CHECK-NEXT: subsd %xmm1, %xmm4
|
|
|
|
; CHECK-NEXT: cvttsd2si %xmm4, %rax
|
|
|
|
; CHECK-NEXT: setbe %cl
|
|
|
|
; CHECK-NEXT: movzbl %cl, %ecx
|
|
|
|
; CHECK-NEXT: shlq $63, %rcx
|
|
|
|
; CHECK-NEXT: xorq %rax, %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, %xmm1
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
2019-08-29 00:33:36 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest to use the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vcomisd %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: ja .LBB126_2
|
|
|
|
; AVX1-NEXT: # %bb.1: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: .LBB126_2: # %entry
|
|
|
|
; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
|
|
|
|
; AVX1-NEXT: setbe %al
|
|
|
|
; AVX1-NEXT: movzbl %al, %eax
|
|
|
|
; AVX1-NEXT: shlq $63, %rax
|
|
|
|
; AVX1-NEXT: xorq %rcx, %rax
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest using the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Src - 0, which now definitely cannot trap (unless Src is a NaN, in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Src is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Src is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: vcomisd %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: ja .LBB126_4
|
|
|
|
; AVX1-NEXT: # %bb.3: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm4
|
|
|
|
; AVX1-NEXT: .LBB126_4: # %entry
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vcomisd %xmm4, %xmm0
|
|
|
|
; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5
|
|
|
|
; AVX1-NEXT: ja .LBB126_6
|
|
|
|
; AVX1-NEXT: # %bb.5: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm5
|
|
|
|
; AVX1-NEXT: .LBB126_6: # %entry
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
|
|
|
; AVX1-NEXT: vsubsd %xmm5, %xmm4, %xmm3
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm3
|
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vcomisd %xmm4, %xmm0
|
|
|
|
; AVX1-NEXT: ja .LBB126_8
|
|
|
|
; AVX1-NEXT: # %bb.7: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: .LBB126_8: # %entry
|
|
|
|
; AVX1-NEXT: vsubsd %xmm1, %xmm4, %xmm0
|
[TargetLowering] Fix another potential FPE in expandFP_TO_UINT
D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)
Instead, I'd suggest using the following sequence:
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Src - 0, which now definitely cannot trap (unless Src is a NaN, in which case we'd want to trap anyway).
In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Src is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.
There is a slight complication in the case where Src is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)
Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper
Differential Revision: https://reviews.llvm.org/D67105
2019-12-07 06:11:04 +08:00
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: setbe %cl
|
|
|
|
; AVX1-NEXT: movzbl %cl, %ecx
|
|
|
|
; AVX1-NEXT: shlq $63, %rcx
|
|
|
|
; AVX1-NEXT: xorq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
2019-11-12 07:51:00 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-19 14:43:45 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
|
|
|
; AVX512DQ-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
|
|
|
|
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-08-29 00:33:36 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(
|
|
|
|
<4 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3, double 42.4>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-08-29 00:33:36 +08:00
|
|
|
ret <4 x i64> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP fptrunc of a constant <1 x double> to <1 x float>, called with
; round.dynamic and fpexcept.strict metadata. Both the default run (CHECK
; prefix) and the AVX runs (AVX prefix) expect a scalar load of the constant
; followed by one scalar (v)cvtsd2ss.
define <1 x float> @constrained_vector_fptrunc_v1f64() #0 {
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptrunc_v1f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptrunc_v1f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
|
|
|
|
<1 x double><double 42.1>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-05-13 21:23:30 +08:00
|
|
|
ret <1 x float> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP fptrunc of a constant <2 x double> to <2 x float> (round.dynamic,
; fpexcept.strict). Both the default run (CHECK prefix) and the AVX runs (AVX
; prefix) expect a single packed convert (cvtpd2ps / vcvtpd2psx) with the
; constant folded in as a RIP-relative memory operand.
define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptrunc_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-26 17:47:37 +08:00
|
|
|
; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm0
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptrunc_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
2019-12-26 17:47:37 +08:00
|
|
|
; AVX-NEXT: vcvtpd2psx {{.*}}(%rip), %xmm0
|
2019-05-13 21:23:30 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
|
|
|
|
<2 x double><double 42.1, double 42.2>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-05-13 21:23:30 +08:00
|
|
|
ret <2 x float> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP fptrunc of a constant <3 x double> to <3 x float> (round.dynamic,
; fpexcept.strict). Both runs expect the operation to be scalarized into three
; (v)cvtsd2ss conversions; the default run (CHECK prefix) reassembles the
; vector with unpcklps + movlhps, the AVX runs (AVX prefix) with two vinsertps.
define <3 x float> @constrained_vector_fptrunc_v3f64() #0 {
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptrunc_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptrunc_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
|
|
|
|
<3 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-05-13 21:23:30 +08:00
|
|
|
ret <3 x float> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP fptrunc of a constant <4 x double> to <4 x float> (round.dynamic,
; fpexcept.strict). The default run (CHECK prefix) expects two 128-bit
; cvtpd2ps conversions joined with unpcklpd; the AVX runs (AVX prefix) expect
; a single vcvtpd2psy on a RIP-relative memory operand.
define <4 x float> @constrained_vector_fptrunc_v4f64() #0 {
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fptrunc_v4f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-26 17:47:37 +08:00
|
|
|
; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm1
|
|
|
|
; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm0
|
|
|
|
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fptrunc_v4f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvtpd2psy {{.*}}(%rip), %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
|
|
|
|
<4 x double><double 42.1, double 42.2,
|
|
|
|
double 42.3, double 42.4>,
|
|
|
|
metadata !"round.dynamic",
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-05-13 21:23:30 +08:00
|
|
|
ret <4 x float> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP fpext of a constant <1 x float> to <1 x double>. The call carries
; only fpexcept.strict metadata (no rounding-mode argument). Both the default
; run (CHECK prefix) and the AVX runs (AVX prefix) expect a scalar load
; followed by one scalar (v)cvtss2sd.
define <1 x double> @constrained_vector_fpext_v1f32() #0 {
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fpext_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fpext_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
|
|
|
|
<1 x float><float 42.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-05-13 21:23:30 +08:00
|
|
|
ret <1 x double> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP fpext of a constant <2 x float> to <2 x double> (fpexcept.strict).
; Both the default run (CHECK prefix) and the AVX runs (AVX prefix) expect a
; single packed (v)cvtps2pd with the constant folded in as a RIP-relative
; memory operand.
define <2 x double> @constrained_vector_fpext_v2f32() #0 {
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fpext_v2f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-26 17:47:37 +08:00
|
|
|
; CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm0
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fpext_v2f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
2019-12-26 17:47:37 +08:00
|
|
|
; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %xmm0
|
2019-05-13 21:23:30 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
|
|
|
|
<2 x float><float 42.0, float 43.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-05-13 21:23:30 +08:00
|
|
|
ret <2 x double> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP fpext of a constant <3 x float> to <3 x double> (fpexcept.strict).
; Both runs expect three scalar (v)cvtss2sd conversions. In the default run
; (CHECK prefix) two results stay in xmm0/xmm1 and the third is stored to the
; stack and reloaded with x87 fldl, followed by an explicit wait. The AVX runs
; (AVX prefix) assemble the result in ymm0 with vmovlhps + vinsertf128.
; NOTE(review): the fldl presumably reflects how the third element of a
; <3 x double> return value is passed back without AVX - confirm against the ABI.
define <3 x double> @constrained_vector_fpext_v3f32() #0 {
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fpext_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: cvtss2sd %xmm2, %xmm2
|
|
|
|
; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fpext_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
|
|
|
|
<3 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-05-13 21:23:30 +08:00
|
|
|
ret <3 x double> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP fpext of a constant <4 x float> to <4 x double> (fpexcept.strict).
; The default run (CHECK prefix) expects two 128-bit cvtps2pd conversions
; (results in xmm1 and xmm0); the AVX runs (AVX prefix) expect one vcvtps2pd
; producing ymm0 from a RIP-relative memory operand.
define <4 x double> @constrained_vector_fpext_v4f32() #0 {
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_fpext_v4f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-26 17:47:37 +08:00
|
|
|
; CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm1
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm0
|
2019-05-13 21:23:30 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_fpext_v4f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %ymm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
|
|
|
|
<4 x float><float 42.0, float 43.0,
|
|
|
|
float 44.0, float 45.0>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2019-05-13 21:23:30 +08:00
|
|
|
ret <4 x double> %result
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP ceil of a constant <1 x float> (fpexcept.strict). The default run
; (CHECK prefix) expects a libcall to ceilf; the AVX runs (AVX prefix) expect
; an inline vroundss with imm8 $10 (round toward +inf, precision/inexact
; exception suppressed per the ROUNDSS immediate encoding).
define <1 x float> @constrained_vector_ceil_v1f32() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_ceil_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: pushq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq ceilf
|
|
|
|
; CHECK-NEXT: popq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_ceil_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
|
|
|
|
<1 x float> <float 1.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <1 x float> %ceil
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP ceil of a constant <2 x double> (fpexcept.strict). The default run
; (CHECK prefix) expects two libcalls to ceil, with the first result spilled
; and merged back via an unpcklpd folded reload; the AVX runs (AVX prefix)
; expect a single vroundpd $10 on a RIP-relative memory operand.
define <2 x double> @constrained_vector_ceil_v2f64() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_ceil_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq ceil
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq ceil
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_ceil_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vroundpd $10, {{.*}}(%rip), %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
|
|
|
|
<2 x double> <double 1.1, double 1.9>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <2 x double> %ceil
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP ceil of a constant <3 x float> (fpexcept.strict). The default run
; (CHECK prefix) expects three libcalls to ceilf whose results are spilled and
; recombined with unpcklps + unpcklpd; the AVX runs (AVX prefix) expect three
; inline vroundss $10 operations recombined with two vinsertps.
define <3 x float> @constrained_vector_ceil_v3f32() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_ceil_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq ceilf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq ceilf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq ceilf
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_ceil_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $10, %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $10, %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
|
|
|
|
<3 x float> <float 1.5, float 2.5, float 3.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <3 x float> %ceil
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP ceil of a constant <3 x double> (fpexcept.strict). The default run
; (CHECK prefix) expects three libcalls to ceil; the last result is stored to
; the stack and reloaded with x87 fldl (followed by wait), while the two
; spilled results are reloaded into xmm0 and xmm1. The AVX runs (AVX prefix)
; expect vroundsd $10 for one element plus vroundpd $10 for the other two,
; combined with vinsertf128.
; NOTE(review): the fldl presumably reflects how the third element of a
; <3 x double> return value is passed back without AVX - confirm against the ABI.
define <3 x double> @constrained_vector_ceil_v3f64() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_ceil_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq ceil
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq ceil
|
|
|
|
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq ceil
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-NEXT: # xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_ceil_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vroundpd $10, {{.*}}(%rip), %xmm1
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
|
|
|
|
<3 x double> <double 1.1, double 1.9, double 1.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <3 x double> %ceil
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP floor of a constant <1 x float> (fpexcept.strict). The default run
; (CHECK prefix) expects a libcall to floorf; the AVX runs (AVX prefix) expect
; an inline vroundss with imm8 $9 (round toward -inf, precision/inexact
; exception suppressed per the ROUNDSS immediate encoding).
define <1 x float> @constrained_vector_floor_v1f32() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_floor_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: pushq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq floorf
|
|
|
|
; CHECK-NEXT: popq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_floor_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
|
|
|
|
<1 x float> <float 1.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <1 x float> %floor
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP floor of a constant <2 x double> (fpexcept.strict). The default
; run (CHECK prefix) expects two libcalls to floor, with the first result
; spilled and merged back via an unpcklpd folded reload; the AVX runs (AVX
; prefix) expect a single vroundpd $9 on a RIP-relative memory operand.
define <2 x double> @constrained_vector_floor_v2f64() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_floor_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq floor
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq floor
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_floor_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vroundpd $9, {{.*}}(%rip), %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
|
|
|
|
<2 x double> <double 1.1, double 1.9>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <2 x double> %floor
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP floor of a constant <3 x float> (fpexcept.strict). The default run
; (CHECK prefix) expects three libcalls to floorf whose results are spilled
; and recombined with unpcklps + unpcklpd; the AVX runs (AVX prefix) expect
; three inline vroundss $9 operations recombined with two vinsertps.
define <3 x float> @constrained_vector_floor_v3f32() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_floor_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq floorf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq floorf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq floorf
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_floor_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $9, %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $9, %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
|
|
|
|
<3 x float> <float 1.5, float 2.5, float 3.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <3 x float> %floor
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP floor of a constant <3 x double> (fpexcept.strict). The default
; run (CHECK prefix) expects three libcalls to floor; the last result is
; stored to the stack and reloaded with x87 fldl (followed by wait), while the
; two spilled results are reloaded into xmm0 and xmm1. The AVX runs (AVX
; prefix) expect vroundsd $9 for one element plus vroundpd $9 for the other
; two, combined with vinsertf128.
; NOTE(review): the fldl presumably reflects how the third element of a
; <3 x double> return value is passed back without AVX - confirm against the ABI.
define <3 x double> @constrained_vector_floor_v3f64() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_floor_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq floor
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq floor
|
|
|
|
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq floor
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-NEXT: # xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_floor_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vroundpd $9, {{.*}}(%rip), %xmm1
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
|
|
|
|
<3 x double> <double 1.1, double 1.9, double 1.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <3 x double> %floor
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP round of a constant <1 x float> (fpexcept.strict). Unlike the ceil
; and floor tests in this file, both the default run (CHECK prefix) and the
; AVX runs (AVX prefix) expect a libcall to roundf; no inline (v)roundss is
; expected.
define <1 x float> @constrained_vector_round_v1f32() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_round_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: pushq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq roundf
|
|
|
|
; CHECK-NEXT: popq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_round_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: pushq %rax
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq roundf
|
|
|
|
; AVX-NEXT: popq %rax
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
|
|
|
|
<1 x float> <float 1.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <1 x float> %round
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP round of a constant <2 x double> (fpexcept.strict). Both the
; default run (CHECK prefix) and the AVX runs (AVX prefix) expect two libcalls
; to round, with the first result spilled to the stack and merged back via a
; (v)unpcklpd folded reload.
define <2 x double> @constrained_vector_round_v2f64() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_round_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq round
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq round
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_round_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $24, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq round
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq round
|
|
|
|
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: addq $24, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
|
|
|
|
<2 x double> <double 1.1, double 1.9>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <2 x double> %round
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Strict-FP round of a constant <3 x float> (fpexcept.strict). Both runs
; expect three libcalls to roundf with intermediate results spilled to the
; stack; the default run (CHECK prefix) recombines them with unpcklps +
; unpcklpd, the AVX runs (AVX prefix) with vinsertps (the second one folding
; the reload of a spilled value).
define <3 x float> @constrained_vector_round_v3f32() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_round_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq roundf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq roundf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq roundf
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_round_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq roundf
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq roundf
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: callq roundf
|
|
|
|
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
|
|
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
|
|
|
|
; AVX-NEXT: addq $40, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
|
|
|
|
<3 x float> <float 1.5, float 2.5, float 3.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <3 x float> %round
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <3 x double> @constrained_vector_round_v3f64() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_round_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq round
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq round
|
|
|
|
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq round
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-NEXT: # xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_round_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: subq $56, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 64
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq round
|
|
|
|
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: callq round
|
|
|
|
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
|
|
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: callq round
|
|
|
|
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX-NEXT: addq $56, %rsp
|
|
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
|
|
|
|
<3 x double> <double 1.1, double 1.9, double 1.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <3 x double> %round
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <1 x float> @constrained_vector_trunc_v1f32() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_trunc_v1f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: pushq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq truncf
|
|
|
|
; CHECK-NEXT: popq %rax
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_trunc_v1f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
|
|
|
|
<1 x float> <float 1.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <1 x float> %trunc
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <2 x double> @constrained_vector_trunc_v2f64() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_trunc_v2f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq trunc
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq trunc
|
|
|
|
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_trunc_v2f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vroundpd $11, {{.*}}(%rip), %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
|
|
|
|
<2 x double> <double 1.1, double 1.9>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <2 x double> %trunc
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <3 x float> @constrained_vector_trunc_v3f32() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_trunc_v3f32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq truncf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq truncf
|
|
|
|
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
|
|
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; CHECK-NEXT: callq truncf
|
|
|
|
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_trunc_v3f32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $11, %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; AVX-NEXT: vroundss $11, %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
|
|
|
|
<3 x float> <float 1.5, float 2.5, float 3.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <3 x float> %trunc
|
|
|
|
}
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
define <3 x double> @constrained_vector_trunc_v3f64() #0 {
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-LABEL: constrained_vector_trunc_v3f64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: subq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq trunc
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq trunc
|
|
|
|
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
|
|
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: callq trunc
|
|
|
|
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
2018-11-05 23:59:49 +08:00
|
|
|
; CHECK-NEXT: # xmm1 = mem[0],zero
|
|
|
|
; CHECK-NEXT: addq $24, %rsp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
|
|
; CHECK-NEXT: retq
|
2019-04-30 00:06:04 +08:00
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_trunc_v3f64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; AVX-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vroundpd $11, {{.*}}(%rip), %xmm1
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
2018-11-05 23:59:49 +08:00
|
|
|
entry:
|
|
|
|
%trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
|
|
|
|
<3 x double> <double 1.1, double 1.9, double 1.5>,
|
2019-10-05 01:03:46 +08:00
|
|
|
metadata !"fpexcept.strict") #0
|
2018-11-05 23:59:49 +08:00
|
|
|
ret <3 x double> %trunc
|
|
|
|
}
|
|
|
|
|
2019-10-17 03:24:47 +08:00
|
|
|
define <1 x double> @constrained_vector_sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvtsi2sd %edi, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x double>
|
|
|
|
@llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <1 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <1 x float> @constrained_vector_sitofp_v1f32_v1i32(<1 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvtsi2ss %edi, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x float>
|
|
|
|
@llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <1 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <1 x double> @constrained_vector_sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rdi, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x double>
|
|
|
|
@llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <1 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <1 x float> @constrained_vector_sitofp_v1f32_v1i64(<1 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x float>
|
|
|
|
@llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <1 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-24 09:44:22 +08:00
|
|
|
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x double>
|
|
|
|
@llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <2 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-27 15:57:33 +08:00
|
|
|
; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
|
|
|
; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
2019-12-27 15:57:33 +08:00
|
|
|
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
|
|
|
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x float>
|
|
|
|
@llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <2 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm1
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
|
|
|
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; CHECK-NEXT: movapd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_sitofp_v2f64_v2i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: constrained_vector_sitofp_v2f64_v2i64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
|
|
|
|
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
2019-12-27 06:43:33 +08:00
|
|
|
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-10-17 03:24:47 +08:00
|
|
|
entry:
|
|
|
|
%result = call <2 x double>
|
|
|
|
@llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <2 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i64:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x float>
|
|
|
|
@llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <2 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movd %xmm0, %eax
|
|
|
|
; CHECK-NEXT: cvtsi2sd %eax, %xmm2
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
|
|
|
; CHECK-NEXT: movd %xmm1, %eax
|
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movd %xmm0, %eax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2sd %eax, %xmm0
|
|
|
|
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: movapd %xmm2, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vextractps $1, %xmm0, %eax
|
|
|
|
; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vmovd %xmm0, %eax
|
|
|
|
; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm2
|
|
|
|
; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX-NEXT: vcvtsi2sd %eax, %xmm3, %xmm0
|
|
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x double>
|
|
|
|
@llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <3 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movd %xmm0, %eax
|
|
|
|
; CHECK-NEXT: cvtsi2ss %eax, %xmm1
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
|
|
|
; CHECK-NEXT: movd %xmm2, %eax
|
|
|
|
; CHECK-NEXT: xorps %xmm2, %xmm2
|
|
|
|
; CHECK-NEXT: cvtsi2ss %eax, %xmm2
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movd %xmm0, %eax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2ss %eax, %xmm0
|
|
|
|
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vextractps $1, %xmm0, %eax
|
|
|
|
; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vmovd %xmm0, %eax
|
|
|
|
; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm2
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX-NEXT: vcvtsi2ss %eax, %xmm3, %xmm0
|
|
|
|
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x float>
|
|
|
|
@llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <3 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rsi, %xmm1
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: cvtsi2sd %rdi, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: cvtsi2sd %rdx, %xmm2
|
|
|
|
; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
|
|
|
|
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
|
|
|
|
; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x double>
|
|
|
|
@llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <3 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
|
|
|
|
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
|
|
|
; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
|
|
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX512-NEXT: vzeroupper
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x float>
|
|
|
|
@llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <3 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-24 09:44:22 +08:00
|
|
|
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm2
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
2019-12-24 09:44:22 +08:00
|
|
|
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x double>
|
|
|
|
@llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <4 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x float> @constrained_vector_sitofp_v4f32_v4i32(<4 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: constrained_vector_sitofp_v4f32_v4i32:
|
|
|
|
; AVX: # %bb.0: # %entry
|
|
|
|
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x float>
|
|
|
|
@llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <4 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm2
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
|
|
|
; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
|
|
|
|
; CHECK-NEXT: movq %xmm1, %rax
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm3
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
|
|
|
; CHECK-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
|
|
|
|
; CHECK-NEXT: movapd %xmm2, %xmm0
|
|
|
|
; CHECK-NEXT: movapd %xmm3, %xmm1
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_sitofp_v4f64_v4i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vmovq %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_sitofp_v4f64_v4i64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vmovq %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
|
|
|
|
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
|
|
|
|
; AVX512F-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
|
|
|
|
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
|
|
|
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
2019-12-27 06:43:33 +08:00
|
|
|
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-10-17 03:24:47 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x double>
|
|
|
|
@llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <4 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %xmm1, %rax
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm2
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm1, %rax
|
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_sitofp_v4f32_v4i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_sitofp_v4f32_v4i64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; AVX512F-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
|
|
|
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
|
|
|
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
|
|
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
|
|
; AVX512F-NEXT: vzeroupper
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
2019-12-27 06:43:33 +08:00
|
|
|
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-10-17 03:24:47 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x float>
|
|
|
|
@llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <4 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <1 x i32> -> <1 x double> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations copy
; the argument with a 32-bit movl and then use the signed 64-bit (v)cvtsi2sd;
; the AVX512 expectations use vcvtusi2sd on %edi directly.
define <1 x double> @constrained_vector_uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movl %edi, %eax
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: movl %edi, %eax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x double>
|
|
|
|
@llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <1 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <1 x i32> -> <1 x float> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations copy
; the argument with a 32-bit movl and then use the signed 64-bit (v)cvtsi2ss;
; the AVX512 expectations use vcvtusi2ss on %edi directly.
define <1 x float> @constrained_vector_uitofp_v1f32_v1i32(<1 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movl %edi, %eax
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: movl %edi, %eax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x float>
|
|
|
|
@llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <1 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <1 x i64> -> <1 x double> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations
; interleave the input with a memory constant, subtract a second constant
; (subpd) and add the two lanes together; the AVX512 expectations are a single
; vcvtusi2sd on %rdi.
define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %rdi, %xmm1
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
|
|
|
|
; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1
|
|
|
|
; CHECK-NEXT: movapd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: addpd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vmovq %rdi, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
|
|
|
|
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
|
|
|
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x double>
|
|
|
|
@llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <1 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <1 x i64> -> <1 x float> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations
; compute (x >> 1) | (x & 1), select the original value with cmovnsq when the
; input is non-negative, convert with the signed (v)cvtsi2ss and double the
; result ((v)addss) on the path where the jns is not taken. The AVX512
; expectations are a single vcvtusi2ss on %rdi.
define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
|
|
|
; CHECK-NEXT: shrq %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movl %edi, %ecx
|
|
|
|
; CHECK-NEXT: andl $1, %ecx
|
|
|
|
; CHECK-NEXT: orq %rax, %rcx
|
|
|
|
; CHECK-NEXT: testq %rdi, %rdi
|
|
|
|
; CHECK-NEXT: cmovnsq %rdi, %rcx
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rcx, %xmm0
|
|
|
|
; CHECK-NEXT: jns .LBB170_2
|
|
|
|
; CHECK-NEXT: # %bb.1:
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: addss %xmm0, %xmm0
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: .LBB170_2: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: movq %rdi, %rax
|
|
|
|
; AVX1-NEXT: shrq %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: movl %edi, %ecx
|
|
|
|
; AVX1-NEXT: andl $1, %ecx
|
|
|
|
; AVX1-NEXT: orq %rax, %rcx
|
|
|
|
; AVX1-NEXT: testq %rdi, %rdi
|
|
|
|
; AVX1-NEXT: cmovnsq %rdi, %rcx
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: jns .LBB170_2
|
|
|
|
; AVX1-NEXT: # %bb.1:
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: .LBB170_2: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <1 x float>
|
|
|
|
@llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <1 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <2 x i32> -> <2 x double> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations
; widen each element to 64 bits with zeros, OR in the constant
; 4.503599627370496E+15 and then subtract that same constant; the AVX512
; expectations clear the upper half with vmovq and use vcvtudq2pd.
define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2020-01-04 02:56:09 +08:00
|
|
|
; CHECK-NEXT: xorpd %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
|
|
|
|
; CHECK-NEXT: orpd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: subpd %xmm1, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
2020-01-04 02:56:09 +08:00
|
|
|
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
|
|
|
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
|
|
|
|
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-12-27 05:18:24 +08:00
|
|
|
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
|
|
|
|
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512-NEXT: vzeroupper
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x double>
|
|
|
|
@llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <2 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <2 x i32> -> <2 x float> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations
; widen each element with zeros, OR in and then subtract the constant
; 4.503599627370496E+15, and narrow the double result with (v)cvtpd2ps; the
; AVX512 expectations clear the upper half with vmovq and use vcvtudq2ps.
define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-27 09:27:44 +08:00
|
|
|
; CHECK-NEXT: xorpd %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
|
|
|
|
; CHECK-NEXT: orpd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: subpd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
2019-12-27 16:15:45 +08:00
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
|
|
|
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
|
|
|
|
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
|
|
|
; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0
|
|
|
|
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512-NEXT: vzeroupper
|
|
|
|
; AVX512-NEXT: retq
|
2019-10-17 03:24:47 +08:00
|
|
|
entry:
|
|
|
|
%result = call <2 x float>
|
|
|
|
@llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <2 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <2 x i64> -> <2 x double> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default, AVX1 and AVX512F
; expectations split each element into its low 32 bits (mask/blend) and high
; 32 bits (shift right by 32), OR a memory constant into each half, subtract a
; constant from the high part and add the two parts. Only the AVX512DQ
; expectations use a direct vcvtuqq2pd.
define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
|
|
|
|
; CHECK-NEXT: pand %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: por {{.*}}(%rip), %xmm1
|
|
|
|
; CHECK-NEXT: psrlq $32, %xmm0
|
|
|
|
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
|
|
|
|
; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0
|
|
|
|
; CHECK-NEXT: addpd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
|
|
|
|
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_uitofp_v2f64_v2i64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
|
|
|
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
|
|
|
|
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
|
|
|
|
; AVX512F-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
|
|
|
; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
2019-12-27 06:43:33 +08:00
|
|
|
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-10-17 03:24:47 +08:00
|
|
|
entry:
|
|
|
|
%result = call <2 x double>
|
|
|
|
@llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <2 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <2 x i64> -> <2 x float> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default expectations handle each
; element with a scalar sequence: (x >> 1) | (x & 1), cmovnsq back to the
; original value, signed cvtsi2ss, and an addss doubling on the path where the
; jns is not taken. The AVX1 expectations do the halving, selection and
; doubling with vector blends around two scalar vcvtsi2ss conversions. The
; AVX512 expectations use one vcvtusi2ss per element.
define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movdqa %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %rcx
|
|
|
|
; CHECK-NEXT: shrq %rcx
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movl %eax, %edx
|
|
|
|
; CHECK-NEXT: andl $1, %edx
|
|
|
|
; CHECK-NEXT: orq %rcx, %rdx
|
|
|
|
; CHECK-NEXT: testq %rax, %rax
|
|
|
|
; CHECK-NEXT: cmovnsq %rax, %rdx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cvtsi2ss %rdx, %xmm0
|
|
|
|
; CHECK-NEXT: jns .LBB174_2
|
|
|
|
; CHECK-NEXT: # %bb.1:
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: addss %xmm0, %xmm0
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: .LBB174_2: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm1, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movq %rax, %rcx
|
|
|
|
; CHECK-NEXT: shrq %rcx
|
|
|
|
; CHECK-NEXT: movl %eax, %edx
|
|
|
|
; CHECK-NEXT: andl $1, %edx
|
|
|
|
; CHECK-NEXT: orq %rcx, %rdx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: testq %rax, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cmovnsq %rax, %rdx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
|
|
|
|
; CHECK-NEXT: jns .LBB174_4
|
|
|
|
; CHECK-NEXT: # %bb.3:
|
|
|
|
; CHECK-NEXT: addss %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: .LBB174_4: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
2020-01-06 09:01:57 +08:00
|
|
|
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
|
|
|
|
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vmovq %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
|
|
|
|
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
|
|
|
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
|
|
|
|
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <2 x float>
|
|
|
|
@llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <2 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <3 x i32> -> <3 x double> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. Every configuration converts the three
; elements one at a time: signed 64-bit (v)cvtsi2sd after a 32-bit extract for
; the default and AVX1 runs, vcvtusi2sd for the AVX512 runs. In the default
; expectations the third element is stored to the stack and reloaded with
; fldl, followed by a wait.
define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movd %xmm0, %eax
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm2
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
|
|
|
; CHECK-NEXT: movd %xmm1, %eax
|
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm1
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movd %xmm0, %eax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
|
|
|
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: movapd %xmm2, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vextractps $1, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vmovd %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vextractps $1, %xmm0, %eax
|
|
|
|
; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1
|
|
|
|
; AVX512-NEXT: vmovd %xmm0, %eax
|
|
|
|
; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm2
|
|
|
|
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX512-NEXT: vcvtusi2sd %eax, %xmm3, %xmm0
|
|
|
|
; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x double>
|
|
|
|
@llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <3 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <3 x i32> -> <3 x float> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. Every configuration converts the three
; elements one at a time and reassembles the vector: signed 64-bit (v)cvtsi2ss
; after a 32-bit extract for the default and AVX1 runs, vcvtusi2ss for the
; AVX512 runs.
define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movd %xmm0, %eax
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
|
|
|
; CHECK-NEXT: movd %xmm2, %eax
|
|
|
|
; CHECK-NEXT: xorps %xmm2, %xmm2
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm2
|
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movd %xmm0, %eax
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
|
|
|
|
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vextractps $1, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vmovd %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vextractps $1, %xmm0, %eax
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
|
|
|
|
; AVX512-NEXT: vmovd %xmm0, %eax
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm2
|
|
|
|
; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX512-NEXT: vpextrd $2, %xmm0, %eax
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %eax, %xmm3, %xmm0
|
|
|
|
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x float>
|
|
|
|
@llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <3 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <3 x i64> -> <3 x double> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations
; repeat one pattern per element: interleave with the constant
; [1127219200,1160773632,0,0], subtract
; [4.503599627370496E+15,1.9342813113834067E+25], then add the two lanes. In
; the default expectations the third element is stored to the stack and
; reloaded with fldl, followed by a wait. The AVX512 expectations use one
; vcvtusi2sd per element.
define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %rdi, %xmm1
|
|
|
|
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
|
|
|
; CHECK-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,1.9342813113834067E+25]
|
|
|
|
; CHECK-NEXT: subpd %xmm3, %xmm1
|
|
|
|
; CHECK-NEXT: movapd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: addpd %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: movq %rsi, %xmm4
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
|
|
|
; CHECK-NEXT: subpd %xmm3, %xmm4
|
|
|
|
; CHECK-NEXT: movapd %xmm4, %xmm1
|
|
|
|
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
|
|
|
|
; CHECK-NEXT: addpd %xmm4, %xmm1
|
|
|
|
; CHECK-NEXT: movq %rdx, %xmm4
|
|
|
|
; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
|
|
|
; CHECK-NEXT: subpd %xmm3, %xmm4
|
|
|
|
; CHECK-NEXT: movapd %xmm4, %xmm2
|
|
|
|
; CHECK-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm4[1]
|
|
|
|
; CHECK-NEXT: addpd %xmm4, %xmm2
|
|
|
|
; CHECK-NEXT: movlpd %xmm2, -{{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
|
2020-01-16 10:49:59 +08:00
|
|
|
; CHECK-NEXT: wait
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
|
|
|
|
; AVX1-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [4.503599627370496E+15,1.9342813113834067E+25]
|
|
|
|
; AVX1-NEXT: vsubpd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
|
|
|
|
; AVX1-NEXT: vaddpd %xmm2, %xmm4, %xmm2
|
|
|
|
; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
|
|
|
; AVX1-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
|
|
|
; AVX1-NEXT: vsubpd %xmm3, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm5 = xmm4[1,0]
|
|
|
|
; AVX1-NEXT: vaddpd %xmm4, %xmm5, %xmm4
|
|
|
|
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm4[0]
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; AVX1-NEXT: vsubpd %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
|
|
|
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
|
|
|
|
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0
|
|
|
|
; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x double>
|
|
|
|
@llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <3 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <3 x i64> -> <3 x float> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations
; repeat one scalar sequence per element: (x >> 1) | (x & 1), cmovnsq back to
; the original value, signed (v)cvtsi2ss, and an (v)addss doubling on the path
; where the jns is not taken. The AVX512 expectations use one vcvtusi2ss per
; element.
define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movq %rsi, %rax
|
|
|
|
; CHECK-NEXT: shrq %rax
|
|
|
|
; CHECK-NEXT: movl %esi, %ecx
|
|
|
|
; CHECK-NEXT: andl $1, %ecx
|
|
|
|
; CHECK-NEXT: orq %rax, %rcx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: testq %rsi, %rsi
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cmovnsq %rsi, %rcx
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
|
|
|
|
; CHECK-NEXT: jns .LBB178_2
|
|
|
|
; CHECK-NEXT: # %bb.1:
|
|
|
|
; CHECK-NEXT: addss %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: .LBB178_2: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
|
|
|
; CHECK-NEXT: shrq %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movl %edi, %ecx
|
|
|
|
; CHECK-NEXT: andl $1, %ecx
|
|
|
|
; CHECK-NEXT: orq %rax, %rcx
|
|
|
|
; CHECK-NEXT: testq %rdi, %rdi
|
|
|
|
; CHECK-NEXT: cmovnsq %rdi, %rcx
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rcx, %xmm0
|
|
|
|
; CHECK-NEXT: jns .LBB178_4
|
|
|
|
; CHECK-NEXT: # %bb.3:
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: addss %xmm0, %xmm0
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: .LBB178_4: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
|
|
; CHECK-NEXT: movq %rdx, %rax
|
|
|
|
; CHECK-NEXT: shrq %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movl %edx, %ecx
|
|
|
|
; CHECK-NEXT: andl $1, %ecx
|
|
|
|
; CHECK-NEXT: orq %rax, %rcx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: testq %rdx, %rdx
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cmovnsq %rdx, %rcx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
|
|
|
|
; CHECK-NEXT: jns .LBB178_6
|
|
|
|
; CHECK-NEXT: # %bb.5:
|
|
|
|
; CHECK-NEXT: addss %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: .LBB178_6: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: shrq %rcx
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: movl %eax, %edx
|
|
|
|
; AVX1-NEXT: andl $1, %edx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: testq %rax, %rax
|
|
|
|
; AVX1-NEXT: cmovnsq %rax, %rdx
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: jns .LBB178_2
|
|
|
|
; AVX1-NEXT: # %bb.1:
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: .LBB178_2: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: shrq %rcx
|
|
|
|
; AVX1-NEXT: movl %eax, %edx
|
|
|
|
; AVX1-NEXT: andl $1, %edx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: testq %rax, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: cmovnsq %rax, %rdx
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: jns .LBB178_4
|
|
|
|
; AVX1-NEXT: # %bb.3:
|
|
|
|
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: .LBB178_4: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX1-NEXT: movq %rax, %rcx
|
|
|
|
; AVX1-NEXT: shrq %rcx
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: movl %eax, %edx
|
|
|
|
; AVX1-NEXT: andl $1, %edx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: testq %rax, %rax
|
|
|
|
; AVX1-NEXT: cmovnsq %rax, %rdx
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: jns .LBB178_6
|
|
|
|
; AVX1-NEXT: # %bb.5:
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
2019-12-24 04:11:45 +08:00
|
|
|
; AVX1-NEXT: .LBB178_6: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
|
|
|
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
|
|
|
|
; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; AVX512-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
|
|
|
|
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
|
|
; AVX512-NEXT: vzeroupper
|
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <3 x float>
|
|
|
|
@llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <3 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <4 x i32> -> <4 x double> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations
; widen the elements with zeros, OR in the constant 4.503599627370496E+15 and
; then subtract that same constant (two 128-bit halves by default, one 256-bit
; operation for AVX1); the AVX512 expectations use vcvtudq2pd.
define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2020-01-04 02:56:09 +08:00
|
|
|
; CHECK-NEXT: xorpd %xmm2, %xmm2
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movapd %xmm0, %xmm1
|
2020-01-04 02:56:09 +08:00
|
|
|
; CHECK-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
|
2020-01-04 02:56:09 +08:00
|
|
|
; CHECK-NEXT: orpd %xmm3, %xmm1
|
|
|
|
; CHECK-NEXT: subpd %xmm3, %xmm1
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
|
|
|
; CHECK-NEXT: orpd %xmm3, %xmm0
|
|
|
|
; CHECK-NEXT: subpd %xmm3, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
2020-01-07 06:06:20 +08:00
|
|
|
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
|
|
|
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
2020-01-15 02:18:32 +08:00
|
|
|
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
|
2020-01-07 06:06:20 +08:00
|
|
|
; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-12-27 06:43:33 +08:00
|
|
|
; AVX512-NEXT: vmovaps %xmm0, %xmm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
|
|
|
|
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x double>
|
|
|
|
@llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <4 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP unsigned <4 x i32> -> <4 x float> conversion through
; llvm.experimental.constrained.uitofp with "round.dynamic" rounding and
; "fpexcept.strict" exception behavior. The default and AVX1 expectations
; split each element into its low 16 bits (mask/blend) and high 16 bits (shift
; right by 16), combine each half with a memory constant, subtract a constant
; from the high part ((v)subps) and add the two parts ((v)addps). The AVX512
; expectations use vcvtudq2ps.
define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2019-12-24 09:44:22 +08:00
|
|
|
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
|
|
|
|
; CHECK-NEXT: pand %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: por {{.*}}(%rip), %xmm1
|
|
|
|
; CHECK-NEXT: psrld $16, %xmm0
|
|
|
|
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
|
2020-01-03 12:51:13 +08:00
|
|
|
; CHECK-NEXT: subps {{.*}}(%rip), %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: addps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
|
2020-01-03 12:51:13 +08:00
|
|
|
; AVX1-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32:
|
|
|
|
; AVX512: # %bb.0: # %entry
|
2019-12-27 06:43:33 +08:00
|
|
|
; AVX512-NEXT: vmovaps %xmm0, %xmm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0
|
|
|
|
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
|
|
; AVX512-NEXT: vzeroupper
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX512-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x float>
|
|
|
|
@llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <4 x float> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP conversion of an unsigned <4 x i64> argument to <4 x double> via
; llvm.experimental.constrained.uitofp, using dynamic rounding and strict
; exception semantics. The assertions below are autogenerated (see the NOTE
; at the top of this file), so regenerate them rather than editing by hand.
; Expected lowering for each RUN configuration
;   default (no -mattr) - two 128-bit halves, each split into low and high
;                         32-bit parts that are or-ed with constants and
;                         recombined through subpd and addpd
;   +avx                - the same split on ymm registers (the 64-bit shifts
;                         are still done per 128-bit half)
;   +avx512f            - the same split with 256-bit integer instructions
;   +avx512dq           - one vcvtuqq2pd on the zmm register
define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movdqa %xmm1, %xmm3
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: pand %xmm2, %xmm3
|
|
|
|
; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
|
|
|
|
; CHECK-NEXT: por %xmm4, %xmm3
|
|
|
|
; CHECK-NEXT: psrlq $32, %xmm1
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: por %xmm5, %xmm1
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: subpd %xmm6, %xmm1
|
2020-01-13 21:37:07 +08:00
|
|
|
; CHECK-NEXT: addpd %xmm3, %xmm1
|
|
|
|
; CHECK-NEXT: pand %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: por %xmm4, %xmm2
|
|
|
|
; CHECK-NEXT: psrlq $32, %xmm0
|
|
|
|
; CHECK-NEXT: por %xmm5, %xmm0
|
|
|
|
; CHECK-NEXT: subpd %xmm6, %xmm0
|
|
|
|
; CHECK-NEXT: addpd %xmm2, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
|
|
|
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
|
|
|
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
|
|
|
; AVX1-NEXT: vorpd {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512F-LABEL: constrained_vector_uitofp_v4f64_v4i64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
|
|
|
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
|
|
|
|
; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0
|
|
|
|
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
|
|
|
|
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
|
|
|
|
; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
2019-12-27 06:43:33 +08:00
|
|
|
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
2019-10-17 03:24:47 +08:00
|
|
|
entry:
|
|
|
|
%result = call <4 x double>
|
|
|
|
@llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <4 x double> %result
|
|
|
|
}
|
|
|
|
|
|
|
|
; Strict-FP conversion of an unsigned <4 x i64> argument to <4 x float> via
; llvm.experimental.constrained.uitofp, using dynamic rounding and strict
; exception semantics. The assertions below are autogenerated (see the NOTE
; at the top of this file), so regenerate them rather than editing by hand.
; Expected lowering for each RUN configuration
;   default (no -mattr) - each element is handled as a scalar. The value is
;                         halved with its low bit or-ed back in, cmovns keeps
;                         the original when it is non-negative, cvtsi2ss
;                         converts it, and a branch doubles the result for
;                         inputs that had the sign bit set
;   +avx                - the halving is vectorised and selected with
;                         vblendvpd, elements are converted with vcvtsi2ss,
;                         and vaddps plus vblendvps applies the doubling
;   +avx512f            - four scalar vcvtusi2ss conversions joined by vinsertps
;   +avx512dq           - one vcvtuqq2ps on the zmm register
define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
|
|
|
|
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %xmm1, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movq %rax, %rcx
|
|
|
|
; CHECK-NEXT: shrq %rcx
|
|
|
|
; CHECK-NEXT: movl %eax, %edx
|
|
|
|
; CHECK-NEXT: andl $1, %edx
|
|
|
|
; CHECK-NEXT: orq %rcx, %rdx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: testq %rax, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cmovnsq %rax, %rdx
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rdx, %xmm2
|
|
|
|
; CHECK-NEXT: jns .LBB182_2
|
|
|
|
; CHECK-NEXT: # %bb.1:
|
|
|
|
; CHECK-NEXT: addss %xmm2, %xmm2
|
|
|
|
; CHECK-NEXT: .LBB182_2: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm1, %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %rcx
|
|
|
|
; CHECK-NEXT: shrq %rcx
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movl %eax, %edx
|
|
|
|
; CHECK-NEXT: andl $1, %edx
|
|
|
|
; CHECK-NEXT: orq %rcx, %rdx
|
|
|
|
; CHECK-NEXT: testq %rax, %rax
|
|
|
|
; CHECK-NEXT: cmovnsq %rax, %rdx
|
|
|
|
; CHECK-NEXT: cvtsi2ss %rdx, %xmm3
|
|
|
|
; CHECK-NEXT: jns .LBB182_4
|
|
|
|
; CHECK-NEXT: # %bb.3:
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: addss %xmm3, %xmm3
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: .LBB182_4: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %rcx
|
|
|
|
; CHECK-NEXT: shrq %rcx
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movl %eax, %edx
|
|
|
|
; CHECK-NEXT: andl $1, %edx
|
|
|
|
; CHECK-NEXT: orq %rcx, %rdx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: testq %rax, %rax
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cmovnsq %rax, %rdx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
|
|
|
|
; CHECK-NEXT: jns .LBB182_6
|
|
|
|
; CHECK-NEXT: # %bb.5:
|
|
|
|
; CHECK-NEXT: addss %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: .LBB182_6: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; CHECK-NEXT: movq %xmm0, %rax
|
|
|
|
; CHECK-NEXT: movq %rax, %rcx
|
|
|
|
; CHECK-NEXT: shrq %rcx
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: movl %eax, %edx
|
|
|
|
; CHECK-NEXT: andl $1, %edx
|
|
|
|
; CHECK-NEXT: orq %rcx, %rdx
|
|
|
|
; CHECK-NEXT: testq %rax, %rax
|
|
|
|
; CHECK-NEXT: cmovnsq %rax, %rdx
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: cvtsi2ss %rdx, %xmm0
|
|
|
|
; CHECK-NEXT: jns .LBB182_8
|
|
|
|
; CHECK-NEXT: # %bb.7:
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: addss %xmm0, %xmm0
|
2019-12-24 04:11:45 +08:00
|
|
|
; CHECK-NEXT: .LBB182_8: # %entry
|
2019-10-17 03:24:47 +08:00
|
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
|
|
|
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
|
|
|
|
; AVX1: # %bb.0: # %entry
|
2020-01-06 09:01:57 +08:00
|
|
|
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm3
|
|
|
|
; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
|
2020-01-20 18:48:40 +08:00
|
|
|
; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
|
|
|
|
; AVX1-NEXT: vmovq %xmm1, %rax
|
2020-01-06 09:01:57 +08:00
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
|
2020-01-20 18:48:40 +08:00
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
|
2020-01-06 09:01:57 +08:00
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
|
|
|
; AVX1-NEXT: vmovq %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
|
|
|
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
|
|
|
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
|
|
|
|
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: constrained_vector_uitofp_v4f32_v4i64:
|
|
|
|
; AVX512F: # %bb.0: # %entry
|
|
|
|
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
|
|
|
|
; AVX512F-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
|
|
|
|
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
|
|
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; AVX512F-NEXT: vmovq %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
|
|
|
|
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
|
|
|
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
|
|
|
|
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
|
|
; AVX512F-NEXT: vzeroupper
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
|
|
|
|
; AVX512DQ: # %bb.0: # %entry
|
2019-12-27 06:43:33 +08:00
|
|
|
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
|
2019-12-24 09:44:22 +08:00
|
|
|
; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
|
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
2019-10-17 03:24:47 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%result = call <4 x float>
|
|
|
|
@llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
|
|
|
|
metadata !"round.dynamic",
|
|
|
|
metadata !"fpexcept.strict") #0
|
|
|
|
ret <4 x float> %result
|
|
|
|
}
|
|
|
|
|
2019-12-19 06:19:55 +08:00
|
|
|
; Simple test to make sure we don't fuse vselect+strict_fadd into a masked operation.
; Despite the vpaddd in its name, the function performs a constrained
; floating-point add of %i and %j on <16 x float>. Lanes where %mask1 is
; non-zero take the sum and the remaining lanes keep %i.
; In every configuration the add must stay a full-width, unmasked instruction
; with the select applied afterwards. The AVX-512 runs show this as a plain
; vaddps followed by a vmovaps that is masked with k1.
; NOTE(review) the definition is marked readnone although it contains a call
; with strict exception semantics - confirm that combination is intended.
|
|
|
|
define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone strictfp {
|
|
|
|
; CHECK-LABEL: vpaddd_mask_test:
|
|
|
|
; CHECK: # %bb.0:
|
|
|
|
; CHECK-NEXT: pxor %xmm10, %xmm10
|
|
|
|
; CHECK-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
|
|
|
|
; CHECK-NEXT: pcmpeqd %xmm10, %xmm8
|
|
|
|
; CHECK-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
|
|
|
|
; CHECK-NEXT: pcmpeqd %xmm10, %xmm9
|
|
|
|
; CHECK-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
|
|
|
|
; CHECK-NEXT: pcmpeqd %xmm10, %xmm11
|
|
|
|
; CHECK-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm10
|
|
|
|
; CHECK-NEXT: addps %xmm3, %xmm7
|
|
|
|
; CHECK-NEXT: addps %xmm2, %xmm6
|
|
|
|
; CHECK-NEXT: addps %xmm1, %xmm5
|
|
|
|
; CHECK-NEXT: addps %xmm0, %xmm4
|
|
|
|
; CHECK-NEXT: andps %xmm10, %xmm0
|
|
|
|
; CHECK-NEXT: andnps %xmm4, %xmm10
|
|
|
|
; CHECK-NEXT: orps %xmm10, %xmm0
|
|
|
|
; CHECK-NEXT: andps %xmm11, %xmm1
|
|
|
|
; CHECK-NEXT: andnps %xmm5, %xmm11
|
|
|
|
; CHECK-NEXT: orps %xmm11, %xmm1
|
|
|
|
; CHECK-NEXT: andps %xmm9, %xmm2
|
|
|
|
; CHECK-NEXT: andnps %xmm6, %xmm9
|
|
|
|
; CHECK-NEXT: orps %xmm9, %xmm2
|
|
|
|
; CHECK-NEXT: andps %xmm8, %xmm3
|
|
|
|
; CHECK-NEXT: andnps %xmm7, %xmm8
|
|
|
|
; CHECK-NEXT: orps %xmm8, %xmm3
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: vpaddd_mask_test:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
|
|
|
|
; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm6
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm5, %xmm5
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm6
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
|
|
|
|
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm3
|
|
|
|
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm2
|
|
|
|
; AVX1-NEXT: vblendvps %ymm4, %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX1-NEXT: vblendvps %ymm5, %ymm1, %ymm3, %ymm1
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: vpaddd_mask_test:
|
|
|
|
; AVX512: # %bb.0:
|
|
|
|
; AVX512-NEXT: vptestmd %zmm2, %zmm2, %k1
|
2019-12-19 06:24:20 +08:00
|
|
|
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm1
|
|
|
|
; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
|
2019-12-19 06:19:55 +08:00
|
|
|
; AVX512-NEXT: retq
|
|
|
|
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
|
|
|
|
%x = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %i, <16 x float> %j, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
|
|
|
|
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %i
|
|
|
|
ret <16 x float> %r
|
|
|
|
}
|
|
|
|
declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
|
|
|
|
|
2019-10-05 01:03:46 +08:00
|
|
|
; Attribute group #0 carries the strictfp attribute. It is referenced by the
; test function definitions and by the constrained intrinsic call sites above.
attributes #0 = { strictfp }
|
2018-11-05 23:59:49 +08:00
|
|
|
|
2018-07-23 22:40:17 +08:00
|
|
|
; Single width declarations
|
2018-06-13 22:32:12 +08:00
|
|
|
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
2018-08-21 03:28:56 +08:00
|
|
|
declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
2018-06-13 22:32:12 +08:00
|
|
|
declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
2018-06-16 04:57:55 +08:00
|
|
|
declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
|
2018-06-13 22:32:12 +08:00
|
|
|
declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
|
2019-12-18 04:08:18 +08:00
|
|
|
declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata)
|
2019-08-29 00:33:36 +08:00
|
|
|
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
|
|
|
|
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
|
|
|
|
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
|
|
|
|
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
|
|
|
|
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
|
|
|
|
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
|
|
|
|
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
|
|
|
|
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
|
2019-05-13 21:23:30 +08:00
|
|
|
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
|
2019-12-18 04:08:18 +08:00
|
|
|
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
|
2019-10-17 03:24:47 +08:00
|
|
|
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
|
|
|
|
declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
|
|
|
|
declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
|
|
|
|
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
|
|
|
|
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
|
|
|
|
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
|
2018-07-23 22:40:17 +08:00
|
|
|
|
2018-08-15 06:13:11 +08:00
|
|
|
; Scalar width declarations
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
2018-08-21 03:28:56 +08:00
|
|
|
declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
2018-08-15 06:13:11 +08:00
|
|
|
declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
|
2019-12-18 04:08:18 +08:00
|
|
|
declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata)
|
2019-08-29 00:33:36 +08:00
|
|
|
declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(<1 x float>, metadata)
|
|
|
|
declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(<1 x float>, metadata)
|
|
|
|
declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata)
|
|
|
|
declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata)
|
|
|
|
declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(<1 x float>, metadata)
|
|
|
|
declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(<1 x float>, metadata)
|
|
|
|
declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata)
|
|
|
|
declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata)
|
2019-05-13 21:23:30 +08:00
|
|
|
declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
|
|
|
|
declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
|
2019-12-18 04:08:18 +08:00
|
|
|
declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata)
|
2019-10-17 03:24:47 +08:00
|
|
|
declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
|
|
|
|
declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
|
|
|
|
declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
|
|
|
|
declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
|
2018-08-15 06:13:11 +08:00
|
|
|
|
2018-08-01 22:17:19 +08:00
|
|
|
; Illegal width declarations
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
|
2018-08-21 03:28:56 +08:00
|
|
|
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
|
2018-08-01 22:17:19 +08:00
|
|
|
declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
|
2019-12-18 04:08:18 +08:00
|
|
|
declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata)
|
2019-08-29 00:33:36 +08:00
|
|
|
declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(<3 x float>, metadata)
|
|
|
|
declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(<3 x float>, metadata)
|
|
|
|
declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(<3 x double>, metadata)
|
|
|
|
declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(<3 x double>, metadata)
|
|
|
|
declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(<3 x float>, metadata)
|
|
|
|
declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(<3 x float>, metadata)
|
|
|
|
declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(<3 x double>, metadata)
|
|
|
|
declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(<3 x double>, metadata)
|
2019-05-13 21:23:30 +08:00
|
|
|
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
|
2019-12-18 04:08:18 +08:00
|
|
|
declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata)
|
2019-10-17 03:24:47 +08:00
|
|
|
declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
|
|
|
|
declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
|
|
|
|
declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
|
2018-08-01 22:17:19 +08:00
|
|
|
|
2018-07-23 22:40:17 +08:00
|
|
|
; Double width declarations
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|
2018-08-21 03:28:56 +08:00
|
|
|
declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|
2018-07-23 22:40:17 +08:00
|
|
|
declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
|
2019-12-18 04:08:18 +08:00
|
|
|
declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata)
|
|
|
|
declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata)
|
2019-08-29 00:33:36 +08:00
|
|
|
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
|
|
|
|
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata)
|
|
|
|
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata)
|
|
|
|
; Constrained FP-to-signed-int conversion, <4 x double> -> <4 x i64>.
; The name suffix is <result type>.<source type>; the single metadata operand
; is the exception behavior (no rounding-mode operand, per the LangRef).
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
|
|
|
|
; Constrained FP-to-unsigned-int conversion, <4 x float> -> <4 x i32>.
; The name suffix is <result type>.<source type>; the single metadata operand
; is the exception behavior (no rounding-mode operand, per the LangRef).
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
|
|
|
|
; Constrained FP-to-unsigned-int conversion, <4 x float> -> <4 x i64>.
; The name suffix is <result type>.<source type>; the single metadata operand
; is the exception behavior (no rounding-mode operand, per the LangRef).
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata)
|
|
|
|
; Constrained FP-to-unsigned-int conversion, <4 x double> -> <4 x i32>.
; The name suffix is <result type>.<source type>; the single metadata operand
; is the exception behavior (no rounding-mode operand, per the LangRef).
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata)
|
|
|
|
; Constrained FP-to-unsigned-int conversion, <4 x double> -> <4 x i64>.
; The name suffix is <result type>.<source type>; the single metadata operand
; is the exception behavior (no rounding-mode operand, per the LangRef).
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata)
|
2019-05-13 21:23:30 +08:00
|
|
|
; Constrained FP truncation, <4 x double> -> <4 x float>. Narrowing can round,
; so this form carries two metadata operands (rounding mode, then exception
; behavior, per the LangRef).
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
|
|
|
|
; Constrained FP extension, <4 x float> -> <4 x double>. Only one metadata
; operand is taken (exception behavior; no rounding mode, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
|
2019-12-18 04:08:18 +08:00
|
|
|
; Constrained ceil on <4 x double>: one vector operand and a single metadata
; operand (exception behavior only, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata)
|
|
|
|
; Constrained floor on <4 x double>: one vector operand and a single metadata
; operand (exception behavior only, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata)
|
|
|
|
; Constrained round on <4 x double>: one vector operand and a single metadata
; operand (exception behavior only, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata)
|
|
|
|
; Constrained trunc on <4 x double>: one vector operand and a single metadata
; operand (exception behavior only, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
|
2019-10-17 03:24:47 +08:00
|
|
|
; Constrained signed-int-to-FP conversion, <4 x i32> -> <4 x double>.
; The name suffix is <result type>.<source type>; two metadata operands follow
; (rounding mode, then exception behavior, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
|
|
|
|
; Constrained signed-int-to-FP conversion, <4 x i32> -> <4 x float>.
; The name suffix is <result type>.<source type>; two metadata operands follow
; (rounding mode, then exception behavior, per the LangRef).
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
|
|
|
|
; Constrained signed-int-to-FP conversion, <4 x i64> -> <4 x double>.
; The name suffix is <result type>.<source type>; two metadata operands follow
; (rounding mode, then exception behavior, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
|
|
|
|
; Constrained signed-int-to-FP conversion, <4 x i64> -> <4 x float>.
; The name suffix is <result type>.<source type>; two metadata operands follow
; (rounding mode, then exception behavior, per the LangRef).
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
|
|
|
|
; Constrained unsigned-int-to-FP conversion, <4 x i32> -> <4 x double>.
; The name suffix is <result type>.<source type>; two metadata operands follow
; (rounding mode, then exception behavior, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
|
|
|
|
; Constrained unsigned-int-to-FP conversion, <4 x i32> -> <4 x float>.
; The name suffix is <result type>.<source type>; two metadata operands follow
; (rounding mode, then exception behavior, per the LangRef).
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
|
|
|
|
; Constrained unsigned-int-to-FP conversion, <4 x i64> -> <4 x double>.
; The name suffix is <result type>.<source type>; two metadata operands follow
; (rounding mode, then exception behavior, per the LangRef).
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
|
|
|
|
; Constrained unsigned-int-to-FP conversion, <4 x i64> -> <4 x float>.
; The name suffix is <result type>.<source type>; two metadata operands follow
; (rounding mode, then exception behavior, per the LangRef).
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
|
|
|
|
|